{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 30000, "global_step": 106250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.705882352941177e-05, "grad_norm": 142.81945520234524, "learning_rate": 7.525870178739418e-08, "loss": 9.475961303710937, "step": 5 }, { "epoch": 9.411764705882353e-05, "grad_norm": 147.0614317924902, "learning_rate": 1.6933207902163687e-07, "loss": 9.474417114257813, "step": 10 }, { "epoch": 0.00014117647058823528, "grad_norm": 134.51083492398763, "learning_rate": 2.6340545625587964e-07, "loss": 9.261054992675781, "step": 15 }, { "epoch": 0.00018823529411764707, "grad_norm": 97.55700904933032, "learning_rate": 3.574788334901223e-07, "loss": 8.890567016601562, "step": 20 }, { "epoch": 0.00023529411764705883, "grad_norm": 62.8816286681573, "learning_rate": 4.5155221072436507e-07, "loss": 8.341508483886718, "step": 25 }, { "epoch": 0.00028235294117647056, "grad_norm": 43.507540627843916, "learning_rate": 5.456255879586077e-07, "loss": 8.160541534423828, "step": 30 }, { "epoch": 0.0003294117647058824, "grad_norm": 39.38025465689275, "learning_rate": 6.396989651928505e-07, "loss": 7.419805908203125, "step": 35 }, { "epoch": 0.00037647058823529414, "grad_norm": 28.689404430365197, "learning_rate": 7.337723424270932e-07, "loss": 7.324165344238281, "step": 40 }, { "epoch": 0.0004235294117647059, "grad_norm": 28.534258364791473, "learning_rate": 8.27845719661336e-07, "loss": 6.767093658447266, "step": 45 }, { "epoch": 0.00047058823529411766, "grad_norm": 21.11179223885419, "learning_rate": 9.219190968955787e-07, "loss": 6.489215087890625, "step": 50 }, { "epoch": 0.0005176470588235294, "grad_norm": 17.985912757443092, "learning_rate": 1.0159924741298213e-06, "loss": 6.262983703613282, "step": 55 }, { "epoch": 0.0005647058823529411, "grad_norm": 17.616710860712715, "learning_rate": 1.110065851364064e-06, "loss": 6.084229278564453, "step": 60 }, { "epoch": 0.0006117647058823529, "grad_norm": 15.127556707417783, "learning_rate": 1.2041392285983068e-06, "loss": 5.901029968261719, "step": 65 }, { "epoch": 0.0006588235294117648, "grad_norm": 14.41841589473666, "learning_rate": 1.2982126058325495e-06, "loss": 5.5398609161376955, "step": 70 }, { "epoch": 0.0007058823529411765, "grad_norm": 17.681781896877787, "learning_rate": 1.3922859830667922e-06, "loss": 5.582062530517578, "step": 75 }, { "epoch": 0.0007529411764705883, "grad_norm": 15.918466452708007, "learning_rate": 1.486359360301035e-06, "loss": 5.3630859375, "step": 80 }, { "epoch": 0.0008, "grad_norm": 20.774694113387067, "learning_rate": 1.5804327375352776e-06, "loss": 5.3709869384765625, "step": 85 }, { "epoch": 0.0008470588235294118, "grad_norm": 15.227699194980834, "learning_rate": 1.6745061147695205e-06, "loss": 5.331340789794922, "step": 90 }, { "epoch": 0.0008941176470588236, "grad_norm": 17.557504051162017, "learning_rate": 1.768579492003763e-06, "loss": 5.133724975585937, "step": 95 }, { "epoch": 0.0009411764705882353, "grad_norm": 15.115400503439655, "learning_rate": 1.8626528692380059e-06, "loss": 5.147073745727539, "step": 100 }, { "epoch": 0.000988235294117647, "grad_norm": 19.845792688596312, "learning_rate": 1.9567262464722488e-06, "loss": 5.005277633666992, "step": 105 }, { "epoch": 0.0010352941176470587, "grad_norm": 14.540386130488843, "learning_rate": 2.050799623706491e-06, "loss": 5.117272186279297, "step": 110 }, { "epoch": 0.0010823529411764705, "grad_norm": 18.435372357728053, "learning_rate": 2.144873000940734e-06, "loss": 5.014704895019531, "step": 115 }, { "epoch": 0.0011294117647058823, "grad_norm": 22.17085739331287, "learning_rate": 2.238946378174977e-06, "loss": 4.876046752929687, "step": 120 }, { "epoch": 0.001176470588235294, "grad_norm": 15.501739160225387, "learning_rate": 2.333019755409219e-06, "loss": 4.895670318603516, "step": 125 }, { "epoch": 0.0012235294117647058, "grad_norm": 26.371803422728068, "learning_rate": 2.4270931326434622e-06, "loss": 4.74792251586914, "step": 130 }, { "epoch": 0.0012705882352941178, "grad_norm": 16.259235905217217, "learning_rate": 2.521166509877705e-06, "loss": 4.644318771362305, "step": 135 }, { "epoch": 0.0013176470588235295, "grad_norm": 21.217387852282137, "learning_rate": 2.6152398871119476e-06, "loss": 4.706063842773437, "step": 140 }, { "epoch": 0.0013647058823529413, "grad_norm": 28.75036923488456, "learning_rate": 2.70931326434619e-06, "loss": 4.594903182983399, "step": 145 }, { "epoch": 0.001411764705882353, "grad_norm": 14.451304030580728, "learning_rate": 2.803386641580433e-06, "loss": 4.544287109375, "step": 150 }, { "epoch": 0.0014588235294117648, "grad_norm": 17.006057282132755, "learning_rate": 2.8974600188146757e-06, "loss": 4.484794616699219, "step": 155 }, { "epoch": 0.0015058823529411766, "grad_norm": 13.567462431420205, "learning_rate": 2.9915333960489184e-06, "loss": 4.548626708984375, "step": 160 }, { "epoch": 0.0015529411764705883, "grad_norm": 18.94534086075918, "learning_rate": 3.0856067732831615e-06, "loss": 4.473534393310547, "step": 165 }, { "epoch": 0.0016, "grad_norm": 24.474887594110854, "learning_rate": 3.179680150517404e-06, "loss": 4.416361236572266, "step": 170 }, { "epoch": 0.0016470588235294118, "grad_norm": 16.84772628379869, "learning_rate": 3.2737535277516464e-06, "loss": 4.2915283203125, "step": 175 }, { "epoch": 0.0016941176470588236, "grad_norm": 17.00260472689261, "learning_rate": 3.367826904985889e-06, "loss": 4.330631256103516, "step": 180 }, { "epoch": 0.0017411764705882354, "grad_norm": 17.377893377008913, "learning_rate": 3.461900282220132e-06, "loss": 4.241221237182617, "step": 185 }, { "epoch": 0.0017882352941176471, "grad_norm": 15.567411184835123, "learning_rate": 3.555973659454375e-06, "loss": 4.191771697998047, "step": 190 }, { "epoch": 0.0018352941176470589, "grad_norm": 13.396522437593758, "learning_rate": 3.6500470366886176e-06, "loss": 4.175608825683594, "step": 195 }, { "epoch": 0.0018823529411764706, "grad_norm": 16.032469722975147, "learning_rate": 3.7441204139228603e-06, "loss": 4.145372009277343, "step": 200 }, { "epoch": 0.0019294117647058824, "grad_norm": 15.498385874577124, "learning_rate": 3.8381937911571026e-06, "loss": 4.084629058837891, "step": 205 }, { "epoch": 0.001976470588235294, "grad_norm": 15.122326644182316, "learning_rate": 3.932267168391345e-06, "loss": 4.107077789306641, "step": 210 }, { "epoch": 0.0020235294117647057, "grad_norm": 15.397176122671569, "learning_rate": 4.026340545625588e-06, "loss": 3.886156463623047, "step": 215 }, { "epoch": 0.0020705882352941175, "grad_norm": 15.741831328116398, "learning_rate": 4.1204139228598315e-06, "loss": 3.99342041015625, "step": 220 }, { "epoch": 0.0021176470588235292, "grad_norm": 19.142686743688365, "learning_rate": 4.214487300094074e-06, "loss": 3.8078163146972654, "step": 225 }, { "epoch": 0.002164705882352941, "grad_norm": 17.073139193871228, "learning_rate": 4.308560677328316e-06, "loss": 3.8051490783691406, "step": 230 }, { "epoch": 0.0022117647058823527, "grad_norm": 16.724898769660776, "learning_rate": 4.402634054562559e-06, "loss": 3.6961334228515623, "step": 235 }, { "epoch": 0.0022588235294117645, "grad_norm": 17.780969180607457, "learning_rate": 4.496707431796801e-06, "loss": 3.583592987060547, "step": 240 }, { "epoch": 0.0023058823529411763, "grad_norm": 15.67489955861006, "learning_rate": 4.590780809031045e-06, "loss": 3.536026382446289, "step": 245 }, { "epoch": 0.002352941176470588, "grad_norm": 16.856545992219765, "learning_rate": 4.684854186265288e-06, "loss": 3.3897636413574217, "step": 250 }, { "epoch": 0.0024, "grad_norm": 19.458807504693446, "learning_rate": 4.77892756349953e-06, "loss": 3.224806213378906, "step": 255 }, { "epoch": 0.0024470588235294116, "grad_norm": 17.900320895812406, "learning_rate": 4.873000940733772e-06, "loss": 3.1189937591552734, "step": 260 }, { "epoch": 0.0024941176470588237, "grad_norm": 18.890098428918837, "learning_rate": 4.967074317968016e-06, "loss": 3.052062225341797, "step": 265 }, { "epoch": 0.0025411764705882355, "grad_norm": 20.5701440579552, "learning_rate": 5.061147695202258e-06, "loss": 2.9030521392822264, "step": 270 }, { "epoch": 0.0025882352941176473, "grad_norm": 19.026089615096154, "learning_rate": 5.1552210724365e-06, "loss": 2.789678192138672, "step": 275 }, { "epoch": 0.002635294117647059, "grad_norm": 19.876402498781914, "learning_rate": 5.249294449670743e-06, "loss": 2.6493900299072264, "step": 280 }, { "epoch": 0.002682352941176471, "grad_norm": 18.85378263247251, "learning_rate": 5.343367826904986e-06, "loss": 2.424032974243164, "step": 285 }, { "epoch": 0.0027294117647058825, "grad_norm": 26.835959528875915, "learning_rate": 5.437441204139229e-06, "loss": 2.333036422729492, "step": 290 }, { "epoch": 0.0027764705882352943, "grad_norm": 20.80892292892931, "learning_rate": 5.531514581373472e-06, "loss": 2.1879293441772463, "step": 295 }, { "epoch": 0.002823529411764706, "grad_norm": 16.969151129796394, "learning_rate": 5.6255879586077145e-06, "loss": 2.0764732360839844, "step": 300 }, { "epoch": 0.002870588235294118, "grad_norm": 13.969105787709447, "learning_rate": 5.719661335841957e-06, "loss": 1.9132394790649414, "step": 305 }, { "epoch": 0.0029176470588235296, "grad_norm": 17.14124643182062, "learning_rate": 5.8137347130762e-06, "loss": 1.8584697723388672, "step": 310 }, { "epoch": 0.0029647058823529414, "grad_norm": 12.673397840882812, "learning_rate": 5.9078080903104426e-06, "loss": 1.7489152908325196, "step": 315 }, { "epoch": 0.003011764705882353, "grad_norm": 17.350473811360636, "learning_rate": 6.001881467544686e-06, "loss": 1.6992507934570313, "step": 320 }, { "epoch": 0.003058823529411765, "grad_norm": 20.1553472408823, "learning_rate": 6.095954844778929e-06, "loss": 1.6421085357666017, "step": 325 }, { "epoch": 0.0031058823529411766, "grad_norm": 22.504746480159387, "learning_rate": 6.1900282220131715e-06, "loss": 1.6024240493774413, "step": 330 }, { "epoch": 0.0031529411764705884, "grad_norm": 13.834937653513245, "learning_rate": 6.284101599247413e-06, "loss": 1.5607526779174805, "step": 335 }, { "epoch": 0.0032, "grad_norm": 9.103098997313046, "learning_rate": 6.378174976481656e-06, "loss": 1.553976058959961, "step": 340 }, { "epoch": 0.003247058823529412, "grad_norm": 14.159869230374166, "learning_rate": 6.472248353715899e-06, "loss": 1.5503812789916993, "step": 345 }, { "epoch": 0.0032941176470588237, "grad_norm": 12.33605584928142, "learning_rate": 6.566321730950141e-06, "loss": 1.4997840881347657, "step": 350 }, { "epoch": 0.0033411764705882354, "grad_norm": 13.73328923201431, "learning_rate": 6.660395108184384e-06, "loss": 1.481546974182129, "step": 355 }, { "epoch": 0.003388235294117647, "grad_norm": 9.735642349690448, "learning_rate": 6.754468485418627e-06, "loss": 1.4716547966003417, "step": 360 }, { "epoch": 0.003435294117647059, "grad_norm": 17.82338710600071, "learning_rate": 6.84854186265287e-06, "loss": 1.448979377746582, "step": 365 }, { "epoch": 0.0034823529411764707, "grad_norm": 12.076114016093584, "learning_rate": 6.942615239887113e-06, "loss": 1.4398374557495117, "step": 370 }, { "epoch": 0.0035294117647058825, "grad_norm": 8.755697066096461, "learning_rate": 7.036688617121356e-06, "loss": 1.4047693252563476, "step": 375 }, { "epoch": 0.0035764705882352942, "grad_norm": 8.631136353197052, "learning_rate": 7.130761994355598e-06, "loss": 1.3906591415405274, "step": 380 }, { "epoch": 0.003623529411764706, "grad_norm": 13.176239313624752, "learning_rate": 7.224835371589841e-06, "loss": 1.3614408493041992, "step": 385 }, { "epoch": 0.0036705882352941178, "grad_norm": 11.75631521326247, "learning_rate": 7.318908748824083e-06, "loss": 1.35836181640625, "step": 390 }, { "epoch": 0.0037176470588235295, "grad_norm": 7.689293065156316, "learning_rate": 7.412982126058326e-06, "loss": 1.3588577270507813, "step": 395 }, { "epoch": 0.0037647058823529413, "grad_norm": 6.854397598386545, "learning_rate": 7.507055503292568e-06, "loss": 1.3061065673828125, "step": 400 }, { "epoch": 0.003811764705882353, "grad_norm": 10.434241787702728, "learning_rate": 7.601128880526811e-06, "loss": 1.2756640434265136, "step": 405 }, { "epoch": 0.003858823529411765, "grad_norm": 7.465362539410588, "learning_rate": 7.695202257761055e-06, "loss": 1.2324516296386718, "step": 410 }, { "epoch": 0.0039058823529411766, "grad_norm": 7.508451458107453, "learning_rate": 7.789275634995296e-06, "loss": 1.2183186531066894, "step": 415 }, { "epoch": 0.003952941176470588, "grad_norm": 8.305440097354728, "learning_rate": 7.88334901222954e-06, "loss": 1.2068561553955077, "step": 420 }, { "epoch": 0.004, "grad_norm": 8.718446847585891, "learning_rate": 7.977422389463782e-06, "loss": 1.138078212738037, "step": 425 }, { "epoch": 0.004047058823529411, "grad_norm": 8.1201098188189, "learning_rate": 8.071495766698025e-06, "loss": 1.1054238319396972, "step": 430 }, { "epoch": 0.004094117647058824, "grad_norm": 11.796250225222288, "learning_rate": 8.165569143932269e-06, "loss": 1.059467887878418, "step": 435 }, { "epoch": 0.004141176470588235, "grad_norm": 10.076219199455275, "learning_rate": 8.25964252116651e-06, "loss": 1.0318584442138672, "step": 440 }, { "epoch": 0.004188235294117647, "grad_norm": 21.7321475493828, "learning_rate": 8.353715898400754e-06, "loss": 1.016046905517578, "step": 445 }, { "epoch": 0.0042352941176470585, "grad_norm": 6.648929500180892, "learning_rate": 8.447789275634996e-06, "loss": 0.974774169921875, "step": 450 }, { "epoch": 0.004282352941176471, "grad_norm": 8.163619198155834, "learning_rate": 8.541862652869238e-06, "loss": 0.9275302886962891, "step": 455 }, { "epoch": 0.004329411764705882, "grad_norm": 7.726738155066538, "learning_rate": 8.635936030103481e-06, "loss": 0.894963550567627, "step": 460 }, { "epoch": 0.004376470588235294, "grad_norm": 8.296051291844813, "learning_rate": 8.730009407337723e-06, "loss": 0.8325037002563477, "step": 465 }, { "epoch": 0.0044235294117647055, "grad_norm": 7.136323668179335, "learning_rate": 8.824082784571967e-06, "loss": 0.7902743816375732, "step": 470 }, { "epoch": 0.004470588235294118, "grad_norm": 8.651125552354515, "learning_rate": 8.91815616180621e-06, "loss": 0.8135829925537109, "step": 475 }, { "epoch": 0.004517647058823529, "grad_norm": 8.360326931051041, "learning_rate": 9.012229539040452e-06, "loss": 0.738460636138916, "step": 480 }, { "epoch": 0.004564705882352941, "grad_norm": 7.164381859668902, "learning_rate": 9.106302916274696e-06, "loss": 0.6993799209594727, "step": 485 }, { "epoch": 0.0046117647058823525, "grad_norm": 8.346900190760582, "learning_rate": 9.200376293508938e-06, "loss": 0.6906004428863526, "step": 490 }, { "epoch": 0.004658823529411765, "grad_norm": 8.589404527010496, "learning_rate": 9.294449670743181e-06, "loss": 0.6680570602416992, "step": 495 }, { "epoch": 0.004705882352941176, "grad_norm": 7.980014937456095, "learning_rate": 9.388523047977423e-06, "loss": 0.6105616092681885, "step": 500 }, { "epoch": 0.004752941176470588, "grad_norm": 8.58481140928495, "learning_rate": 9.482596425211665e-06, "loss": 0.5839996337890625, "step": 505 }, { "epoch": 0.0048, "grad_norm": 8.090116406561263, "learning_rate": 9.576669802445908e-06, "loss": 0.6143483638763427, "step": 510 }, { "epoch": 0.004847058823529412, "grad_norm": 6.39130795731067, "learning_rate": 9.67074317968015e-06, "loss": 0.5281930923461914, "step": 515 }, { "epoch": 0.004894117647058823, "grad_norm": 6.216770120891524, "learning_rate": 9.764816556914394e-06, "loss": 0.5190860748291015, "step": 520 }, { "epoch": 0.004941176470588235, "grad_norm": 7.88709022452486, "learning_rate": 9.858889934148637e-06, "loss": 0.48416967391967775, "step": 525 }, { "epoch": 0.0049882352941176475, "grad_norm": 6.019621304194523, "learning_rate": 9.952963311382879e-06, "loss": 0.4897456169128418, "step": 530 }, { "epoch": 0.005035294117647059, "grad_norm": 8.359809885662456, "learning_rate": 1.0047036688617123e-05, "loss": 0.4698637008666992, "step": 535 }, { "epoch": 0.005082352941176471, "grad_norm": 6.356619838004564, "learning_rate": 1.0141110065851364e-05, "loss": 0.4461772918701172, "step": 540 }, { "epoch": 0.005129411764705882, "grad_norm": 5.5560652977912754, "learning_rate": 1.0235183443085608e-05, "loss": 0.4109856128692627, "step": 545 }, { "epoch": 0.0051764705882352945, "grad_norm": 5.867851737707349, "learning_rate": 1.0329256820319852e-05, "loss": 0.4038969039916992, "step": 550 }, { "epoch": 0.005223529411764706, "grad_norm": 5.881637028578318, "learning_rate": 1.0423330197554093e-05, "loss": 0.3856362342834473, "step": 555 }, { "epoch": 0.005270588235294118, "grad_norm": 4.565061761011708, "learning_rate": 1.0517403574788337e-05, "loss": 0.41910858154296876, "step": 560 }, { "epoch": 0.005317647058823529, "grad_norm": 5.507765478066968, "learning_rate": 1.0611476952022579e-05, "loss": 0.37532634735107423, "step": 565 }, { "epoch": 0.005364705882352942, "grad_norm": 4.4320026192810476, "learning_rate": 1.0705550329256822e-05, "loss": 0.346097731590271, "step": 570 }, { "epoch": 0.005411764705882353, "grad_norm": 6.878565111495912, "learning_rate": 1.0799623706491064e-05, "loss": 0.34480676651000974, "step": 575 }, { "epoch": 0.005458823529411765, "grad_norm": 4.805540559976007, "learning_rate": 1.0893697083725308e-05, "loss": 0.3245478630065918, "step": 580 }, { "epoch": 0.005505882352941176, "grad_norm": 5.503663359842319, "learning_rate": 1.0987770460959551e-05, "loss": 0.310454797744751, "step": 585 }, { "epoch": 0.005552941176470589, "grad_norm": 10.323584455583957, "learning_rate": 1.1081843838193793e-05, "loss": 0.33109073638916015, "step": 590 }, { "epoch": 0.0056, "grad_norm": 8.470958814664916, "learning_rate": 1.1175917215428033e-05, "loss": 0.3292854309082031, "step": 595 }, { "epoch": 0.005647058823529412, "grad_norm": 4.677638348532707, "learning_rate": 1.1269990592662277e-05, "loss": 0.3163057565689087, "step": 600 }, { "epoch": 0.0056941176470588235, "grad_norm": 5.2420171885669715, "learning_rate": 1.136406396989652e-05, "loss": 0.32653002738952636, "step": 605 }, { "epoch": 0.005741176470588236, "grad_norm": 5.0931181509170536, "learning_rate": 1.1458137347130762e-05, "loss": 0.3196044206619263, "step": 610 }, { "epoch": 0.005788235294117647, "grad_norm": 6.6678128024787595, "learning_rate": 1.1552210724365006e-05, "loss": 0.2994951009750366, "step": 615 }, { "epoch": 0.005835294117647059, "grad_norm": 4.703346684285803, "learning_rate": 1.1646284101599247e-05, "loss": 0.2762923240661621, "step": 620 }, { "epoch": 0.0058823529411764705, "grad_norm": 7.009343872163713, "learning_rate": 1.1740357478833491e-05, "loss": 0.3137629508972168, "step": 625 }, { "epoch": 0.005929411764705883, "grad_norm": 5.143589637280419, "learning_rate": 1.1834430856067733e-05, "loss": 0.28861203193664553, "step": 630 }, { "epoch": 0.005976470588235294, "grad_norm": 4.033071033574523, "learning_rate": 1.1928504233301976e-05, "loss": 0.26896915435791013, "step": 635 }, { "epoch": 0.006023529411764706, "grad_norm": 5.205721918078067, "learning_rate": 1.202257761053622e-05, "loss": 0.26204571723937986, "step": 640 }, { "epoch": 0.0060705882352941175, "grad_norm": 4.206513646943677, "learning_rate": 1.2116650987770462e-05, "loss": 0.2782004356384277, "step": 645 }, { "epoch": 0.00611764705882353, "grad_norm": 5.033392414302604, "learning_rate": 1.2210724365004705e-05, "loss": 0.2872858285903931, "step": 650 }, { "epoch": 0.006164705882352941, "grad_norm": 4.824901385088235, "learning_rate": 1.2304797742238947e-05, "loss": 0.2717441558837891, "step": 655 }, { "epoch": 0.006211764705882353, "grad_norm": 4.404880382625993, "learning_rate": 1.239887111947319e-05, "loss": 0.3102854251861572, "step": 660 }, { "epoch": 0.006258823529411765, "grad_norm": 5.108815536108673, "learning_rate": 1.2492944496707433e-05, "loss": 0.25733373165130613, "step": 665 }, { "epoch": 0.006305882352941177, "grad_norm": 3.705067848794147, "learning_rate": 1.2587017873941676e-05, "loss": 0.25894317626953123, "step": 670 }, { "epoch": 0.006352941176470588, "grad_norm": 5.898836236346231, "learning_rate": 1.268109125117592e-05, "loss": 0.27397747039794923, "step": 675 }, { "epoch": 0.0064, "grad_norm": 5.264247482513141, "learning_rate": 1.2775164628410161e-05, "loss": 0.24723358154296876, "step": 680 }, { "epoch": 0.006447058823529412, "grad_norm": 4.2897833840685085, "learning_rate": 1.2869238005644405e-05, "loss": 0.23966381549835206, "step": 685 }, { "epoch": 0.006494117647058824, "grad_norm": 3.888594574544066, "learning_rate": 1.2963311382878647e-05, "loss": 0.2425478219985962, "step": 690 }, { "epoch": 0.006541176470588235, "grad_norm": 4.075910742579148, "learning_rate": 1.305738476011289e-05, "loss": 0.23977236747741698, "step": 695 }, { "epoch": 0.006588235294117647, "grad_norm": 4.205467265052762, "learning_rate": 1.3151458137347132e-05, "loss": 0.2362279176712036, "step": 700 }, { "epoch": 0.006635294117647059, "grad_norm": 3.658574401533278, "learning_rate": 1.3245531514581376e-05, "loss": 0.23093066215515137, "step": 705 }, { "epoch": 0.006682352941176471, "grad_norm": 6.415685698934081, "learning_rate": 1.3339604891815616e-05, "loss": 0.24186139106750487, "step": 710 }, { "epoch": 0.006729411764705882, "grad_norm": 3.725926512717292, "learning_rate": 1.343367826904986e-05, "loss": 0.24680876731872559, "step": 715 }, { "epoch": 0.006776470588235294, "grad_norm": 4.082293202060228, "learning_rate": 1.3527751646284101e-05, "loss": 0.2409443140029907, "step": 720 }, { "epoch": 0.006823529411764706, "grad_norm": 3.6248193366105093, "learning_rate": 1.3621825023518345e-05, "loss": 0.23628573417663573, "step": 725 }, { "epoch": 0.006870588235294118, "grad_norm": 4.29348049622931, "learning_rate": 1.3715898400752588e-05, "loss": 0.23737053871154784, "step": 730 }, { "epoch": 0.006917647058823529, "grad_norm": 4.314835565924856, "learning_rate": 1.380997177798683e-05, "loss": 0.2653051853179932, "step": 735 }, { "epoch": 0.0069647058823529414, "grad_norm": 3.450148752570623, "learning_rate": 1.3904045155221074e-05, "loss": 0.2331615209579468, "step": 740 }, { "epoch": 0.007011764705882353, "grad_norm": 3.448970199137868, "learning_rate": 1.3998118532455316e-05, "loss": 0.21166086196899414, "step": 745 }, { "epoch": 0.007058823529411765, "grad_norm": 3.4706534886432103, "learning_rate": 1.4092191909689559e-05, "loss": 0.22688145637512208, "step": 750 }, { "epoch": 0.007105882352941176, "grad_norm": 4.037055950386718, "learning_rate": 1.4186265286923801e-05, "loss": 0.2162724733352661, "step": 755 }, { "epoch": 0.0071529411764705885, "grad_norm": 3.3980630581444724, "learning_rate": 1.4280338664158044e-05, "loss": 0.20278463363647461, "step": 760 }, { "epoch": 0.0072, "grad_norm": 3.9386303651581582, "learning_rate": 1.4374412041392288e-05, "loss": 0.21443042755126954, "step": 765 }, { "epoch": 0.007247058823529412, "grad_norm": 5.558411211868726, "learning_rate": 1.446848541862653e-05, "loss": 0.22567338943481446, "step": 770 }, { "epoch": 0.007294117647058823, "grad_norm": 3.8793793232970364, "learning_rate": 1.4562558795860773e-05, "loss": 0.23990893363952637, "step": 775 }, { "epoch": 0.0073411764705882355, "grad_norm": 3.7600646300680753, "learning_rate": 1.4656632173095015e-05, "loss": 0.20938427448272706, "step": 780 }, { "epoch": 0.007388235294117647, "grad_norm": 3.5321899266622463, "learning_rate": 1.4750705550329259e-05, "loss": 0.21227388381958007, "step": 785 }, { "epoch": 0.007435294117647059, "grad_norm": 4.380341915017975, "learning_rate": 1.48447789275635e-05, "loss": 0.20908761024475098, "step": 790 }, { "epoch": 0.00748235294117647, "grad_norm": 4.155770451824065, "learning_rate": 1.4938852304797744e-05, "loss": 0.23477721214294434, "step": 795 }, { "epoch": 0.0075294117647058826, "grad_norm": 3.5618428780543003, "learning_rate": 1.5032925682031988e-05, "loss": 0.20050110816955566, "step": 800 }, { "epoch": 0.007576470588235294, "grad_norm": 3.2362377159426754, "learning_rate": 1.512699905926623e-05, "loss": 0.20849015712738037, "step": 805 }, { "epoch": 0.007623529411764706, "grad_norm": 5.332034190060947, "learning_rate": 1.5221072436500473e-05, "loss": 0.20781683921813965, "step": 810 }, { "epoch": 0.007670588235294117, "grad_norm": 4.73433498981794, "learning_rate": 1.5315145813734715e-05, "loss": 0.22815027236938476, "step": 815 }, { "epoch": 0.00771764705882353, "grad_norm": 3.7680308233408035, "learning_rate": 1.540921919096896e-05, "loss": 0.21748275756835939, "step": 820 }, { "epoch": 0.007764705882352941, "grad_norm": 3.622885152679587, "learning_rate": 1.55032925682032e-05, "loss": 0.21869773864746095, "step": 825 }, { "epoch": 0.007811764705882353, "grad_norm": 4.375942759968506, "learning_rate": 1.5597365945437442e-05, "loss": 0.2289440155029297, "step": 830 }, { "epoch": 0.007858823529411765, "grad_norm": 3.953925161218553, "learning_rate": 1.5691439322671686e-05, "loss": 0.21241753101348876, "step": 835 }, { "epoch": 0.007905882352941176, "grad_norm": 3.7137252884302296, "learning_rate": 1.5785512699905926e-05, "loss": 0.21971573829650878, "step": 840 }, { "epoch": 0.007952941176470588, "grad_norm": 3.2084093891947916, "learning_rate": 1.587958607714017e-05, "loss": 0.20570197105407714, "step": 845 }, { "epoch": 0.008, "grad_norm": 3.4735387570770717, "learning_rate": 1.5973659454374413e-05, "loss": 0.1984121561050415, "step": 850 }, { "epoch": 0.008047058823529412, "grad_norm": 3.450341382650347, "learning_rate": 1.6067732831608656e-05, "loss": 0.2120612382888794, "step": 855 }, { "epoch": 0.008094117647058823, "grad_norm": 2.7852695374860645, "learning_rate": 1.61618062088429e-05, "loss": 0.20537757873535156, "step": 860 }, { "epoch": 0.008141176470588235, "grad_norm": 3.74221041980676, "learning_rate": 1.625587958607714e-05, "loss": 0.18836572170257568, "step": 865 }, { "epoch": 0.008188235294117647, "grad_norm": 3.4147977125752633, "learning_rate": 1.6349952963311384e-05, "loss": 0.19526255130767822, "step": 870 }, { "epoch": 0.00823529411764706, "grad_norm": 2.7280883775745925, "learning_rate": 1.6444026340545627e-05, "loss": 0.18625023365020751, "step": 875 }, { "epoch": 0.00828235294117647, "grad_norm": 3.45867959242785, "learning_rate": 1.653809971777987e-05, "loss": 0.19525468349456787, "step": 880 }, { "epoch": 0.008329411764705882, "grad_norm": 4.04262575707035, "learning_rate": 1.663217309501411e-05, "loss": 0.1962231755256653, "step": 885 }, { "epoch": 0.008376470588235294, "grad_norm": 3.3038308408263886, "learning_rate": 1.6726246472248354e-05, "loss": 0.1885596990585327, "step": 890 }, { "epoch": 0.008423529411764706, "grad_norm": 2.869983602562244, "learning_rate": 1.6820319849482598e-05, "loss": 0.17981960773468017, "step": 895 }, { "epoch": 0.008470588235294117, "grad_norm": 3.4778301696020137, "learning_rate": 1.691439322671684e-05, "loss": 0.19267985820770264, "step": 900 }, { "epoch": 0.008517647058823529, "grad_norm": 3.189412777600385, "learning_rate": 1.7008466603951085e-05, "loss": 0.20353496074676514, "step": 905 }, { "epoch": 0.008564705882352941, "grad_norm": 3.526581810747813, "learning_rate": 1.7102539981185325e-05, "loss": 0.19697890281677247, "step": 910 }, { "epoch": 0.008611764705882353, "grad_norm": 2.9402275069473194, "learning_rate": 1.719661335841957e-05, "loss": 0.19366905689239503, "step": 915 }, { "epoch": 0.008658823529411764, "grad_norm": 2.75656524791094, "learning_rate": 1.7290686735653812e-05, "loss": 0.1810498595237732, "step": 920 }, { "epoch": 0.008705882352941176, "grad_norm": 2.802555001826997, "learning_rate": 1.7384760112888056e-05, "loss": 0.20423536300659179, "step": 925 }, { "epoch": 0.008752941176470588, "grad_norm": 4.198044168287404, "learning_rate": 1.74788334901223e-05, "loss": 0.1964618444442749, "step": 930 }, { "epoch": 0.0088, "grad_norm": 3.435531730612208, "learning_rate": 1.757290686735654e-05, "loss": 0.18173363208770751, "step": 935 }, { "epoch": 0.008847058823529411, "grad_norm": 3.22624338942102, "learning_rate": 1.766698024459078e-05, "loss": 0.1997997283935547, "step": 940 }, { "epoch": 0.008894117647058823, "grad_norm": 2.999549787257119, "learning_rate": 1.7761053621825023e-05, "loss": 0.1758178949356079, "step": 945 }, { "epoch": 0.008941176470588235, "grad_norm": 3.429906617170976, "learning_rate": 1.7855126999059267e-05, "loss": 0.1946726083755493, "step": 950 }, { "epoch": 0.008988235294117648, "grad_norm": 2.832652617564554, "learning_rate": 1.794920037629351e-05, "loss": 0.18196818828582764, "step": 955 }, { "epoch": 0.009035294117647058, "grad_norm": 3.0140394775456976, "learning_rate": 1.8043273753527754e-05, "loss": 0.1954061508178711, "step": 960 }, { "epoch": 0.00908235294117647, "grad_norm": 3.450348368452861, "learning_rate": 1.8137347130761994e-05, "loss": 0.19518197774887086, "step": 965 }, { "epoch": 0.009129411764705882, "grad_norm": 3.064805905219883, "learning_rate": 1.8231420507996237e-05, "loss": 0.17740739583969117, "step": 970 }, { "epoch": 0.009176470588235295, "grad_norm": 3.6920975013252892, "learning_rate": 1.832549388523048e-05, "loss": 0.20130691528320313, "step": 975 }, { "epoch": 0.009223529411764705, "grad_norm": 2.7151573977973817, "learning_rate": 1.8419567262464724e-05, "loss": 0.19658136367797852, "step": 980 }, { "epoch": 0.009270588235294117, "grad_norm": 2.922860891581976, "learning_rate": 1.8513640639698968e-05, "loss": 0.15995209217071532, "step": 985 }, { "epoch": 0.00931764705882353, "grad_norm": 2.7397978847265687, "learning_rate": 1.8607714016933208e-05, "loss": 0.19958760738372802, "step": 990 }, { "epoch": 0.009364705882352942, "grad_norm": 2.9394944253058046, "learning_rate": 1.870178739416745e-05, "loss": 0.1790924072265625, "step": 995 }, { "epoch": 0.009411764705882352, "grad_norm": 3.2321570606554286, "learning_rate": 1.8795860771401695e-05, "loss": 0.18339781761169432, "step": 1000 }, { "epoch": 0.009458823529411764, "grad_norm": 3.3643226318141615, "learning_rate": 1.888993414863594e-05, "loss": 0.1998583436012268, "step": 1005 }, { "epoch": 0.009505882352941177, "grad_norm": 2.5087447154998412, "learning_rate": 1.898400752587018e-05, "loss": 0.17512025833129882, "step": 1010 }, { "epoch": 0.009552941176470589, "grad_norm": 3.1694377464991113, "learning_rate": 1.9078080903104422e-05, "loss": 0.19672645330429078, "step": 1015 }, { "epoch": 0.0096, "grad_norm": 3.069119660614403, "learning_rate": 1.9172154280338666e-05, "loss": 0.18875232934951783, "step": 1020 }, { "epoch": 0.009647058823529411, "grad_norm": 3.138168916974063, "learning_rate": 1.926622765757291e-05, "loss": 0.1783250093460083, "step": 1025 }, { "epoch": 0.009694117647058824, "grad_norm": 3.310843157496055, "learning_rate": 1.9360301034807153e-05, "loss": 0.18879700899124147, "step": 1030 }, { "epoch": 0.009741176470588236, "grad_norm": 2.484811345213728, "learning_rate": 1.9454374412041393e-05, "loss": 0.19757485389709473, "step": 1035 }, { "epoch": 0.009788235294117646, "grad_norm": 2.4962995019987892, "learning_rate": 1.9548447789275637e-05, "loss": 0.1860889196395874, "step": 1040 }, { "epoch": 0.009835294117647058, "grad_norm": 2.668627811508129, "learning_rate": 1.964252116650988e-05, "loss": 0.1763948917388916, "step": 1045 }, { "epoch": 0.00988235294117647, "grad_norm": 2.834910617741788, "learning_rate": 1.9736594543744124e-05, "loss": 0.19723875522613527, "step": 1050 }, { "epoch": 0.009929411764705883, "grad_norm": 2.186266574226311, "learning_rate": 1.9830667920978364e-05, "loss": 0.18811073303222656, "step": 1055 }, { "epoch": 0.009976470588235295, "grad_norm": 4.1455019070778425, "learning_rate": 1.9924741298212607e-05, "loss": 0.17704455852508544, "step": 1060 }, { "epoch": 0.010023529411764705, "grad_norm": 2.5019761929220428, "learning_rate": 1.9990599294427767e-05, "loss": 0.18313865661621093, "step": 1065 }, { "epoch": 0.010070588235294118, "grad_norm": 3.2813936337830096, "learning_rate": 1.9943793799874162e-05, "loss": 0.18927063941955566, "step": 1070 }, { "epoch": 0.01011764705882353, "grad_norm": 2.614089389341326, "learning_rate": 1.989731554093587e-05, "loss": 0.1693912386894226, "step": 1075 }, { "epoch": 0.010164705882352942, "grad_norm": 2.537035390382222, "learning_rate": 1.9851160722244667e-05, "loss": 0.17415382862091064, "step": 1080 }, { "epoch": 0.010211764705882352, "grad_norm": 2.2625053165322124, "learning_rate": 1.9805325609775393e-05, "loss": 0.169755220413208, "step": 1085 }, { "epoch": 0.010258823529411765, "grad_norm": 2.324694769256803, "learning_rate": 1.97598065295771e-05, "loss": 0.162303364276886, "step": 1090 }, { "epoch": 0.010305882352941177, "grad_norm": 2.889736248442711, "learning_rate": 1.9714599866536054e-05, "loss": 0.16836907863616943, "step": 1095 }, { "epoch": 0.010352941176470589, "grad_norm": 2.4978555264530056, "learning_rate": 1.9669702063169828e-05, "loss": 0.1811073899269104, "step": 1100 }, { "epoch": 0.0104, "grad_norm": 3.2567638911246473, "learning_rate": 1.9625109618451356e-05, "loss": 0.17686843872070312, "step": 1105 }, { "epoch": 0.010447058823529412, "grad_norm": 2.1793015718443605, "learning_rate": 1.9580819086662305e-05, "loss": 0.16917933225631715, "step": 1110 }, { "epoch": 0.010494117647058824, "grad_norm": 2.606066145915245, "learning_rate": 1.953682707627466e-05, "loss": 0.17768266201019287, "step": 1115 }, { "epoch": 0.010541176470588236, "grad_norm": 2.114030698716573, "learning_rate": 1.9493130248859906e-05, "loss": 0.14959053993225097, "step": 1120 }, { "epoch": 0.010588235294117647, "grad_norm": 4.193392783638332, "learning_rate": 1.9449725318024913e-05, "loss": 0.17837367057800294, "step": 1125 }, { "epoch": 0.010635294117647059, "grad_norm": 2.962423625202895, "learning_rate": 1.9406609048373746e-05, "loss": 0.14975569248199463, "step": 1130 }, { "epoch": 0.010682352941176471, "grad_norm": 2.1377365332267844, "learning_rate": 1.9363778254494694e-05, "loss": 0.15483636856079103, "step": 1135 }, { "epoch": 0.010729411764705883, "grad_norm": 2.560180268440705, "learning_rate": 1.9321229799971814e-05, "loss": 0.14960073232650756, "step": 1140 }, { "epoch": 0.010776470588235294, "grad_norm": 2.871319669978186, "learning_rate": 1.9278960596420174e-05, "loss": 0.1628350019454956, "step": 1145 }, { "epoch": 0.010823529411764706, "grad_norm": 1.9399105026699568, "learning_rate": 1.9236967602544336e-05, "loss": 0.169977605342865, "step": 1150 }, { "epoch": 0.010870588235294118, "grad_norm": 2.7391141650108217, "learning_rate": 1.9195247823219182e-05, "loss": 0.16972997188568115, "step": 1155 }, { "epoch": 0.01091764705882353, "grad_norm": 2.454659634924606, "learning_rate": 1.9153798308592653e-05, "loss": 0.1535276174545288, "step": 1160 }, { "epoch": 0.01096470588235294, "grad_norm": 3.319307421216668, "learning_rate": 1.9112616153209674e-05, "loss": 0.16198982000350953, "step": 1165 }, { "epoch": 0.011011764705882353, "grad_norm": 2.4158075803530723, "learning_rate": 1.9071698495156705e-05, "loss": 0.15071310997009277, "step": 1170 }, { "epoch": 0.011058823529411765, "grad_norm": 2.5459663339023315, "learning_rate": 1.903104251522639e-05, "loss": 0.15523574352264405, "step": 1175 }, { "epoch": 0.011105882352941177, "grad_norm": 2.4043466925951114, "learning_rate": 1.8990645436101656e-05, "loss": 0.14850263595581054, "step": 1180 }, { "epoch": 0.011152941176470588, "grad_norm": 2.7895336894447826, "learning_rate": 1.895050452155883e-05, "loss": 0.17481403350830077, "step": 1185 }, { "epoch": 0.0112, "grad_norm": 2.0165505429144956, "learning_rate": 1.8910617075689185e-05, "loss": 0.15263593196868896, "step": 1190 }, { "epoch": 0.011247058823529412, "grad_norm": 2.6612750095354047, "learning_rate": 1.887098044213843e-05, "loss": 0.17912554740905762, "step": 1195 }, { "epoch": 0.011294117647058824, "grad_norm": 3.665378603779818, "learning_rate": 1.8831592003363682e-05, "loss": 0.1545348882675171, "step": 1200 }, { "epoch": 0.011341176470588235, "grad_norm": 2.285968734978304, "learning_rate": 1.879244917990739e-05, "loss": 0.1692918658256531, "step": 1205 }, { "epoch": 0.011388235294117647, "grad_norm": 1.700094128549152, "learning_rate": 1.8753549429687844e-05, "loss": 0.1353761672973633, "step": 1210 }, { "epoch": 0.011435294117647059, "grad_norm": 2.024623404009247, "learning_rate": 1.8714890247305715e-05, "loss": 0.17459824085235595, "step": 1215 }, { "epoch": 0.011482352941176471, "grad_norm": 2.8855930734646034, "learning_rate": 1.86764691633663e-05, "loss": 0.14493269920349122, "step": 1220 }, { "epoch": 0.011529411764705882, "grad_norm": 2.422117417196261, "learning_rate": 1.8638283743816993e-05, "loss": 0.1665704846382141, "step": 1225 }, { "epoch": 0.011576470588235294, "grad_norm": 1.8481058906048562, "learning_rate": 1.8600331589299582e-05, "loss": 0.1521193265914917, "step": 1230 }, { "epoch": 0.011623529411764706, "grad_norm": 2.1486134608847736, "learning_rate": 1.856261033451703e-05, "loss": 0.1488564729690552, "step": 1235 }, { "epoch": 0.011670588235294118, "grad_norm": 2.735886768705446, "learning_rate": 1.852511764761428e-05, "loss": 0.1591731309890747, "step": 1240 }, { "epoch": 0.011717647058823529, "grad_norm": 2.3257845410098605, "learning_rate": 1.848785122957281e-05, "loss": 0.14902873039245607, "step": 1245 }, { "epoch": 0.011764705882352941, "grad_norm": 2.5686293613193354, "learning_rate": 1.845080881361848e-05, "loss": 0.1463287353515625, "step": 1250 }, { "epoch": 0.011811764705882353, "grad_norm": 1.9437722055697688, "learning_rate": 1.8413988164642436e-05, "loss": 0.1393170952796936, "step": 1255 }, { "epoch": 0.011858823529411765, "grad_norm": 1.7630262738200333, "learning_rate": 1.8377387078634625e-05, "loss": 0.146123206615448, "step": 1260 }, { "epoch": 0.011905882352941176, "grad_norm": 1.7864969247950897, "learning_rate": 1.834100338212965e-05, "loss": 0.1451685070991516, "step": 1265 }, { "epoch": 0.011952941176470588, "grad_norm": 1.700603910950616, "learning_rate": 1.830483493166471e-05, "loss": 0.13938491344451903, "step": 1270 }, { "epoch": 0.012, "grad_norm": 2.5139523083660436, "learning_rate": 1.8268879613249167e-05, "loss": 0.15424048900604248, "step": 1275 }, { "epoch": 0.012047058823529412, "grad_norm": 2.7072920865241015, "learning_rate": 1.8233135341845566e-05, "loss": 0.13673725128173828, "step": 1280 }, { "epoch": 0.012094117647058823, "grad_norm": 1.6773387712274581, "learning_rate": 1.8197600060861818e-05, "loss": 0.1516347885131836, "step": 1285 }, { "epoch": 0.012141176470588235, "grad_norm": 2.117061944730157, "learning_rate": 1.816227174165413e-05, "loss": 0.14654622077941895, "step": 1290 }, { "epoch": 0.012188235294117647, "grad_norm": 2.389733299107014, "learning_rate": 1.8127148383040638e-05, "loss": 0.13459618091583253, "step": 1295 }, { "epoch": 0.01223529411764706, "grad_norm": 2.609098544788935, "learning_rate": 1.809222801082523e-05, "loss": 0.13773717880249023, "step": 1300 }, { "epoch": 0.01228235294117647, "grad_norm": 2.3904306857766175, "learning_rate": 1.805750867733149e-05, "loss": 0.14986448287963866, "step": 1305 }, { "epoch": 0.012329411764705882, "grad_norm": 2.227519648218531, "learning_rate": 1.8022988460946406e-05, "loss": 0.13927145004272462, "step": 1310 }, { "epoch": 0.012376470588235294, "grad_norm": 2.2060863675463422, "learning_rate": 1.798866546567368e-05, "loss": 0.1512730360031128, "step": 1315 }, { "epoch": 0.012423529411764707, "grad_norm": 2.1469683564445377, "learning_rate": 1.795453782069631e-05, "loss": 0.1320899486541748, "step": 1320 }, { "epoch": 0.012470588235294117, "grad_norm": 3.389579411337994, "learning_rate": 1.79206036799483e-05, "loss": 0.14841251373291015, "step": 1325 }, { "epoch": 0.01251764705882353, "grad_norm": 2.3088044924503968, "learning_rate": 1.7886861221695204e-05, "loss": 0.14846515655517578, "step": 1330 }, { "epoch": 0.012564705882352941, "grad_norm": 2.1668901079873732, "learning_rate": 1.785330864812339e-05, "loss": 0.15390207767486572, "step": 1335 }, { "epoch": 0.012611764705882354, "grad_norm": 2.2280639876265145, "learning_rate": 1.781994418493768e-05, "loss": 0.15720953941345214, "step": 1340 }, { "epoch": 0.012658823529411764, "grad_norm": 2.1612794759695655, "learning_rate": 1.7786766080967267e-05, "loss": 0.1596134901046753, "step": 1345 }, { "epoch": 0.012705882352941176, "grad_norm": 1.8422023238696856, "learning_rate": 1.7753772607779658e-05, "loss": 0.14748339653015136, "step": 1350 }, { "epoch": 0.012752941176470588, "grad_norm": 2.0193484480117307, "learning_rate": 1.772096205930248e-05, "loss": 0.14216600656509398, "step": 1355 }, { "epoch": 0.0128, "grad_norm": 2.371751314337639, "learning_rate": 1.7688332751452925e-05, "loss": 0.12158293724060058, "step": 1360 }, { "epoch": 0.012847058823529411, "grad_norm": 2.1021076370682654, "learning_rate": 1.7655883021774708e-05, "loss": 0.139100980758667, "step": 1365 }, { "epoch": 0.012894117647058823, "grad_norm": 2.042947636704815, "learning_rate": 1.762361122908228e-05, "loss": 0.13336633443832396, "step": 1370 }, { "epoch": 0.012941176470588235, "grad_norm": 2.4161233295696514, "learning_rate": 1.7591515753112186e-05, "loss": 0.13596153259277344, "step": 1375 }, { "epoch": 0.012988235294117648, "grad_norm": 1.9079967362971926, "learning_rate": 1.7559594994181416e-05, "loss": 0.14415372610092164, "step": 1380 }, { "epoch": 0.013035294117647058, "grad_norm": 1.7859884651014692, "learning_rate": 1.752784737285248e-05, "loss": 0.14693795442581176, "step": 1385 }, { "epoch": 0.01308235294117647, "grad_norm": 2.035038288441927, "learning_rate": 1.74962713296052e-05, "loss": 0.12170298099517822, "step": 1390 }, { "epoch": 0.013129411764705883, "grad_norm": 1.8535461739654404, "learning_rate": 1.7464865324514914e-05, "loss": 0.13737235069274903, "step": 1395 }, { "epoch": 0.013176470588235295, "grad_norm": 1.5935152387491371, "learning_rate": 1.743362783693705e-05, "loss": 0.13401331901550292, "step": 1400 }, { "epoch": 0.013223529411764705, "grad_norm": 2.151635963500009, "learning_rate": 1.7402557365197877e-05, "loss": 0.1458134174346924, "step": 1405 }, { "epoch": 0.013270588235294117, "grad_norm": 2.43548637636356, "learning_rate": 1.7371652426291296e-05, "loss": 0.14991708993911743, "step": 1410 }, { "epoch": 0.01331764705882353, "grad_norm": 1.9195559530763338, "learning_rate": 1.734091155558152e-05, "loss": 0.13590999841690063, "step": 1415 }, { "epoch": 0.013364705882352942, "grad_norm": 1.5999300382127526, "learning_rate": 1.7310333306511546e-05, "loss": 0.13021808862686157, "step": 1420 }, { "epoch": 0.013411764705882352, "grad_norm": 2.0283570778820237, "learning_rate": 1.7279916250317234e-05, "loss": 0.12960344552993774, "step": 1425 }, { "epoch": 0.013458823529411764, "grad_norm": 2.256053881346782, "learning_rate": 1.7249658975746915e-05, "loss": 0.14209675788879395, "step": 1430 }, { "epoch": 0.013505882352941177, "grad_norm": 1.6969441572041304, "learning_rate": 1.7219560088786384e-05, "loss": 0.13819745779037476, "step": 1435 }, { "epoch": 0.013552941176470589, "grad_norm": 2.5480728159752717, "learning_rate": 1.7189618212389122e-05, "loss": 0.13375790119171144, "step": 1440 }, { "epoch": 0.0136, "grad_norm": 2.056255172276664, "learning_rate": 1.715983198621169e-05, "loss": 0.1272059440612793, "step": 1445 }, { "epoch": 0.013647058823529411, "grad_norm": 1.9916957980545842, "learning_rate": 1.7130200066354146e-05, "loss": 0.13010939359664916, "step": 1450 }, { "epoch": 0.013694117647058824, "grad_norm": 1.5813447450482496, "learning_rate": 1.710072112510535e-05, "loss": 0.11628469228744506, "step": 1455 }, { "epoch": 0.013741176470588236, "grad_norm": 1.7401011417606702, "learning_rate": 1.707139385069313e-05, "loss": 0.12206790447235108, "step": 1460 }, { "epoch": 0.013788235294117646, "grad_norm": 1.785820375632923, "learning_rate": 1.7042216947039062e-05, "loss": 0.13443735837936402, "step": 1465 }, { "epoch": 0.013835294117647058, "grad_norm": 2.010483269500242, "learning_rate": 1.7013189133517924e-05, "loss": 0.1345515012741089, "step": 1470 }, { "epoch": 0.01388235294117647, "grad_norm": 2.0973661419968392, "learning_rate": 1.6984309144721597e-05, "loss": 0.12144393920898437, "step": 1475 }, { "epoch": 0.013929411764705883, "grad_norm": 1.8058988001398388, "learning_rate": 1.695557573022737e-05, "loss": 0.12788152694702148, "step": 1480 }, { "epoch": 0.013976470588235293, "grad_norm": 2.1193214845720485, "learning_rate": 1.692698765437055e-05, "loss": 0.13222699165344237, "step": 1485 }, { "epoch": 0.014023529411764706, "grad_norm": 2.4623848108503927, "learning_rate": 1.6898543696021262e-05, "loss": 0.1247570276260376, "step": 1490 }, { "epoch": 0.014070588235294118, "grad_norm": 2.6008544344544555, "learning_rate": 1.6870242648365366e-05, "loss": 0.1385913610458374, "step": 1495 }, { "epoch": 0.01411764705882353, "grad_norm": 1.611685881802938, "learning_rate": 1.68420833186894e-05, "loss": 0.1261878490447998, "step": 1500 }, { "epoch": 0.01416470588235294, "grad_norm": 1.8915013439654755, "learning_rate": 1.681406452816947e-05, "loss": 0.12048056125640869, "step": 1505 }, { "epoch": 0.014211764705882353, "grad_norm": 2.1100810903019704, "learning_rate": 1.6786185111663948e-05, "loss": 0.13202967643737792, "step": 1510 }, { "epoch": 0.014258823529411765, "grad_norm": 1.729498819213767, "learning_rate": 1.675844391750999e-05, "loss": 0.11515072584152222, "step": 1515 }, { "epoch": 0.014305882352941177, "grad_norm": 3.7886701155558495, "learning_rate": 1.6730839807323717e-05, "loss": 0.1327887535095215, "step": 1520 }, { "epoch": 0.014352941176470587, "grad_norm": 1.7382807572418182, "learning_rate": 1.6703371655803984e-05, "loss": 0.11488265991210937, "step": 1525 }, { "epoch": 0.0144, "grad_norm": 1.56988660391372, "learning_rate": 1.6676038350539745e-05, "loss": 0.11504762172698975, "step": 1530 }, { "epoch": 0.014447058823529412, "grad_norm": 1.9146519134619961, "learning_rate": 1.6648838791820785e-05, "loss": 0.12843022346496583, "step": 1535 }, { "epoch": 0.014494117647058824, "grad_norm": 1.7894825630492706, "learning_rate": 1.6621771892451912e-05, "loss": 0.11256697177886962, "step": 1540 }, { "epoch": 0.014541176470588234, "grad_norm": 2.09449764299496, "learning_rate": 1.659483657757043e-05, "loss": 0.11211121082305908, "step": 1545 }, { "epoch": 0.014588235294117647, "grad_norm": 3.39730364952003, "learning_rate": 1.6568031784466855e-05, "loss": 0.13079502582550048, "step": 1550 }, { "epoch": 0.014635294117647059, "grad_norm": 1.603028601664894, "learning_rate": 1.6541356462408808e-05, "loss": 0.11661988496780396, "step": 1555 }, { "epoch": 0.014682352941176471, "grad_norm": 2.24417492899237, "learning_rate": 1.651480957246802e-05, "loss": 0.122049880027771, "step": 1560 }, { "epoch": 0.014729411764705881, "grad_norm": 1.700139140733156, "learning_rate": 1.648839008735038e-05, "loss": 0.12061234712600707, "step": 1565 }, { "epoch": 0.014776470588235294, "grad_norm": 2.0291979186911178, "learning_rate": 1.6462096991228965e-05, "loss": 0.1308051109313965, "step": 1570 }, { "epoch": 0.014823529411764706, "grad_norm": 2.094384649128011, "learning_rate": 1.643592927957998e-05, "loss": 0.11727795600891114, "step": 1575 }, { "epoch": 0.014870588235294118, "grad_norm": 2.3189941397140976, "learning_rate": 1.6409885959021585e-05, "loss": 0.11803874969482422, "step": 1580 }, { "epoch": 0.01491764705882353, "grad_norm": 1.7008258929496523, "learning_rate": 1.6383966047155477e-05, "loss": 0.11448423862457276, "step": 1585 }, { "epoch": 0.01496470588235294, "grad_norm": 1.8524275749280519, "learning_rate": 1.6358168572411267e-05, "loss": 0.11561713218688965, "step": 1590 }, { "epoch": 0.015011764705882353, "grad_norm": 2.0089746521950365, "learning_rate": 1.6332492573893494e-05, "loss": 0.12899856567382811, "step": 1595 }, { "epoch": 0.015058823529411765, "grad_norm": 1.6372272638816987, "learning_rate": 1.6306937101231326e-05, "loss": 0.1186557412147522, "step": 1600 }, { "epoch": 0.015105882352941177, "grad_norm": 1.6819292091815914, "learning_rate": 1.6281501214430788e-05, "loss": 0.1167655348777771, "step": 1605 }, { "epoch": 0.015152941176470588, "grad_norm": 1.4813987559584962, "learning_rate": 1.625618398372955e-05, "loss": 0.10256211757659912, "step": 1610 }, { "epoch": 0.0152, "grad_norm": 3.11627642465331, "learning_rate": 1.6230984489454177e-05, "loss": 0.1168405532836914, "step": 1615 }, { "epoch": 0.015247058823529412, "grad_norm": 1.6305863392377584, "learning_rate": 1.6205901821879822e-05, "loss": 0.12163922786712647, "step": 1620 }, { "epoch": 0.015294117647058824, "grad_norm": 1.6888720263467, "learning_rate": 1.6180935081092263e-05, "loss": 0.1271350145339966, "step": 1625 }, { "epoch": 0.015341176470588235, "grad_norm": 1.783047572537791, "learning_rate": 1.6156083376852313e-05, "loss": 0.09713532924652099, "step": 1630 }, { "epoch": 0.015388235294117647, "grad_norm": 1.8342826676208241, "learning_rate": 1.613134582846249e-05, "loss": 0.10892043113708497, "step": 1635 }, { "epoch": 0.01543529411764706, "grad_norm": 2.001125050212917, "learning_rate": 1.610672156463592e-05, "loss": 0.11874353885650635, "step": 1640 }, { "epoch": 0.015482352941176471, "grad_norm": 1.992124072774406, "learning_rate": 1.6082209723367456e-05, "loss": 0.1047966718673706, "step": 1645 }, { "epoch": 0.015529411764705882, "grad_norm": 2.5717587725682627, "learning_rate": 1.6057809451806922e-05, "loss": 0.11907479763031006, "step": 1650 }, { "epoch": 0.015576470588235294, "grad_norm": 2.4214577273853033, "learning_rate": 1.6033519906134495e-05, "loss": 0.12749476432800294, "step": 1655 }, { "epoch": 0.015623529411764706, "grad_norm": 2.1107349426862565, "learning_rate": 1.600934025143813e-05, "loss": 0.10260875225067138, "step": 1660 }, { "epoch": 0.015670588235294117, "grad_norm": 2.4206371847674695, "learning_rate": 1.598526966159302e-05, "loss": 0.11826019287109375, "step": 1665 }, { "epoch": 0.01571764705882353, "grad_norm": 1.6265079134976117, "learning_rate": 1.596130731914305e-05, "loss": 0.11336181163787842, "step": 1670 }, { "epoch": 0.01576470588235294, "grad_norm": 1.4937171102252556, "learning_rate": 1.5937452415184193e-05, "loss": 0.10404456853866577, "step": 1675 }, { "epoch": 0.01581176470588235, "grad_norm": 1.6629140932079962, "learning_rate": 1.5913704149249825e-05, "loss": 0.11489067077636719, "step": 1680 }, { "epoch": 0.015858823529411765, "grad_norm": 1.8912211979174147, "learning_rate": 1.58900617291979e-05, "loss": 0.11047827005386353, "step": 1685 }, { "epoch": 0.015905882352941176, "grad_norm": 3.770839691587118, "learning_rate": 1.5866524371099974e-05, "loss": 0.11287771463394165, "step": 1690 }, { "epoch": 0.01595294117647059, "grad_norm": 12.441239635241352, "learning_rate": 1.5843091299132026e-05, "loss": 0.11089942455291749, "step": 1695 }, { "epoch": 0.016, "grad_norm": 1.5727446267702274, "learning_rate": 1.5819761745467067e-05, "loss": 0.11765661239624023, "step": 1700 }, { "epoch": 0.01604705882352941, "grad_norm": 2.1761974885626727, "learning_rate": 1.579653495016945e-05, "loss": 0.10773911476135253, "step": 1705 }, { "epoch": 0.016094117647058825, "grad_norm": 1.9937964354652087, "learning_rate": 1.577341016109091e-05, "loss": 0.1156536340713501, "step": 1710 }, { "epoch": 0.016141176470588235, "grad_norm": 1.5314328449763794, "learning_rate": 1.5750386633768255e-05, "loss": 0.1163593053817749, "step": 1715 }, { "epoch": 0.016188235294117646, "grad_norm": 1.9011374755378498, "learning_rate": 1.5727463631322702e-05, "loss": 0.11986325979232788, "step": 1720 }, { "epoch": 0.01623529411764706, "grad_norm": 1.220327392539162, "learning_rate": 1.570464042436084e-05, "loss": 0.10604977607727051, "step": 1725 }, { "epoch": 0.01628235294117647, "grad_norm": 1.5573311965309973, "learning_rate": 1.568191629087713e-05, "loss": 0.10215256214141846, "step": 1730 }, { "epoch": 0.016329411764705884, "grad_norm": 1.5222225916065313, "learning_rate": 1.565929051615797e-05, "loss": 0.10166820287704467, "step": 1735 }, { "epoch": 0.016376470588235294, "grad_norm": 1.5103576618087164, "learning_rate": 1.563676239268729e-05, "loss": 0.12328221797943115, "step": 1740 }, { "epoch": 0.016423529411764705, "grad_norm": 2.3116978470733445, "learning_rate": 1.5614331220053626e-05, "loss": 0.11830967664718628, "step": 1745 }, { "epoch": 0.01647058823529412, "grad_norm": 2.087793987937307, "learning_rate": 1.5591996304858662e-05, "loss": 0.10782763957977295, "step": 1750 }, { "epoch": 0.01651764705882353, "grad_norm": 1.7586148807924489, "learning_rate": 1.556975696062719e-05, "loss": 0.1208655834197998, "step": 1755 }, { "epoch": 0.01656470588235294, "grad_norm": 1.7021309174932813, "learning_rate": 1.5547612507718507e-05, "loss": 0.13368998765945433, "step": 1760 }, { "epoch": 0.016611764705882354, "grad_norm": 2.0399268574153755, "learning_rate": 1.552556227323915e-05, "loss": 0.1111254096031189, "step": 1765 }, { "epoch": 0.016658823529411764, "grad_norm": 2.1101469041079786, "learning_rate": 1.5503605590957054e-05, "loss": 0.09702225923538207, "step": 1770 }, { "epoch": 0.016705882352941178, "grad_norm": 2.448236548524075, "learning_rate": 1.548174180121698e-05, "loss": 0.10086886882781983, "step": 1775 }, { "epoch": 0.01675294117647059, "grad_norm": 1.6573308726519795, "learning_rate": 1.5459970250857273e-05, "loss": 0.10985307693481446, "step": 1780 }, { "epoch": 0.0168, "grad_norm": 1.8054443975553562, "learning_rate": 1.5438290293127925e-05, "loss": 0.12402856349945068, "step": 1785 }, { "epoch": 0.016847058823529413, "grad_norm": 1.5117509487354195, "learning_rate": 1.541670128760987e-05, "loss": 0.10078647136688232, "step": 1790 }, { "epoch": 0.016894117647058823, "grad_norm": 2.747448689704069, "learning_rate": 1.5395202600135525e-05, "loss": 0.1270325779914856, "step": 1795 }, { "epoch": 0.016941176470588234, "grad_norm": 1.6725237633927148, "learning_rate": 1.537379360271055e-05, "loss": 0.0996906042098999, "step": 1800 }, { "epoch": 0.016988235294117648, "grad_norm": 1.4855857120285567, "learning_rate": 1.5352473673436793e-05, "loss": 0.10285897254943847, "step": 1805 }, { "epoch": 0.017035294117647058, "grad_norm": 1.71936294572677, "learning_rate": 1.5331242196436442e-05, "loss": 0.0967603087425232, "step": 1810 }, { "epoch": 0.017082352941176472, "grad_norm": 1.4639670571914136, "learning_rate": 1.5310098561777257e-05, "loss": 0.11830694675445556, "step": 1815 }, { "epoch": 0.017129411764705883, "grad_norm": 1.310485610471321, "learning_rate": 1.528904216539902e-05, "loss": 0.10701130628585816, "step": 1820 }, { "epoch": 0.017176470588235293, "grad_norm": 1.452423616551041, "learning_rate": 1.526807240904101e-05, "loss": 0.11958463191986084, "step": 1825 }, { "epoch": 0.017223529411764707, "grad_norm": 2.725730352275778, "learning_rate": 1.524718870017064e-05, "loss": 0.11022477149963379, "step": 1830 }, { "epoch": 0.017270588235294117, "grad_norm": 1.675215520167778, "learning_rate": 1.522639045191313e-05, "loss": 0.1098146915435791, "step": 1835 }, { "epoch": 0.017317647058823528, "grad_norm": 1.5013620273404553, "learning_rate": 1.5205677082982211e-05, "loss": 0.0977870523929596, "step": 1840 }, { "epoch": 0.017364705882352942, "grad_norm": 1.5007782461000527, "learning_rate": 1.5185048017611932e-05, "loss": 0.11320031881332397, "step": 1845 }, { "epoch": 0.017411764705882352, "grad_norm": 1.5933122950935399, "learning_rate": 1.5164502685489401e-05, "loss": 0.11203532218933106, "step": 1850 }, { "epoch": 0.017458823529411766, "grad_norm": 1.7182706436107131, "learning_rate": 1.5144040521688585e-05, "loss": 0.10847320556640624, "step": 1855 }, { "epoch": 0.017505882352941177, "grad_norm": 1.6980803061947654, "learning_rate": 1.5123660966605056e-05, "loss": 0.10764615535736084, "step": 1860 }, { "epoch": 0.017552941176470587, "grad_norm": 1.413963387135439, "learning_rate": 1.5103363465891718e-05, "loss": 0.09594017267227173, "step": 1865 }, { "epoch": 0.0176, "grad_norm": 1.780437452472896, "learning_rate": 1.5083147470395468e-05, "loss": 0.10961310863494873, "step": 1870 }, { "epoch": 0.01764705882352941, "grad_norm": 1.2127878366023792, "learning_rate": 1.5063012436094802e-05, "loss": 0.0980715274810791, "step": 1875 }, { "epoch": 0.017694117647058822, "grad_norm": 1.2537961283752606, "learning_rate": 1.5042957824038317e-05, "loss": 0.09150509834289551, "step": 1880 }, { "epoch": 0.017741176470588236, "grad_norm": 3.166079956041817, "learning_rate": 1.502298310028413e-05, "loss": 0.09939475059509277, "step": 1885 }, { "epoch": 0.017788235294117646, "grad_norm": 1.6922806765723197, "learning_rate": 1.5003087735840149e-05, "loss": 0.09923087954521179, "step": 1890 }, { "epoch": 0.01783529411764706, "grad_norm": 1.8884723757919915, "learning_rate": 1.4983271206605266e-05, "loss": 0.11087967157363891, "step": 1895 }, { "epoch": 0.01788235294117647, "grad_norm": 1.4473265756013263, "learning_rate": 1.4963532993311333e-05, "loss": 0.10596222877502441, "step": 1900 }, { "epoch": 0.01792941176470588, "grad_norm": 1.441708470604347, "learning_rate": 1.4943872581466039e-05, "loss": 0.10168507099151611, "step": 1905 }, { "epoch": 0.017976470588235295, "grad_norm": 1.5381232072138864, "learning_rate": 1.4924289461296571e-05, "loss": 0.09054867029190064, "step": 1910 }, { "epoch": 0.018023529411764706, "grad_norm": 1.5404994631746574, "learning_rate": 1.4904783127694096e-05, "loss": 0.09459162950515747, "step": 1915 }, { "epoch": 0.018070588235294116, "grad_norm": 1.2494878583996083, "learning_rate": 1.488535308015905e-05, "loss": 0.10469415187835693, "step": 1920 }, { "epoch": 0.01811764705882353, "grad_norm": 2.107417383226892, "learning_rate": 1.4865998822747199e-05, "loss": 0.11848934888839721, "step": 1925 }, { "epoch": 0.01816470588235294, "grad_norm": 1.392701408944615, "learning_rate": 1.484671986401645e-05, "loss": 0.10576884746551514, "step": 1930 }, { "epoch": 0.018211764705882354, "grad_norm": 1.7284448120081, "learning_rate": 1.4827515716974478e-05, "loss": 0.11187279224395752, "step": 1935 }, { "epoch": 0.018258823529411765, "grad_norm": 1.5136388801969745, "learning_rate": 1.4808385899027024e-05, "loss": 0.11555705070495606, "step": 1940 }, { "epoch": 0.018305882352941175, "grad_norm": 1.4740623996187008, "learning_rate": 1.478932993192697e-05, "loss": 0.10553826093673706, "step": 1945 }, { "epoch": 0.01835294117647059, "grad_norm": 2.1159675961113873, "learning_rate": 1.477034734172413e-05, "loss": 0.0969990611076355, "step": 1950 }, { "epoch": 0.0184, "grad_norm": 1.1107725195443812, "learning_rate": 1.4751437658715726e-05, "loss": 0.09624652862548828, "step": 1955 }, { "epoch": 0.01844705882352941, "grad_norm": 1.1574102204504197, "learning_rate": 1.4732600417397584e-05, "loss": 0.09954138994216918, "step": 1960 }, { "epoch": 0.018494117647058824, "grad_norm": 1.5523619027516018, "learning_rate": 1.4713835156415995e-05, "loss": 0.09637891054153443, "step": 1965 }, { "epoch": 0.018541176470588235, "grad_norm": 1.8325319980068089, "learning_rate": 1.4695141418520267e-05, "loss": 0.09532389640808106, "step": 1970 }, { "epoch": 0.01858823529411765, "grad_norm": 2.3531460901962324, "learning_rate": 1.4676518750515913e-05, "loss": 0.1006763219833374, "step": 1975 }, { "epoch": 0.01863529411764706, "grad_norm": 1.7483076734685778, "learning_rate": 1.4657966703218524e-05, "loss": 0.0921079695224762, "step": 1980 }, { "epoch": 0.01868235294117647, "grad_norm": 1.375271501627427, "learning_rate": 1.4639484831408253e-05, "loss": 0.10392544269561768, "step": 1985 }, { "epoch": 0.018729411764705883, "grad_norm": 1.715753071047132, "learning_rate": 1.4621072693784926e-05, "loss": 0.0970266580581665, "step": 1990 }, { "epoch": 0.018776470588235294, "grad_norm": 1.4086551863902512, "learning_rate": 1.4602729852923818e-05, "loss": 0.08654673099517822, "step": 1995 }, { "epoch": 0.018823529411764704, "grad_norm": 1.456180511482994, "learning_rate": 1.458445587523197e-05, "loss": 0.09786170125007629, "step": 2000 }, { "epoch": 0.018870588235294118, "grad_norm": 1.533866841992261, "learning_rate": 1.4566250330905165e-05, "loss": 0.09890753030776978, "step": 2005 }, { "epoch": 0.01891764705882353, "grad_norm": 1.3542476932484322, "learning_rate": 1.4548112793885458e-05, "loss": 0.10385078191757202, "step": 2010 }, { "epoch": 0.018964705882352943, "grad_norm": 1.4608063135107792, "learning_rate": 1.45300428418193e-05, "loss": 0.09795002937316895, "step": 2015 }, { "epoch": 0.019011764705882353, "grad_norm": 1.175551810838094, "learning_rate": 1.451204005601623e-05, "loss": 0.0897471010684967, "step": 2020 }, { "epoch": 0.019058823529411763, "grad_norm": 1.541815713897409, "learning_rate": 1.4494104021408126e-05, "loss": 0.11195945739746094, "step": 2025 }, { "epoch": 0.019105882352941177, "grad_norm": 1.453462734575097, "learning_rate": 1.4476234326509019e-05, "loss": 0.10576132535934449, "step": 2030 }, { "epoch": 0.019152941176470588, "grad_norm": 1.4291783413288521, "learning_rate": 1.4458430563375436e-05, "loss": 0.09322161078453065, "step": 2035 }, { "epoch": 0.0192, "grad_norm": 1.4991901594045483, "learning_rate": 1.4440692327567282e-05, "loss": 0.09345648288726807, "step": 2040 }, { "epoch": 0.019247058823529412, "grad_norm": 1.446157678332772, "learning_rate": 1.4423019218109264e-05, "loss": 0.09884282946586609, "step": 2045 }, { "epoch": 0.019294117647058823, "grad_norm": 2.4615428384034823, "learning_rate": 1.440541083745281e-05, "loss": 0.09968430995941162, "step": 2050 }, { "epoch": 0.019341176470588237, "grad_norm": 1.3238370575530536, "learning_rate": 1.4387866791438514e-05, "loss": 0.10739036798477172, "step": 2055 }, { "epoch": 0.019388235294117647, "grad_norm": 2.28068339056829, "learning_rate": 1.4370386689259075e-05, "loss": 0.09890904426574706, "step": 2060 }, { "epoch": 0.019435294117647058, "grad_norm": 1.400243473226569, "learning_rate": 1.4352970143422738e-05, "loss": 0.10693192481994629, "step": 2065 }, { "epoch": 0.01948235294117647, "grad_norm": 2.1622315383297717, "learning_rate": 1.4335616769717209e-05, "loss": 0.09063813090324402, "step": 2070 }, { "epoch": 0.019529411764705882, "grad_norm": 1.1401637606364556, "learning_rate": 1.4318326187174059e-05, "loss": 0.08476819992065429, "step": 2075 }, { "epoch": 0.019576470588235292, "grad_norm": 1.8310864489244327, "learning_rate": 1.43010980180336e-05, "loss": 0.10051870346069336, "step": 2080 }, { "epoch": 0.019623529411764706, "grad_norm": 1.462602897258667, "learning_rate": 1.4283931887710216e-05, "loss": 0.08880829811096191, "step": 2085 }, { "epoch": 0.019670588235294117, "grad_norm": 2.006178090982239, "learning_rate": 1.4266827424758159e-05, "loss": 0.10796618461608887, "step": 2090 }, { "epoch": 0.01971764705882353, "grad_norm": 1.3004213904929125, "learning_rate": 1.4249784260837784e-05, "loss": 0.10158025026321411, "step": 2095 }, { "epoch": 0.01976470588235294, "grad_norm": 1.5699545506862183, "learning_rate": 1.4232802030682246e-05, "loss": 0.0988214373588562, "step": 2100 }, { "epoch": 0.01981176470588235, "grad_norm": 1.5053425338862079, "learning_rate": 1.4215880372064608e-05, "loss": 0.08671671152114868, "step": 2105 }, { "epoch": 0.019858823529411766, "grad_norm": 1.3028594722651228, "learning_rate": 1.4199018925765392e-05, "loss": 0.08592954874038697, "step": 2110 }, { "epoch": 0.019905882352941176, "grad_norm": 1.4613392089049555, "learning_rate": 1.4182217335540558e-05, "loss": 0.09366158246994019, "step": 2115 }, { "epoch": 0.01995294117647059, "grad_norm": 1.5833897049517347, "learning_rate": 1.416547524808989e-05, "loss": 0.0885269045829773, "step": 2120 }, { "epoch": 0.02, "grad_norm": 1.7005031635989734, "learning_rate": 1.4148792313025785e-05, "loss": 0.11230356693267822, "step": 2125 }, { "epoch": 0.02004705882352941, "grad_norm": 1.8681559457296555, "learning_rate": 1.4132168182842459e-05, "loss": 0.08918297290802002, "step": 2130 }, { "epoch": 0.020094117647058825, "grad_norm": 1.463565219271874, "learning_rate": 1.4115602512885544e-05, "loss": 0.09028968811035157, "step": 2135 }, { "epoch": 0.020141176470588235, "grad_norm": 1.6121236268714594, "learning_rate": 1.4099094961322084e-05, "loss": 0.0975719690322876, "step": 2140 }, { "epoch": 0.020188235294117646, "grad_norm": 2.247649528645784, "learning_rate": 1.4082645189110891e-05, "loss": 0.09588139057159424, "step": 2145 }, { "epoch": 0.02023529411764706, "grad_norm": 1.66241654180244, "learning_rate": 1.406625285997331e-05, "loss": 0.10103714466094971, "step": 2150 }, { "epoch": 0.02028235294117647, "grad_norm": 1.360600365609006, "learning_rate": 1.404991764036435e-05, "loss": 0.08837596774101257, "step": 2155 }, { "epoch": 0.020329411764705884, "grad_norm": 1.187624508203464, "learning_rate": 1.403363919944417e-05, "loss": 0.09189974069595337, "step": 2160 }, { "epoch": 0.020376470588235295, "grad_norm": 2.0696920273204396, "learning_rate": 1.4017417209049938e-05, "loss": 0.09820308685302734, "step": 2165 }, { "epoch": 0.020423529411764705, "grad_norm": 1.6556876442188369, "learning_rate": 1.4001251343668043e-05, "loss": 0.10059250593185425, "step": 2170 }, { "epoch": 0.02047058823529412, "grad_norm": 1.5003819283137036, "learning_rate": 1.3985141280406669e-05, "loss": 0.0915514051914215, "step": 2175 }, { "epoch": 0.02051764705882353, "grad_norm": 1.8216010966319842, "learning_rate": 1.3969086698968701e-05, "loss": 0.10145647525787353, "step": 2180 }, { "epoch": 0.02056470588235294, "grad_norm": 1.2380926420995868, "learning_rate": 1.3953087281624977e-05, "loss": 0.09978727102279664, "step": 2185 }, { "epoch": 0.020611764705882354, "grad_norm": 1.3944653357661014, "learning_rate": 1.3937142713187885e-05, "loss": 0.086364483833313, "step": 2190 }, { "epoch": 0.020658823529411764, "grad_norm": 1.705057226744088, "learning_rate": 1.392125268098527e-05, "loss": 0.0846337378025055, "step": 2195 }, { "epoch": 0.020705882352941178, "grad_norm": 1.3282601686568167, "learning_rate": 1.3905416874834706e-05, "loss": 0.10293201208114625, "step": 2200 }, { "epoch": 0.02075294117647059, "grad_norm": 1.4789556287764352, "learning_rate": 1.3889634987018028e-05, "loss": 0.10048737525939941, "step": 2205 }, { "epoch": 0.0208, "grad_norm": 1.283288553578449, "learning_rate": 1.3873906712256262e-05, "loss": 0.10100864171981812, "step": 2210 }, { "epoch": 0.020847058823529413, "grad_norm": 1.3275760987194158, "learning_rate": 1.3858231747684784e-05, "loss": 0.09610369801521301, "step": 2215 }, { "epoch": 0.020894117647058823, "grad_norm": 1.3635519754024767, "learning_rate": 1.3842609792828842e-05, "loss": 0.08768154382705688, "step": 2220 }, { "epoch": 0.020941176470588234, "grad_norm": 1.4810534985798054, "learning_rate": 1.3827040549579372e-05, "loss": 0.09866788387298583, "step": 2225 }, { "epoch": 0.020988235294117648, "grad_norm": 1.6663367231848658, "learning_rate": 1.3811523722169089e-05, "loss": 0.08782994747161865, "step": 2230 }, { "epoch": 0.02103529411764706, "grad_norm": 1.3382525353983519, "learning_rate": 1.3796059017148895e-05, "loss": 0.0939136028289795, "step": 2235 }, { "epoch": 0.021082352941176472, "grad_norm": 1.3746755058562137, "learning_rate": 1.3780646143364574e-05, "loss": 0.08640863299369812, "step": 2240 }, { "epoch": 0.021129411764705883, "grad_norm": 1.2217398320209467, "learning_rate": 1.3765284811933758e-05, "loss": 0.08349363207817077, "step": 2245 }, { "epoch": 0.021176470588235293, "grad_norm": 1.5146566340405276, "learning_rate": 1.3749974736223205e-05, "loss": 0.09813992977142334, "step": 2250 }, { "epoch": 0.021223529411764707, "grad_norm": 1.2597077821945364, "learning_rate": 1.3734715631826309e-05, "loss": 0.09448193311691284, "step": 2255 }, { "epoch": 0.021270588235294118, "grad_norm": 1.6737072402936108, "learning_rate": 1.371950721654094e-05, "loss": 0.08484482169151306, "step": 2260 }, { "epoch": 0.021317647058823528, "grad_norm": 1.337618596245297, "learning_rate": 1.3704349210347493e-05, "loss": 0.09367218017578124, "step": 2265 }, { "epoch": 0.021364705882352942, "grad_norm": 1.3228292097404508, "learning_rate": 1.368924133538726e-05, "loss": 0.08691080808639526, "step": 2270 }, { "epoch": 0.021411764705882352, "grad_norm": 1.3534030671283344, "learning_rate": 1.3674183315941015e-05, "loss": 0.09313431978225709, "step": 2275 }, { "epoch": 0.021458823529411766, "grad_norm": 1.4712235843416055, "learning_rate": 1.3659174878407896e-05, "loss": 0.0948680579662323, "step": 2280 }, { "epoch": 0.021505882352941177, "grad_norm": 1.5853501368365006, "learning_rate": 1.3644215751284526e-05, "loss": 0.08681892156600952, "step": 2285 }, { "epoch": 0.021552941176470587, "grad_norm": 1.2767052668982941, "learning_rate": 1.3629305665144364e-05, "loss": 0.08572573661804199, "step": 2290 }, { "epoch": 0.0216, "grad_norm": 1.781713694544902, "learning_rate": 1.361444435261735e-05, "loss": 0.09897452592849731, "step": 2295 }, { "epoch": 0.02164705882352941, "grad_norm": 2.613771761310701, "learning_rate": 1.3599631548369748e-05, "loss": 0.0869605302810669, "step": 2300 }, { "epoch": 0.021694117647058822, "grad_norm": 1.2596358998650776, "learning_rate": 1.3584866989084255e-05, "loss": 0.08842167854309083, "step": 2305 }, { "epoch": 0.021741176470588236, "grad_norm": 1.1964237000009217, "learning_rate": 1.3570150413440355e-05, "loss": 0.08912470340728759, "step": 2310 }, { "epoch": 0.021788235294117646, "grad_norm": 1.4595213571795942, "learning_rate": 1.3555481562094868e-05, "loss": 0.08793283104896546, "step": 2315 }, { "epoch": 0.02183529411764706, "grad_norm": 2.132751376116855, "learning_rate": 1.3540860177662769e-05, "loss": 0.09814255237579346, "step": 2320 }, { "epoch": 0.02188235294117647, "grad_norm": 1.0724874610218815, "learning_rate": 1.3526286004698224e-05, "loss": 0.08056010007858276, "step": 2325 }, { "epoch": 0.02192941176470588, "grad_norm": 2.142544655513549, "learning_rate": 1.3511758789675833e-05, "loss": 0.10274477005004883, "step": 2330 }, { "epoch": 0.021976470588235295, "grad_norm": 1.2818575815959505, "learning_rate": 1.3497278280972121e-05, "loss": 0.09416711330413818, "step": 2335 }, { "epoch": 0.022023529411764706, "grad_norm": 2.0104387361875036, "learning_rate": 1.3482844228847219e-05, "loss": 0.08530280590057374, "step": 2340 }, { "epoch": 0.022070588235294116, "grad_norm": 1.4017660253824586, "learning_rate": 1.3468456385426779e-05, "loss": 0.09277162551879883, "step": 2345 }, { "epoch": 0.02211764705882353, "grad_norm": 1.3874405276312234, "learning_rate": 1.34541145046841e-05, "loss": 0.087246572971344, "step": 2350 }, { "epoch": 0.02216470588235294, "grad_norm": 1.0359560574304711, "learning_rate": 1.3439818342422461e-05, "loss": 0.08166497945785522, "step": 2355 }, { "epoch": 0.022211764705882354, "grad_norm": 1.647154113715186, "learning_rate": 1.3425567656257636e-05, "loss": 0.09507103562355042, "step": 2360 }, { "epoch": 0.022258823529411765, "grad_norm": 1.233605764457866, "learning_rate": 1.3411362205600664e-05, "loss": 0.08443020582199097, "step": 2365 }, { "epoch": 0.022305882352941175, "grad_norm": 1.7466388140163027, "learning_rate": 1.3397201751640763e-05, "loss": 0.0907909631729126, "step": 2370 }, { "epoch": 0.02235294117647059, "grad_norm": 1.3560663198008631, "learning_rate": 1.3383086057328487e-05, "loss": 0.09649119973182678, "step": 2375 }, { "epoch": 0.0224, "grad_norm": 1.2092671342111092, "learning_rate": 1.3369014887359051e-05, "loss": 0.08915149569511413, "step": 2380 }, { "epoch": 0.02244705882352941, "grad_norm": 0.9465130853990805, "learning_rate": 1.3354988008155852e-05, "loss": 0.07784945964813232, "step": 2385 }, { "epoch": 0.022494117647058824, "grad_norm": 1.6364753490113844, "learning_rate": 1.3341005187854193e-05, "loss": 0.08847392797470092, "step": 2390 }, { "epoch": 0.022541176470588235, "grad_norm": 1.34098090454625, "learning_rate": 1.332706619628518e-05, "loss": 0.09739335179328919, "step": 2395 }, { "epoch": 0.02258823529411765, "grad_norm": 1.2472318520531842, "learning_rate": 1.3313170804959809e-05, "loss": 0.07907204627990723, "step": 2400 }, { "epoch": 0.02263529411764706, "grad_norm": 1.4464796306686523, "learning_rate": 1.3299318787053239e-05, "loss": 0.08958604335784912, "step": 2405 }, { "epoch": 0.02268235294117647, "grad_norm": 1.2383379434267356, "learning_rate": 1.3285509917389237e-05, "loss": 0.0877078652381897, "step": 2410 }, { "epoch": 0.022729411764705883, "grad_norm": 1.345210328780468, "learning_rate": 1.3271743972424802e-05, "loss": 0.09634618163108825, "step": 2415 }, { "epoch": 0.022776470588235294, "grad_norm": 1.6302812727607612, "learning_rate": 1.325802073023497e-05, "loss": 0.0875393033027649, "step": 2420 }, { "epoch": 0.022823529411764704, "grad_norm": 1.71407204847763, "learning_rate": 1.324433997049779e-05, "loss": 0.0979651927947998, "step": 2425 }, { "epoch": 0.022870588235294118, "grad_norm": 0.9948013650805152, "learning_rate": 1.3230701474479459e-05, "loss": 0.08357690572738648, "step": 2430 }, { "epoch": 0.02291764705882353, "grad_norm": 1.2835830866619553, "learning_rate": 1.3217105025019638e-05, "loss": 0.08972322940826416, "step": 2435 }, { "epoch": 0.022964705882352943, "grad_norm": 1.1878165050144844, "learning_rate": 1.3203550406516929e-05, "loss": 0.08995974063873291, "step": 2440 }, { "epoch": 0.023011764705882353, "grad_norm": 1.1630749063059869, "learning_rate": 1.3190037404914524e-05, "loss": 0.0831382155418396, "step": 2445 }, { "epoch": 0.023058823529411764, "grad_norm": 1.188714204747308, "learning_rate": 1.3176565807685981e-05, "loss": 0.09085511565208435, "step": 2450 }, { "epoch": 0.023105882352941177, "grad_norm": 1.407144537383892, "learning_rate": 1.3163135403821223e-05, "loss": 0.09957512617111205, "step": 2455 }, { "epoch": 0.023152941176470588, "grad_norm": 1.2550637504553932, "learning_rate": 1.3149745983812626e-05, "loss": 0.08910548686981201, "step": 2460 }, { "epoch": 0.0232, "grad_norm": 1.185970845978084, "learning_rate": 1.3136397339641303e-05, "loss": 0.0822945237159729, "step": 2465 }, { "epoch": 0.023247058823529412, "grad_norm": 1.218737567187783, "learning_rate": 1.312308926476353e-05, "loss": 0.07504807710647583, "step": 2470 }, { "epoch": 0.023294117647058823, "grad_norm": 1.7377715720194793, "learning_rate": 1.3109821554097328e-05, "loss": 0.0956015944480896, "step": 2475 }, { "epoch": 0.023341176470588237, "grad_norm": 1.284365640557527, "learning_rate": 1.3096594004009178e-05, "loss": 0.09927380681037903, "step": 2480 }, { "epoch": 0.023388235294117647, "grad_norm": 1.1181993337297076, "learning_rate": 1.3083406412300906e-05, "loss": 0.07517058849334717, "step": 2485 }, { "epoch": 0.023435294117647058, "grad_norm": 1.619388418069284, "learning_rate": 1.3070258578196686e-05, "loss": 0.08415756225585938, "step": 2490 }, { "epoch": 0.02348235294117647, "grad_norm": 1.1018128200513886, "learning_rate": 1.3057150302330229e-05, "loss": 0.08328561186790466, "step": 2495 }, { "epoch": 0.023529411764705882, "grad_norm": 1.2447655089684722, "learning_rate": 1.3044081386732048e-05, "loss": 0.09166672229766845, "step": 2500 }, { "epoch": 0.023576470588235292, "grad_norm": 1.4618894781365293, "learning_rate": 1.303105163481694e-05, "loss": 0.09295114874839783, "step": 2505 }, { "epoch": 0.023623529411764706, "grad_norm": 2.3825594212557633, "learning_rate": 1.301806085137153e-05, "loss": 0.09946979284286499, "step": 2510 }, { "epoch": 0.023670588235294117, "grad_norm": 1.1894364044003574, "learning_rate": 1.3005108842542011e-05, "loss": 0.08192832469940185, "step": 2515 }, { "epoch": 0.02371764705882353, "grad_norm": 0.9855294410408074, "learning_rate": 1.2992195415821991e-05, "loss": 0.0847775936126709, "step": 2520 }, { "epoch": 0.02376470588235294, "grad_norm": 1.3819471063674886, "learning_rate": 1.2979320380040466e-05, "loss": 0.08548556566238404, "step": 2525 }, { "epoch": 0.02381176470588235, "grad_norm": 1.1778858045518994, "learning_rate": 1.2966483545349944e-05, "loss": 0.07783730030059814, "step": 2530 }, { "epoch": 0.023858823529411766, "grad_norm": 1.0715718957604918, "learning_rate": 1.295368472321467e-05, "loss": 0.08415815830230713, "step": 2535 }, { "epoch": 0.023905882352941176, "grad_norm": 1.2547759037227733, "learning_rate": 1.2940923726399033e-05, "loss": 0.0850517749786377, "step": 2540 }, { "epoch": 0.023952941176470587, "grad_norm": 1.4484071524824305, "learning_rate": 1.2928200368956008e-05, "loss": 0.073746657371521, "step": 2545 }, { "epoch": 0.024, "grad_norm": 1.3630406596256377, "learning_rate": 1.2915514466215827e-05, "loss": 0.09025704264640808, "step": 2550 }, { "epoch": 0.02404705882352941, "grad_norm": 1.1891982347830818, "learning_rate": 1.2902865834774685e-05, "loss": 0.09191115498542786, "step": 2555 }, { "epoch": 0.024094117647058825, "grad_norm": 1.2397762241246129, "learning_rate": 1.2890254292483626e-05, "loss": 0.07589585781097412, "step": 2560 }, { "epoch": 0.024141176470588235, "grad_norm": 1.2059787511638806, "learning_rate": 1.2877679658437519e-05, "loss": 0.07389014959335327, "step": 2565 }, { "epoch": 0.024188235294117646, "grad_norm": 1.2464228932237396, "learning_rate": 1.286514175296415e-05, "loss": 0.08457326889038086, "step": 2570 }, { "epoch": 0.02423529411764706, "grad_norm": 1.409453989135007, "learning_rate": 1.285264039761345e-05, "loss": 0.0860101580619812, "step": 2575 }, { "epoch": 0.02428235294117647, "grad_norm": 1.4156241467092006, "learning_rate": 1.2840175415146827e-05, "loss": 0.07223634719848633, "step": 2580 }, { "epoch": 0.02432941176470588, "grad_norm": 1.5485071245041397, "learning_rate": 1.282774662952661e-05, "loss": 0.0958572804927826, "step": 2585 }, { "epoch": 0.024376470588235295, "grad_norm": 1.7289143926910397, "learning_rate": 1.2815353865905603e-05, "loss": 0.08286961913108826, "step": 2590 }, { "epoch": 0.024423529411764705, "grad_norm": 1.4123810201615006, "learning_rate": 1.2802996950616755e-05, "loss": 0.0794704794883728, "step": 2595 }, { "epoch": 0.02447058823529412, "grad_norm": 1.0865368694955755, "learning_rate": 1.2790675711162955e-05, "loss": 0.07697920799255371, "step": 2600 }, { "epoch": 0.02451764705882353, "grad_norm": 1.1802671566744205, "learning_rate": 1.2778389976206884e-05, "loss": 0.07835951447486877, "step": 2605 }, { "epoch": 0.02456470588235294, "grad_norm": 1.203198591980749, "learning_rate": 1.2766139575561049e-05, "loss": 0.08622424602508545, "step": 2610 }, { "epoch": 0.024611764705882354, "grad_norm": 1.3627680228208794, "learning_rate": 1.2753924340177846e-05, "loss": 0.08286594152450562, "step": 2615 }, { "epoch": 0.024658823529411764, "grad_norm": 1.6017287812781407, "learning_rate": 1.2741744102139784e-05, "loss": 0.08363651037216187, "step": 2620 }, { "epoch": 0.024705882352941175, "grad_norm": 1.072729942026173, "learning_rate": 1.2729598694649776e-05, "loss": 0.08196839094161987, "step": 2625 }, { "epoch": 0.02475294117647059, "grad_norm": 0.921927898185011, "learning_rate": 1.2717487952021564e-05, "loss": 0.06980993747711181, "step": 2630 }, { "epoch": 0.0248, "grad_norm": 1.1606432710639132, "learning_rate": 1.2705411709670206e-05, "loss": 0.08068140149116516, "step": 2635 }, { "epoch": 0.024847058823529413, "grad_norm": 1.5394296436455215, "learning_rate": 1.2693369804102691e-05, "loss": 0.08487731218338013, "step": 2640 }, { "epoch": 0.024894117647058824, "grad_norm": 1.1182404510622725, "learning_rate": 1.2681362072908643e-05, "loss": 0.08350064754486083, "step": 2645 }, { "epoch": 0.024941176470588234, "grad_norm": 1.150702943000424, "learning_rate": 1.266938835475112e-05, "loss": 0.08792102336883545, "step": 2650 }, { "epoch": 0.024988235294117648, "grad_norm": 1.2249749087482382, "learning_rate": 1.2657448489357501e-05, "loss": 0.07470267415046691, "step": 2655 }, { "epoch": 0.02503529411764706, "grad_norm": 1.4046560037020046, "learning_rate": 1.2645542317510501e-05, "loss": 0.07350093126296997, "step": 2660 }, { "epoch": 0.025082352941176472, "grad_norm": 1.3308817969822306, "learning_rate": 1.263366968103922e-05, "loss": 0.07576621174812317, "step": 2665 }, { "epoch": 0.025129411764705883, "grad_norm": 1.2501971450131582, "learning_rate": 1.262183042281034e-05, "loss": 0.07548702359199524, "step": 2670 }, { "epoch": 0.025176470588235293, "grad_norm": 1.311749330250047, "learning_rate": 1.2610024386719379e-05, "loss": 0.09473938941955566, "step": 2675 }, { "epoch": 0.025223529411764707, "grad_norm": 1.7915568920602414, "learning_rate": 1.2598251417682062e-05, "loss": 0.07589675188064575, "step": 2680 }, { "epoch": 0.025270588235294118, "grad_norm": 1.180045464973714, "learning_rate": 1.258651136162574e-05, "loss": 0.08379631042480469, "step": 2685 }, { "epoch": 0.025317647058823528, "grad_norm": 1.398743118036714, "learning_rate": 1.2574804065480947e-05, "loss": 0.08329250812530517, "step": 2690 }, { "epoch": 0.025364705882352942, "grad_norm": 1.2369601766327474, "learning_rate": 1.2563129377173e-05, "loss": 0.08306484818458557, "step": 2695 }, { "epoch": 0.025411764705882352, "grad_norm": 1.381877013389452, "learning_rate": 1.2551487145613708e-05, "loss": 0.07037372589111328, "step": 2700 }, { "epoch": 0.025458823529411766, "grad_norm": 1.4658635642819677, "learning_rate": 1.253987722069316e-05, "loss": 0.09120460748672485, "step": 2705 }, { "epoch": 0.025505882352941177, "grad_norm": 1.3803207114291018, "learning_rate": 1.2528299453271595e-05, "loss": 0.08384729623794555, "step": 2710 }, { "epoch": 0.025552941176470587, "grad_norm": 2.6783106865187123, "learning_rate": 1.251675369517136e-05, "loss": 0.08391890525817872, "step": 2715 }, { "epoch": 0.0256, "grad_norm": 1.24406701015702, "learning_rate": 1.2505239799168933e-05, "loss": 0.09274013042449951, "step": 2720 }, { "epoch": 0.02564705882352941, "grad_norm": 1.185035013309355, "learning_rate": 1.2493757618987054e-05, "loss": 0.07629606723785401, "step": 2725 }, { "epoch": 0.025694117647058822, "grad_norm": 1.0892007300405222, "learning_rate": 1.248230700928691e-05, "loss": 0.0741351842880249, "step": 2730 }, { "epoch": 0.025741176470588236, "grad_norm": 1.3850667389465157, "learning_rate": 1.2470887825660402e-05, "loss": 0.07315894365310668, "step": 2735 }, { "epoch": 0.025788235294117647, "grad_norm": 1.3055855174811735, "learning_rate": 1.2459499924622518e-05, "loss": 0.09088518619537353, "step": 2740 }, { "epoch": 0.02583529411764706, "grad_norm": 1.189405928737777, "learning_rate": 1.2448143163603734e-05, "loss": 0.07803643345832825, "step": 2745 }, { "epoch": 0.02588235294117647, "grad_norm": 1.0887696232611488, "learning_rate": 1.2436817400942536e-05, "loss": 0.07104333639144897, "step": 2750 }, { "epoch": 0.02592941176470588, "grad_norm": 1.4306609132045547, "learning_rate": 1.2425522495877995e-05, "loss": 0.07290538549423217, "step": 2755 }, { "epoch": 0.025976470588235295, "grad_norm": 1.6304812877329082, "learning_rate": 1.241425830854241e-05, "loss": 0.0767890214920044, "step": 2760 }, { "epoch": 0.026023529411764706, "grad_norm": 1.088001800423714, "learning_rate": 1.2403024699954043e-05, "loss": 0.0722109317779541, "step": 2765 }, { "epoch": 0.026070588235294116, "grad_norm": 1.7029028333872875, "learning_rate": 1.2391821532009915e-05, "loss": 0.09149854779243469, "step": 2770 }, { "epoch": 0.02611764705882353, "grad_norm": 1.173481356507694, "learning_rate": 1.2380648667478674e-05, "loss": 0.07501391172409058, "step": 2775 }, { "epoch": 0.02616470588235294, "grad_norm": 1.1822115401502078, "learning_rate": 1.2369505969993533e-05, "loss": 0.06824345588684082, "step": 2780 }, { "epoch": 0.026211764705882355, "grad_norm": 1.00828874965458, "learning_rate": 1.2358393304045281e-05, "loss": 0.06493427157402039, "step": 2785 }, { "epoch": 0.026258823529411765, "grad_norm": 1.1225202234672242, "learning_rate": 1.2347310534975368e-05, "loss": 0.07951203584671021, "step": 2790 }, { "epoch": 0.026305882352941175, "grad_norm": 1.4387504920919656, "learning_rate": 1.2336257528969031e-05, "loss": 0.07067574262619018, "step": 2795 }, { "epoch": 0.02635294117647059, "grad_norm": 1.2552867523178808, "learning_rate": 1.2325234153048538e-05, "loss": 0.07737421989440918, "step": 2800 }, { "epoch": 0.0264, "grad_norm": 1.275944216612932, "learning_rate": 1.2314240275066444e-05, "loss": 0.08001942634582519, "step": 2805 }, { "epoch": 0.02644705882352941, "grad_norm": 1.128918601259321, "learning_rate": 1.2303275763698947e-05, "loss": 0.06815186142921448, "step": 2810 }, { "epoch": 0.026494117647058824, "grad_norm": 0.9655412843456191, "learning_rate": 1.2292340488439306e-05, "loss": 0.07048195600509644, "step": 2815 }, { "epoch": 0.026541176470588235, "grad_norm": 0.8709722172676703, "learning_rate": 1.2281434319591296e-05, "loss": 0.06748769283294678, "step": 2820 }, { "epoch": 0.02658823529411765, "grad_norm": 0.932687088504138, "learning_rate": 1.2270557128262772e-05, "loss": 0.07112994194030761, "step": 2825 }, { "epoch": 0.02663529411764706, "grad_norm": 1.3671756518495393, "learning_rate": 1.225970878635926e-05, "loss": 0.07914814352989197, "step": 2830 }, { "epoch": 0.02668235294117647, "grad_norm": 0.9266104361329686, "learning_rate": 1.2248889166577612e-05, "loss": 0.07548049688339234, "step": 2835 }, { "epoch": 0.026729411764705883, "grad_norm": 1.308095173527331, "learning_rate": 1.2238098142399745e-05, "loss": 0.08126233220100403, "step": 2840 }, { "epoch": 0.026776470588235294, "grad_norm": 1.2511579181977908, "learning_rate": 1.2227335588086434e-05, "loss": 0.0790614902973175, "step": 2845 }, { "epoch": 0.026823529411764704, "grad_norm": 1.1816071324310224, "learning_rate": 1.2216601378671126e-05, "loss": 0.08265239000320435, "step": 2850 }, { "epoch": 0.02687058823529412, "grad_norm": 1.239516777292856, "learning_rate": 1.2205895389953888e-05, "loss": 0.066336989402771, "step": 2855 }, { "epoch": 0.02691764705882353, "grad_norm": 1.2514735827283168, "learning_rate": 1.2195217498495337e-05, "loss": 0.07706860899925232, "step": 2860 }, { "epoch": 0.026964705882352943, "grad_norm": 1.9982550373986465, "learning_rate": 1.2184567581610682e-05, "loss": 0.08475109338760375, "step": 2865 }, { "epoch": 0.027011764705882353, "grad_norm": 2.0041572819535953, "learning_rate": 1.2173945517363796e-05, "loss": 0.0806045651435852, "step": 2870 }, { "epoch": 0.027058823529411764, "grad_norm": 1.239509759792863, "learning_rate": 1.2163351184561354e-05, "loss": 0.07912273406982422, "step": 2875 }, { "epoch": 0.027105882352941178, "grad_norm": 1.13188966328472, "learning_rate": 1.2152784462747023e-05, "loss": 0.06779500246047973, "step": 2880 }, { "epoch": 0.027152941176470588, "grad_norm": 0.90675440978437, "learning_rate": 1.2142245232195712e-05, "loss": 0.0713754415512085, "step": 2885 }, { "epoch": 0.0272, "grad_norm": 0.9266301548579471, "learning_rate": 1.2131733373907876e-05, "loss": 0.07608978748321533, "step": 2890 }, { "epoch": 0.027247058823529412, "grad_norm": 1.2310547815477335, "learning_rate": 1.2121248769603873e-05, "loss": 0.07578789591789245, "step": 2895 }, { "epoch": 0.027294117647058823, "grad_norm": 1.4214640782624561, "learning_rate": 1.2110791301718363e-05, "loss": 0.08251051902770996, "step": 2900 }, { "epoch": 0.027341176470588237, "grad_norm": 1.137775359356791, "learning_rate": 1.2100360853394792e-05, "loss": 0.06824783086776734, "step": 2905 }, { "epoch": 0.027388235294117647, "grad_norm": 1.1324049150085018, "learning_rate": 1.2089957308479889e-05, "loss": 0.07012475728988647, "step": 2910 }, { "epoch": 0.027435294117647058, "grad_norm": 1.1788315890658216, "learning_rate": 1.2079580551518242e-05, "loss": 0.077016282081604, "step": 2915 }, { "epoch": 0.02748235294117647, "grad_norm": 1.1893700976333272, "learning_rate": 1.2069230467746908e-05, "loss": 0.07787474393844604, "step": 2920 }, { "epoch": 0.027529411764705882, "grad_norm": 1.0098869584077061, "learning_rate": 1.2058906943090102e-05, "loss": 0.07054822444915772, "step": 2925 }, { "epoch": 0.027576470588235293, "grad_norm": 1.0201579257380866, "learning_rate": 1.2048609864153895e-05, "loss": 0.08121978044509888, "step": 2930 }, { "epoch": 0.027623529411764706, "grad_norm": 1.154088065201581, "learning_rate": 1.203833911822099e-05, "loss": 0.07738101482391357, "step": 2935 }, { "epoch": 0.027670588235294117, "grad_norm": 1.4236111594444754, "learning_rate": 1.202809459324554e-05, "loss": 0.08637952208518981, "step": 2940 }, { "epoch": 0.02771764705882353, "grad_norm": 1.2582514166492087, "learning_rate": 1.2017876177848023e-05, "loss": 0.06616110801696777, "step": 2945 }, { "epoch": 0.02776470588235294, "grad_norm": 1.8154856547211786, "learning_rate": 1.2007683761310134e-05, "loss": 0.07693740129470825, "step": 2950 }, { "epoch": 0.027811764705882352, "grad_norm": 1.2517619187950806, "learning_rate": 1.199751723356977e-05, "loss": 0.07525935173034667, "step": 2955 }, { "epoch": 0.027858823529411766, "grad_norm": 1.2374111030199888, "learning_rate": 1.1987376485216033e-05, "loss": 0.07180552482604981, "step": 2960 }, { "epoch": 0.027905882352941176, "grad_norm": 1.0911140272905766, "learning_rate": 1.1977261407484264e-05, "loss": 0.07961370944976806, "step": 2965 }, { "epoch": 0.027952941176470587, "grad_norm": 1.1578961645858232, "learning_rate": 1.1967171892251171e-05, "loss": 0.07763807773590088, "step": 2970 }, { "epoch": 0.028, "grad_norm": 1.224554264507807, "learning_rate": 1.1957107832029961e-05, "loss": 0.06325386762619019, "step": 2975 }, { "epoch": 0.02804705882352941, "grad_norm": 1.240593350659932, "learning_rate": 1.194706911996553e-05, "loss": 0.08180184960365296, "step": 2980 }, { "epoch": 0.028094117647058825, "grad_norm": 1.342566340413089, "learning_rate": 1.193705564982971e-05, "loss": 0.08485949039459229, "step": 2985 }, { "epoch": 0.028141176470588235, "grad_norm": 0.9126396787071287, "learning_rate": 1.1927067316016523e-05, "loss": 0.08116430044174194, "step": 2990 }, { "epoch": 0.028188235294117646, "grad_norm": 0.8745023632900983, "learning_rate": 1.191710401353753e-05, "loss": 0.07684516906738281, "step": 2995 }, { "epoch": 0.02823529411764706, "grad_norm": 1.0910016752083023, "learning_rate": 1.1907165638017179e-05, "loss": 0.07272008657455445, "step": 3000 }, { "epoch": 0.02828235294117647, "grad_norm": 1.3628114484991478, "learning_rate": 1.1897252085688224e-05, "loss": 0.08042248487472534, "step": 3005 }, { "epoch": 0.02832941176470588, "grad_norm": 1.022740884148708, "learning_rate": 1.1887363253387163e-05, "loss": 0.08209394216537476, "step": 3010 }, { "epoch": 0.028376470588235295, "grad_norm": 1.2466325084687484, "learning_rate": 1.1877499038549733e-05, "loss": 0.07411707639694214, "step": 3015 }, { "epoch": 0.028423529411764705, "grad_norm": 1.1296481982591753, "learning_rate": 1.1867659339206452e-05, "loss": 0.0685991883277893, "step": 3020 }, { "epoch": 0.02847058823529412, "grad_norm": 1.5423280482949226, "learning_rate": 1.1857844053978178e-05, "loss": 0.0872009813785553, "step": 3025 }, { "epoch": 0.02851764705882353, "grad_norm": 1.196726379620823, "learning_rate": 1.1848053082071727e-05, "loss": 0.0787052869796753, "step": 3030 }, { "epoch": 0.02856470588235294, "grad_norm": 1.6917766657541062, "learning_rate": 1.1838286323275525e-05, "loss": 0.08077715039253235, "step": 3035 }, { "epoch": 0.028611764705882354, "grad_norm": 1.3450151052311328, "learning_rate": 1.182854367795531e-05, "loss": 0.07109349370002746, "step": 3040 }, { "epoch": 0.028658823529411764, "grad_norm": 1.1730929106523575, "learning_rate": 1.181882504704984e-05, "loss": 0.06640090942382812, "step": 3045 }, { "epoch": 0.028705882352941175, "grad_norm": 1.0310782496999649, "learning_rate": 1.1809130332066696e-05, "loss": 0.07439139485359192, "step": 3050 }, { "epoch": 0.02875294117647059, "grad_norm": 0.9289159068154752, "learning_rate": 1.179945943507805e-05, "loss": 0.06049703359603882, "step": 3055 }, { "epoch": 0.0288, "grad_norm": 1.1063055315009147, "learning_rate": 1.1789812258716547e-05, "loss": 0.06798433065414429, "step": 3060 }, { "epoch": 0.028847058823529413, "grad_norm": 1.3456191616421618, "learning_rate": 1.1780188706171163e-05, "loss": 0.0671571969985962, "step": 3065 }, { "epoch": 0.028894117647058824, "grad_norm": 0.9776462974201193, "learning_rate": 1.177058868118314e-05, "loss": 0.07431777715682983, "step": 3070 }, { "epoch": 0.028941176470588234, "grad_norm": 1.0411559097921153, "learning_rate": 1.1761012088041927e-05, "loss": 0.07345380783081054, "step": 3075 }, { "epoch": 0.028988235294117648, "grad_norm": 1.3213661150640954, "learning_rate": 1.175145883158119e-05, "loss": 0.09320554733276368, "step": 3080 }, { "epoch": 0.02903529411764706, "grad_norm": 1.1217789706817842, "learning_rate": 1.1741928817174824e-05, "loss": 0.06608688831329346, "step": 3085 }, { "epoch": 0.02908235294117647, "grad_norm": 0.9936661795455102, "learning_rate": 1.1732421950733025e-05, "loss": 0.07361909747123718, "step": 3090 }, { "epoch": 0.029129411764705883, "grad_norm": 1.1486729997729184, "learning_rate": 1.1722938138698387e-05, "loss": 0.07920184135437011, "step": 3095 }, { "epoch": 0.029176470588235293, "grad_norm": 1.0380717387008678, "learning_rate": 1.1713477288042037e-05, "loss": 0.08040435314178467, "step": 3100 }, { "epoch": 0.029223529411764707, "grad_norm": 1.166038813509096, "learning_rate": 1.1704039306259794e-05, "loss": 0.06386116743087769, "step": 3105 }, { "epoch": 0.029270588235294118, "grad_norm": 1.273726824735226, "learning_rate": 1.1694624101368382e-05, "loss": 0.0799661099910736, "step": 3110 }, { "epoch": 0.029317647058823528, "grad_norm": 0.8877458002566008, "learning_rate": 1.1685231581901654e-05, "loss": 0.06740361452102661, "step": 3115 }, { "epoch": 0.029364705882352942, "grad_norm": 1.1464352274574645, "learning_rate": 1.167586165690687e-05, "loss": 0.06454131603240967, "step": 3120 }, { "epoch": 0.029411764705882353, "grad_norm": 0.9444065057467329, "learning_rate": 1.1666514235940983e-05, "loss": 0.06828256845474243, "step": 3125 }, { "epoch": 0.029458823529411763, "grad_norm": 1.5323189856074368, "learning_rate": 1.1657189229066992e-05, "loss": 0.07056750655174256, "step": 3130 }, { "epoch": 0.029505882352941177, "grad_norm": 1.048631505797484, "learning_rate": 1.1647886546850296e-05, "loss": 0.06981065273284912, "step": 3135 }, { "epoch": 0.029552941176470587, "grad_norm": 1.1591905178390756, "learning_rate": 1.163860610035509e-05, "loss": 0.07393852472305298, "step": 3140 }, { "epoch": 0.0296, "grad_norm": 1.0294381273204174, "learning_rate": 1.1629347801140796e-05, "loss": 0.07699021100997924, "step": 3145 }, { "epoch": 0.029647058823529412, "grad_norm": 0.8523561167131661, "learning_rate": 1.1620111561258524e-05, "loss": 0.07672640085220336, "step": 3150 }, { "epoch": 0.029694117647058822, "grad_norm": 1.348807027424343, "learning_rate": 1.161089729324757e-05, "loss": 0.07188184261322021, "step": 3155 }, { "epoch": 0.029741176470588236, "grad_norm": 1.2933990971010965, "learning_rate": 1.1601704910131917e-05, "loss": 0.077167809009552, "step": 3160 }, { "epoch": 0.029788235294117647, "grad_norm": 0.9909646835272478, "learning_rate": 1.1592534325416818e-05, "loss": 0.06132155656814575, "step": 3165 }, { "epoch": 0.02983529411764706, "grad_norm": 1.3162299063371439, "learning_rate": 1.1583385453085342e-05, "loss": 0.07221863865852356, "step": 3170 }, { "epoch": 0.02988235294117647, "grad_norm": 1.5152647572275364, "learning_rate": 1.157425820759502e-05, "loss": 0.07435308694839478, "step": 3175 }, { "epoch": 0.02992941176470588, "grad_norm": 1.7149324869416962, "learning_rate": 1.1565152503874455e-05, "loss": 0.07025845050811767, "step": 3180 }, { "epoch": 0.029976470588235295, "grad_norm": 1.0659171081194752, "learning_rate": 1.1556068257320012e-05, "loss": 0.08034671545028686, "step": 3185 }, { "epoch": 0.030023529411764706, "grad_norm": 1.526868010096062, "learning_rate": 1.1547005383792517e-05, "loss": 0.08010567426681518, "step": 3190 }, { "epoch": 0.030070588235294116, "grad_norm": 0.9805882866437664, "learning_rate": 1.1537963799613963e-05, "loss": 0.07666631937026977, "step": 3195 }, { "epoch": 0.03011764705882353, "grad_norm": 0.9486754367398618, "learning_rate": 1.1528943421564291e-05, "loss": 0.06314796209335327, "step": 3200 }, { "epoch": 0.03016470588235294, "grad_norm": 0.977890804002438, "learning_rate": 1.1519944166878157e-05, "loss": 0.058986234664916995, "step": 3205 }, { "epoch": 0.030211764705882355, "grad_norm": 1.2329418864396484, "learning_rate": 1.1510965953241744e-05, "loss": 0.07237377762794495, "step": 3210 }, { "epoch": 0.030258823529411765, "grad_norm": 1.0172186310333917, "learning_rate": 1.1502008698789606e-05, "loss": 0.08052777647972106, "step": 3215 }, { "epoch": 0.030305882352941176, "grad_norm": 1.073073417545764, "learning_rate": 1.1493072322101529e-05, "loss": 0.0694852590560913, "step": 3220 }, { "epoch": 0.03035294117647059, "grad_norm": 1.3777624971367213, "learning_rate": 1.1484156742199424e-05, "loss": 0.06957822442054748, "step": 3225 }, { "epoch": 0.0304, "grad_norm": 1.2909941881997407, "learning_rate": 1.147526187854424e-05, "loss": 0.07900978922843933, "step": 3230 }, { "epoch": 0.03044705882352941, "grad_norm": 1.6225447222087628, "learning_rate": 1.1466387651032913e-05, "loss": 0.07537040114402771, "step": 3235 }, { "epoch": 0.030494117647058824, "grad_norm": 1.1271521893322678, "learning_rate": 1.1457533979995336e-05, "loss": 0.0759891390800476, "step": 3240 }, { "epoch": 0.030541176470588235, "grad_norm": 0.928777348390852, "learning_rate": 1.1448700786191358e-05, "loss": 0.0678899884223938, "step": 3245 }, { "epoch": 0.03058823529411765, "grad_norm": 1.0621111533626821, "learning_rate": 1.1439887990807794e-05, "loss": 0.06558465957641602, "step": 3250 }, { "epoch": 0.03063529411764706, "grad_norm": 1.5087806339669068, "learning_rate": 1.1431095515455486e-05, "loss": 0.06919119358062745, "step": 3255 }, { "epoch": 0.03068235294117647, "grad_norm": 1.135020443702049, "learning_rate": 1.1422323282166363e-05, "loss": 0.07759436964988708, "step": 3260 }, { "epoch": 0.030729411764705884, "grad_norm": 1.4748494963268357, "learning_rate": 1.1413571213390545e-05, "loss": 0.07708510756492615, "step": 3265 }, { "epoch": 0.030776470588235294, "grad_norm": 1.1989034887300345, "learning_rate": 1.1404839231993454e-05, "loss": 0.0777761995792389, "step": 3270 }, { "epoch": 0.030823529411764704, "grad_norm": 1.014167794096035, "learning_rate": 1.1396127261252974e-05, "loss": 0.07142805457115173, "step": 3275 }, { "epoch": 0.03087058823529412, "grad_norm": 0.9179175800739979, "learning_rate": 1.1387435224856605e-05, "loss": 0.0674073040485382, "step": 3280 }, { "epoch": 0.03091764705882353, "grad_norm": 1.0428179705910692, "learning_rate": 1.1378763046898662e-05, "loss": 0.0703356146812439, "step": 3285 }, { "epoch": 0.030964705882352943, "grad_norm": 1.2717482444743788, "learning_rate": 1.137011065187749e-05, "loss": 0.06530610918998718, "step": 3290 }, { "epoch": 0.031011764705882353, "grad_norm": 0.8212963545766033, "learning_rate": 1.1361477964692708e-05, "loss": 0.06313766241073608, "step": 3295 }, { "epoch": 0.031058823529411764, "grad_norm": 1.3883042206060643, "learning_rate": 1.1352864910642456e-05, "loss": 0.06972560286521912, "step": 3300 }, { "epoch": 0.031105882352941178, "grad_norm": 1.0038749912378904, "learning_rate": 1.1344271415420705e-05, "loss": 0.06765131950378418, "step": 3305 }, { "epoch": 0.031152941176470588, "grad_norm": 1.1147642933528181, "learning_rate": 1.1335697405114531e-05, "loss": 0.06902738213539124, "step": 3310 }, { "epoch": 0.0312, "grad_norm": 1.1545823461560898, "learning_rate": 1.1327142806201487e-05, "loss": 0.07665424346923828, "step": 3315 }, { "epoch": 0.031247058823529412, "grad_norm": 1.1064154964261528, "learning_rate": 1.1318607545546914e-05, "loss": 0.06385798454284668, "step": 3320 }, { "epoch": 0.031294117647058826, "grad_norm": 0.8469577092563324, "learning_rate": 1.1310091550401336e-05, "loss": 0.06065886020660401, "step": 3325 }, { "epoch": 0.03134117647058823, "grad_norm": 1.1515906313134971, "learning_rate": 1.1301594748397862e-05, "loss": 0.07057394981384277, "step": 3330 }, { "epoch": 0.03138823529411765, "grad_norm": 0.9741995806131093, "learning_rate": 1.1293117067549587e-05, "loss": 0.05880298614501953, "step": 3335 }, { "epoch": 0.03143529411764706, "grad_norm": 1.065354474819722, "learning_rate": 1.128465843624703e-05, "loss": 0.060080337524414065, "step": 3340 }, { "epoch": 0.03148235294117647, "grad_norm": 1.311490084066046, "learning_rate": 1.1276218783255623e-05, "loss": 0.07415622472763062, "step": 3345 }, { "epoch": 0.03152941176470588, "grad_norm": 1.1981393297531906, "learning_rate": 1.1267798037713143e-05, "loss": 0.06928645372390747, "step": 3350 }, { "epoch": 0.031576470588235296, "grad_norm": 1.5419230735076377, "learning_rate": 1.1259396129127257e-05, "loss": 0.07821025848388671, "step": 3355 }, { "epoch": 0.0316235294117647, "grad_norm": 0.9719408482161437, "learning_rate": 1.125101298737302e-05, "loss": 0.07345223426818848, "step": 3360 }, { "epoch": 0.03167058823529412, "grad_norm": 1.0486097805092214, "learning_rate": 1.1242648542690419e-05, "loss": 0.07579683661460876, "step": 3365 }, { "epoch": 0.03171764705882353, "grad_norm": 0.9211314374230181, "learning_rate": 1.1234302725681944e-05, "loss": 0.07014250755310059, "step": 3370 }, { "epoch": 0.03176470588235294, "grad_norm": 2.5388171635530123, "learning_rate": 1.1225975467310166e-05, "loss": 0.06364313364028931, "step": 3375 }, { "epoch": 0.03181176470588235, "grad_norm": 1.2059828991824024, "learning_rate": 1.1217666698895326e-05, "loss": 0.07497323751449585, "step": 3380 }, { "epoch": 0.031858823529411766, "grad_norm": 1.4819251149085946, "learning_rate": 1.120937635211298e-05, "loss": 0.06788721084594726, "step": 3385 }, { "epoch": 0.03190588235294118, "grad_norm": 0.8166784605409777, "learning_rate": 1.1201104358991614e-05, "loss": 0.05584346652030945, "step": 3390 }, { "epoch": 0.03195294117647059, "grad_norm": 1.1044456974340784, "learning_rate": 1.1192850651910325e-05, "loss": 0.06402584910392761, "step": 3395 }, { "epoch": 0.032, "grad_norm": 1.0669496870135569, "learning_rate": 1.118461516359648e-05, "loss": 0.06042907238006592, "step": 3400 }, { "epoch": 0.032047058823529415, "grad_norm": 0.9740182624319302, "learning_rate": 1.1176397827123428e-05, "loss": 0.06250975728034973, "step": 3405 }, { "epoch": 0.03209411764705882, "grad_norm": 1.0050175139326631, "learning_rate": 1.116819857590821e-05, "loss": 0.06548613905906678, "step": 3410 }, { "epoch": 0.032141176470588236, "grad_norm": 1.0708810124764505, "learning_rate": 1.1160017343709293e-05, "loss": 0.07379460334777832, "step": 3415 }, { "epoch": 0.03218823529411765, "grad_norm": 1.2329730811804647, "learning_rate": 1.1151854064624321e-05, "loss": 0.06519339084625245, "step": 3420 }, { "epoch": 0.032235294117647056, "grad_norm": 1.0373135853669964, "learning_rate": 1.1143708673087887e-05, "loss": 0.05852563381195068, "step": 3425 }, { "epoch": 0.03228235294117647, "grad_norm": 0.9483480529306071, "learning_rate": 1.1135581103869326e-05, "loss": 0.0645290732383728, "step": 3430 }, { "epoch": 0.032329411764705884, "grad_norm": 5.812554708551121, "learning_rate": 1.1127471292070506e-05, "loss": 0.08739068508148193, "step": 3435 }, { "epoch": 0.03237647058823529, "grad_norm": 1.2468026148418514, "learning_rate": 1.1119379173123671e-05, "loss": 0.07379398941993713, "step": 3440 }, { "epoch": 0.032423529411764705, "grad_norm": 0.8030997011773551, "learning_rate": 1.1111304682789268e-05, "loss": 0.0690412163734436, "step": 3445 }, { "epoch": 0.03247058823529412, "grad_norm": 0.9963729697707336, "learning_rate": 1.110324775715381e-05, "loss": 0.06168999671936035, "step": 3450 }, { "epoch": 0.032517647058823526, "grad_norm": 1.4387291445647494, "learning_rate": 1.1095208332627752e-05, "loss": 0.07194687128067016, "step": 3455 }, { "epoch": 0.03256470588235294, "grad_norm": 1.2822476382038956, "learning_rate": 1.1087186345943381e-05, "loss": 0.07023055553436279, "step": 3460 }, { "epoch": 0.032611764705882354, "grad_norm": 0.9809035334121027, "learning_rate": 1.1079181734152726e-05, "loss": 0.06731157898902893, "step": 3465 }, { "epoch": 0.03265882352941177, "grad_norm": 1.129942368194991, "learning_rate": 1.107119443462548e-05, "loss": 0.06365975141525268, "step": 3470 }, { "epoch": 0.032705882352941175, "grad_norm": 1.2924843280746394, "learning_rate": 1.1063224385046953e-05, "loss": 0.07191818952560425, "step": 3475 }, { "epoch": 0.03275294117647059, "grad_norm": 1.0799630843993924, "learning_rate": 1.1055271523416013e-05, "loss": 0.07216507196426392, "step": 3480 }, { "epoch": 0.0328, "grad_norm": 0.7806601388100058, "learning_rate": 1.1047335788043075e-05, "loss": 0.058826792240142825, "step": 3485 }, { "epoch": 0.03284705882352941, "grad_norm": 1.1838568319532383, "learning_rate": 1.1039417117548081e-05, "loss": 0.06988815665245056, "step": 3490 }, { "epoch": 0.032894117647058824, "grad_norm": 1.283401571113353, "learning_rate": 1.1031515450858513e-05, "loss": 0.07053061723709106, "step": 3495 }, { "epoch": 0.03294117647058824, "grad_norm": 0.9514479917201365, "learning_rate": 1.1023630727207412e-05, "loss": 0.06789518594741821, "step": 3500 }, { "epoch": 0.032988235294117645, "grad_norm": 1.021756151166608, "learning_rate": 1.1015762886131415e-05, "loss": 0.07003164291381836, "step": 3505 }, { "epoch": 0.03303529411764706, "grad_norm": 1.0179751877646626, "learning_rate": 1.100791186746881e-05, "loss": 0.06504020094871521, "step": 3510 }, { "epoch": 0.03308235294117647, "grad_norm": 1.5319177623235538, "learning_rate": 1.10000776113576e-05, "loss": 0.07452186346054077, "step": 3515 }, { "epoch": 0.03312941176470588, "grad_norm": 3.122751382303207, "learning_rate": 1.0992260058233588e-05, "loss": 0.06383951902389526, "step": 3520 }, { "epoch": 0.03317647058823529, "grad_norm": 0.939431801836167, "learning_rate": 1.0984459148828472e-05, "loss": 0.06514999866485596, "step": 3525 }, { "epoch": 0.03322352941176471, "grad_norm": 0.944487861653192, "learning_rate": 1.0976674824167965e-05, "loss": 0.0714152216911316, "step": 3530 }, { "epoch": 0.033270588235294114, "grad_norm": 1.1378936652438294, "learning_rate": 1.0968907025569917e-05, "loss": 0.06722251176834107, "step": 3535 }, { "epoch": 0.03331764705882353, "grad_norm": 1.2051876419831247, "learning_rate": 1.0961155694642456e-05, "loss": 0.06891834735870361, "step": 3540 }, { "epoch": 0.03336470588235294, "grad_norm": 1.0084831425278618, "learning_rate": 1.0953420773282147e-05, "loss": 0.0719656229019165, "step": 3545 }, { "epoch": 0.033411764705882356, "grad_norm": 0.98050103003918, "learning_rate": 1.0945702203672164e-05, "loss": 0.0703319787979126, "step": 3550 }, { "epoch": 0.03345882352941176, "grad_norm": 0.9444295094716865, "learning_rate": 1.0937999928280471e-05, "loss": 0.06862081289291382, "step": 3555 }, { "epoch": 0.03350588235294118, "grad_norm": 1.3030029496319309, "learning_rate": 1.0930313889858027e-05, "loss": 0.06253161430358886, "step": 3560 }, { "epoch": 0.03355294117647059, "grad_norm": 1.039187487732245, "learning_rate": 1.0922644031436984e-05, "loss": 0.06421077251434326, "step": 3565 }, { "epoch": 0.0336, "grad_norm": 1.374178632575709, "learning_rate": 1.091499029632894e-05, "loss": 0.06852370500564575, "step": 3570 }, { "epoch": 0.03364705882352941, "grad_norm": 1.0375742979638614, "learning_rate": 1.0907352628123149e-05, "loss": 0.07674087882041931, "step": 3575 }, { "epoch": 0.033694117647058826, "grad_norm": 0.8786769486768038, "learning_rate": 1.08997309706848e-05, "loss": 0.06983662247657776, "step": 3580 }, { "epoch": 0.03374117647058823, "grad_norm": 1.1559778594926755, "learning_rate": 1.0892125268153266e-05, "loss": 0.061822938919067386, "step": 3585 }, { "epoch": 0.03378823529411765, "grad_norm": 1.2014870318374813, "learning_rate": 1.0884535464940398e-05, "loss": 0.06593632102012634, "step": 3590 }, { "epoch": 0.03383529411764706, "grad_norm": 1.1354741689645125, "learning_rate": 1.0876961505728806e-05, "loss": 0.06378586292266845, "step": 3595 }, { "epoch": 0.03388235294117647, "grad_norm": 1.318188496575388, "learning_rate": 1.0869403335470182e-05, "loss": 0.06481632590293884, "step": 3600 }, { "epoch": 0.03392941176470588, "grad_norm": 1.3795936025088527, "learning_rate": 1.0861860899383599e-05, "loss": 0.07521111965179443, "step": 3605 }, { "epoch": 0.033976470588235295, "grad_norm": 1.1346991467791543, "learning_rate": 1.0854334142953859e-05, "loss": 0.07105418443679809, "step": 3610 }, { "epoch": 0.0340235294117647, "grad_norm": 0.9639156018524456, "learning_rate": 1.0846823011929834e-05, "loss": 0.066176438331604, "step": 3615 }, { "epoch": 0.034070588235294116, "grad_norm": 1.232912824052622, "learning_rate": 1.0839327452322826e-05, "loss": 0.06822323799133301, "step": 3620 }, { "epoch": 0.03411764705882353, "grad_norm": 1.3518227450331184, "learning_rate": 1.0831847410404922e-05, "loss": 0.06497113704681397, "step": 3625 }, { "epoch": 0.034164705882352944, "grad_norm": 1.033776402857876, "learning_rate": 1.0824382832707403e-05, "loss": 0.0695422649383545, "step": 3630 }, { "epoch": 0.03421176470588235, "grad_norm": 0.8722240576002347, "learning_rate": 1.0816933666019118e-05, "loss": 0.06398657560348511, "step": 3635 }, { "epoch": 0.034258823529411765, "grad_norm": 1.0727510763162413, "learning_rate": 1.0809499857384897e-05, "loss": 0.05750836730003357, "step": 3640 }, { "epoch": 0.03430588235294118, "grad_norm": 0.8001299663854201, "learning_rate": 1.0802081354103966e-05, "loss": 0.07643908858299256, "step": 3645 }, { "epoch": 0.034352941176470586, "grad_norm": 1.0065569472683877, "learning_rate": 1.0794678103728392e-05, "loss": 0.06732549071311951, "step": 3650 }, { "epoch": 0.0344, "grad_norm": 0.9990902907028065, "learning_rate": 1.0787290054061507e-05, "loss": 0.06304501295089722, "step": 3655 }, { "epoch": 0.034447058823529414, "grad_norm": 1.0829538053029735, "learning_rate": 1.0779917153156374e-05, "loss": 0.06255958080291749, "step": 3660 }, { "epoch": 0.03449411764705882, "grad_norm": 0.9968274220323504, "learning_rate": 1.0772559349314248e-05, "loss": 0.06992684602737427, "step": 3665 }, { "epoch": 0.034541176470588235, "grad_norm": 1.1899365549105891, "learning_rate": 1.076521659108306e-05, "loss": 0.06212121248245239, "step": 3670 }, { "epoch": 0.03458823529411765, "grad_norm": 1.3406795566264704, "learning_rate": 1.0757888827255893e-05, "loss": 0.06821196675300598, "step": 3675 }, { "epoch": 0.034635294117647056, "grad_norm": 1.307775853436158, "learning_rate": 1.07505760068695e-05, "loss": 0.06912804841995239, "step": 3680 }, { "epoch": 0.03468235294117647, "grad_norm": 1.029863065134204, "learning_rate": 1.0743278079202794e-05, "loss": 0.0671470046043396, "step": 3685 }, { "epoch": 0.034729411764705884, "grad_norm": 1.1698853260848865, "learning_rate": 1.0735994993775394e-05, "loss": 0.061593443155288696, "step": 3690 }, { "epoch": 0.0347764705882353, "grad_norm": 0.9328516642309049, "learning_rate": 1.0728726700346137e-05, "loss": 0.06046538352966309, "step": 3695 }, { "epoch": 0.034823529411764705, "grad_norm": 1.046255225831969, "learning_rate": 1.0721473148911636e-05, "loss": 0.07758271098136901, "step": 3700 }, { "epoch": 0.03487058823529412, "grad_norm": 1.27270754112843, "learning_rate": 1.0714234289704834e-05, "loss": 0.0647960364818573, "step": 3705 }, { "epoch": 0.03491764705882353, "grad_norm": 1.420601743905461, "learning_rate": 1.0707010073193559e-05, "loss": 0.0673503816127777, "step": 3710 }, { "epoch": 0.03496470588235294, "grad_norm": 0.888027433740599, "learning_rate": 1.0699800450079114e-05, "loss": 0.06422685384750366, "step": 3715 }, { "epoch": 0.03501176470588235, "grad_norm": 1.0309711426313695, "learning_rate": 1.0692605371294858e-05, "loss": 0.06082305908203125, "step": 3720 }, { "epoch": 0.03505882352941177, "grad_norm": 0.9315009716811531, "learning_rate": 1.0685424788004802e-05, "loss": 0.057824456691741945, "step": 3725 }, { "epoch": 0.035105882352941174, "grad_norm": 2.1503633954493786, "learning_rate": 1.0678258651602215e-05, "loss": 0.07341588139533997, "step": 3730 }, { "epoch": 0.03515294117647059, "grad_norm": 0.9577506889111711, "learning_rate": 1.0671106913708259e-05, "loss": 0.0631458044052124, "step": 3735 }, { "epoch": 0.0352, "grad_norm": 0.9047441158134049, "learning_rate": 1.0663969526170587e-05, "loss": 0.05363409519195557, "step": 3740 }, { "epoch": 0.03524705882352941, "grad_norm": 1.12701533120102, "learning_rate": 1.0656846441062013e-05, "loss": 0.0558512806892395, "step": 3745 }, { "epoch": 0.03529411764705882, "grad_norm": 0.9327405956507924, "learning_rate": 1.0649737610679134e-05, "loss": 0.07121008038520812, "step": 3750 }, { "epoch": 0.03534117647058824, "grad_norm": 0.9951749332433999, "learning_rate": 1.0642642987541006e-05, "loss": 0.06184264421463013, "step": 3755 }, { "epoch": 0.035388235294117644, "grad_norm": 0.8733649595604773, "learning_rate": 1.0635562524387806e-05, "loss": 0.06426815986633301, "step": 3760 }, { "epoch": 0.03543529411764706, "grad_norm": 0.8475755923079638, "learning_rate": 1.0628496174179506e-05, "loss": 0.060856866836547854, "step": 3765 }, { "epoch": 0.03548235294117647, "grad_norm": 0.8252302473653678, "learning_rate": 1.0621443890094566e-05, "loss": 0.06208049058914185, "step": 3770 }, { "epoch": 0.035529411764705886, "grad_norm": 0.7410589442172107, "learning_rate": 1.0614405625528632e-05, "loss": 0.06067532300949097, "step": 3775 }, { "epoch": 0.03557647058823529, "grad_norm": 1.686030540544864, "learning_rate": 1.0607381334093234e-05, "loss": 0.06349357366561889, "step": 3780 }, { "epoch": 0.03562352941176471, "grad_norm": 1.1077383536223486, "learning_rate": 1.0600370969614512e-05, "loss": 0.06277881860733033, "step": 3785 }, { "epoch": 0.03567058823529412, "grad_norm": 1.1024054850565936, "learning_rate": 1.0593374486131934e-05, "loss": 0.061219364404678345, "step": 3790 }, { "epoch": 0.03571764705882353, "grad_norm": 1.1625162247559264, "learning_rate": 1.0586391837897032e-05, "loss": 0.06833536624908447, "step": 3795 }, { "epoch": 0.03576470588235294, "grad_norm": 1.0004646325938578, "learning_rate": 1.057942297937215e-05, "loss": 0.07657653689384461, "step": 3800 }, { "epoch": 0.035811764705882355, "grad_norm": 0.9251391642946543, "learning_rate": 1.0572467865229193e-05, "loss": 0.06820139884948731, "step": 3805 }, { "epoch": 0.03585882352941176, "grad_norm": 1.0085615989645134, "learning_rate": 1.0565526450348382e-05, "loss": 0.07524577975273132, "step": 3810 }, { "epoch": 0.035905882352941176, "grad_norm": 0.9316624461481416, "learning_rate": 1.0558598689817043e-05, "loss": 0.06599715948104859, "step": 3815 }, { "epoch": 0.03595294117647059, "grad_norm": 1.348132207787982, "learning_rate": 1.0551684538928369e-05, "loss": 0.06953788995742798, "step": 3820 }, { "epoch": 0.036, "grad_norm": 1.0328213561982837, "learning_rate": 1.054478395318021e-05, "loss": 0.06035674214363098, "step": 3825 }, { "epoch": 0.03604705882352941, "grad_norm": 1.1922132718065306, "learning_rate": 1.0537896888273884e-05, "loss": 0.072864830493927, "step": 3830 }, { "epoch": 0.036094117647058825, "grad_norm": 1.0599775230532338, "learning_rate": 1.0531023300112966e-05, "loss": 0.06498841047286988, "step": 3835 }, { "epoch": 0.03614117647058823, "grad_norm": 0.9575755832902493, "learning_rate": 1.052416314480211e-05, "loss": 0.05698815584182739, "step": 3840 }, { "epoch": 0.036188235294117646, "grad_norm": 0.8586171993834886, "learning_rate": 1.0517316378645876e-05, "loss": 0.06840286254882813, "step": 3845 }, { "epoch": 0.03623529411764706, "grad_norm": 1.2024372573194473, "learning_rate": 1.0510482958147547e-05, "loss": 0.06094220280647278, "step": 3850 }, { "epoch": 0.036282352941176474, "grad_norm": 1.130283726363715, "learning_rate": 1.0503662840007986e-05, "loss": 0.06516398191452026, "step": 3855 }, { "epoch": 0.03632941176470588, "grad_norm": 0.9534140775419818, "learning_rate": 1.0496855981124468e-05, "loss": 0.060140013694763184, "step": 3860 }, { "epoch": 0.036376470588235295, "grad_norm": 0.8477529507526307, "learning_rate": 1.0490062338589548e-05, "loss": 0.06722902059555054, "step": 3865 }, { "epoch": 0.03642352941176471, "grad_norm": 1.5283347075610476, "learning_rate": 1.0483281869689918e-05, "loss": 0.05706917643547058, "step": 3870 }, { "epoch": 0.036470588235294116, "grad_norm": 1.07564117873191, "learning_rate": 1.0476514531905282e-05, "loss": 0.06616443395614624, "step": 3875 }, { "epoch": 0.03651764705882353, "grad_norm": 1.1980746974599563, "learning_rate": 1.0469760282907229e-05, "loss": 0.06537140607833862, "step": 3880 }, { "epoch": 0.036564705882352944, "grad_norm": 0.8642345040658008, "learning_rate": 1.0463019080558134e-05, "loss": 0.0637519359588623, "step": 3885 }, { "epoch": 0.03661176470588235, "grad_norm": 2.0201866798143517, "learning_rate": 1.0456290882910047e-05, "loss": 0.06251593232154846, "step": 3890 }, { "epoch": 0.036658823529411765, "grad_norm": 1.0126172411477166, "learning_rate": 1.0449575648203593e-05, "loss": 0.0628966212272644, "step": 3895 }, { "epoch": 0.03670588235294118, "grad_norm": 0.8849374905819122, "learning_rate": 1.0442873334866887e-05, "loss": 0.06638892889022827, "step": 3900 }, { "epoch": 0.036752941176470585, "grad_norm": 1.3166318927943907, "learning_rate": 1.0436183901514456e-05, "loss": 0.05749901533126831, "step": 3905 }, { "epoch": 0.0368, "grad_norm": 1.076028907324994, "learning_rate": 1.0429507306946163e-05, "loss": 0.06329305171966552, "step": 3910 }, { "epoch": 0.03684705882352941, "grad_norm": 0.8031806981578113, "learning_rate": 1.0422843510146137e-05, "loss": 0.06607590317726135, "step": 3915 }, { "epoch": 0.03689411764705882, "grad_norm": 1.0951745588361506, "learning_rate": 1.0416192470281718e-05, "loss": 0.06340357065200805, "step": 3920 }, { "epoch": 0.036941176470588234, "grad_norm": 1.0586395914605613, "learning_rate": 1.0409554146702417e-05, "loss": 0.06353096961975098, "step": 3925 }, { "epoch": 0.03698823529411765, "grad_norm": 1.1178250826931264, "learning_rate": 1.0402928498938852e-05, "loss": 0.06164622902870178, "step": 3930 }, { "epoch": 0.03703529411764706, "grad_norm": 0.8987937262404293, "learning_rate": 1.0396315486701723e-05, "loss": 0.06356516480445862, "step": 3935 }, { "epoch": 0.03708235294117647, "grad_norm": 1.2406334512671733, "learning_rate": 1.038971506988079e-05, "loss": 0.06132408380508423, "step": 3940 }, { "epoch": 0.03712941176470588, "grad_norm": 0.9092430761793427, "learning_rate": 1.0383127208543833e-05, "loss": 0.06087039709091187, "step": 3945 }, { "epoch": 0.0371764705882353, "grad_norm": 1.4289750469859868, "learning_rate": 1.0376551862935658e-05, "loss": 0.059073740243911745, "step": 3950 }, { "epoch": 0.037223529411764704, "grad_norm": 0.8310694907521248, "learning_rate": 1.0369988993477071e-05, "loss": 0.05861130356788635, "step": 3955 }, { "epoch": 0.03727058823529412, "grad_norm": 1.2027891836987343, "learning_rate": 1.0363438560763892e-05, "loss": 0.07303032875061036, "step": 3960 }, { "epoch": 0.03731764705882353, "grad_norm": 1.02295252249788, "learning_rate": 1.035690052556595e-05, "loss": 0.07674136161804199, "step": 3965 }, { "epoch": 0.03736470588235294, "grad_norm": 0.8618266335377324, "learning_rate": 1.03503748488261e-05, "loss": 0.055352628231048584, "step": 3970 }, { "epoch": 0.03741176470588235, "grad_norm": 0.9782197274683623, "learning_rate": 1.034386149165925e-05, "loss": 0.05814361572265625, "step": 3975 }, { "epoch": 0.03745882352941177, "grad_norm": 1.1429233308861984, "learning_rate": 1.0337360415351378e-05, "loss": 0.06557872295379638, "step": 3980 }, { "epoch": 0.037505882352941174, "grad_norm": 1.3270061486996507, "learning_rate": 1.033087158135857e-05, "loss": 0.05610069036483765, "step": 3985 }, { "epoch": 0.03755294117647059, "grad_norm": 0.793320637460391, "learning_rate": 1.0324394951306058e-05, "loss": 0.06762747168540954, "step": 3990 }, { "epoch": 0.0376, "grad_norm": 0.9152223206243102, "learning_rate": 1.0317930486987275e-05, "loss": 0.06818470358848572, "step": 3995 }, { "epoch": 0.03764705882352941, "grad_norm": 0.8347252245988553, "learning_rate": 1.0311478150362894e-05, "loss": 0.0618283748626709, "step": 4000 }, { "epoch": 0.03769411764705882, "grad_norm": 1.2078087052414077, "learning_rate": 1.0305037903559901e-05, "loss": 0.05916946530342102, "step": 4005 }, { "epoch": 0.037741176470588236, "grad_norm": 0.9460202717998206, "learning_rate": 1.029860970887065e-05, "loss": 0.07247042655944824, "step": 4010 }, { "epoch": 0.03778823529411765, "grad_norm": 0.9654156238084542, "learning_rate": 1.029219352875195e-05, "loss": 0.062404245138168335, "step": 4015 }, { "epoch": 0.03783529411764706, "grad_norm": 1.3379152002179422, "learning_rate": 1.0285789325824131e-05, "loss": 0.0678586483001709, "step": 4020 }, { "epoch": 0.03788235294117647, "grad_norm": 0.8810158992463495, "learning_rate": 1.0279397062870135e-05, "loss": 0.06401022672653198, "step": 4025 }, { "epoch": 0.037929411764705885, "grad_norm": 1.1170253496164642, "learning_rate": 1.0273016702834606e-05, "loss": 0.05751469135284424, "step": 4030 }, { "epoch": 0.03797647058823529, "grad_norm": 1.2836777260908356, "learning_rate": 1.0266648208822992e-05, "loss": 0.06321067810058593, "step": 4035 }, { "epoch": 0.038023529411764706, "grad_norm": 1.4354970864180183, "learning_rate": 1.026029154410064e-05, "loss": 0.06202356815338135, "step": 4040 }, { "epoch": 0.03807058823529412, "grad_norm": 1.0079545677485526, "learning_rate": 1.0253946672091915e-05, "loss": 0.0577451765537262, "step": 4045 }, { "epoch": 0.03811764705882353, "grad_norm": 1.1769425017307042, "learning_rate": 1.0247613556379316e-05, "loss": 0.07085485458374023, "step": 4050 }, { "epoch": 0.03816470588235294, "grad_norm": 1.0747160925797739, "learning_rate": 1.0241292160702587e-05, "loss": 0.054867058992385864, "step": 4055 }, { "epoch": 0.038211764705882355, "grad_norm": 1.2983025024806951, "learning_rate": 1.0234982448957864e-05, "loss": 0.06441992521286011, "step": 4060 }, { "epoch": 0.03825882352941176, "grad_norm": 0.8689521204467539, "learning_rate": 1.0228684385196785e-05, "loss": 0.06017477512359619, "step": 4065 }, { "epoch": 0.038305882352941176, "grad_norm": 0.8239358507044996, "learning_rate": 1.0222397933625651e-05, "loss": 0.058677184581756595, "step": 4070 }, { "epoch": 0.03835294117647059, "grad_norm": 0.9086524157274422, "learning_rate": 1.0216123058604561e-05, "loss": 0.07017085552215577, "step": 4075 }, { "epoch": 0.0384, "grad_norm": 1.2946772223439782, "learning_rate": 1.0209859724646568e-05, "loss": 0.06265410780906677, "step": 4080 }, { "epoch": 0.03844705882352941, "grad_norm": 1.4767727459485005, "learning_rate": 1.0203607896416826e-05, "loss": 0.0604932427406311, "step": 4085 }, { "epoch": 0.038494117647058824, "grad_norm": 0.9618064635202044, "learning_rate": 1.019736753873177e-05, "loss": 0.06521806716918946, "step": 4090 }, { "epoch": 0.03854117647058824, "grad_norm": 1.126314435516912, "learning_rate": 1.0191138616558271e-05, "loss": 0.05398134589195251, "step": 4095 }, { "epoch": 0.038588235294117645, "grad_norm": 0.7872510247148037, "learning_rate": 1.0184921095012813e-05, "loss": 0.053804260492324826, "step": 4100 }, { "epoch": 0.03863529411764706, "grad_norm": 1.227811403947721, "learning_rate": 1.0178714939360675e-05, "loss": 0.06630998253822326, "step": 4105 }, { "epoch": 0.03868235294117647, "grad_norm": 0.940220511247346, "learning_rate": 1.0172520115015108e-05, "loss": 0.06672313213348388, "step": 4110 }, { "epoch": 0.03872941176470588, "grad_norm": 1.0835711742258718, "learning_rate": 1.016633658753654e-05, "loss": 0.06357790231704712, "step": 4115 }, { "epoch": 0.038776470588235294, "grad_norm": 0.8776210431548942, "learning_rate": 1.0160164322631763e-05, "loss": 0.04841779172420502, "step": 4120 }, { "epoch": 0.03882352941176471, "grad_norm": 1.66897632706896, "learning_rate": 1.0154003286153128e-05, "loss": 0.06175354719161987, "step": 4125 }, { "epoch": 0.038870588235294115, "grad_norm": 1.1247644063003763, "learning_rate": 1.0147853444097768e-05, "loss": 0.06360772848129273, "step": 4130 }, { "epoch": 0.03891764705882353, "grad_norm": 0.8001365731088124, "learning_rate": 1.014171476260679e-05, "loss": 0.0607261598110199, "step": 4135 }, { "epoch": 0.03896470588235294, "grad_norm": 1.3866067737553738, "learning_rate": 1.0135587207964506e-05, "loss": 0.06451242566108703, "step": 4140 }, { "epoch": 0.03901176470588235, "grad_norm": 0.8906182761059539, "learning_rate": 1.0129470746597653e-05, "loss": 0.06508265137672424, "step": 4145 }, { "epoch": 0.039058823529411764, "grad_norm": 1.4059604105479897, "learning_rate": 1.0123365345074622e-05, "loss": 0.06407562494277955, "step": 4150 }, { "epoch": 0.03910588235294118, "grad_norm": 0.8121528394328908, "learning_rate": 1.011727097010468e-05, "loss": 0.05378486514091492, "step": 4155 }, { "epoch": 0.039152941176470585, "grad_norm": 0.8982365220575784, "learning_rate": 1.011118758853722e-05, "loss": 0.04884721338748932, "step": 4160 }, { "epoch": 0.0392, "grad_norm": 1.1064080545592059, "learning_rate": 1.0105115167360995e-05, "loss": 0.057101064920425416, "step": 4165 }, { "epoch": 0.03924705882352941, "grad_norm": 0.9537545545961812, "learning_rate": 1.0099053673703371e-05, "loss": 0.05586130619049072, "step": 4170 }, { "epoch": 0.03929411764705883, "grad_norm": 4.166024876552952, "learning_rate": 1.0093003074829584e-05, "loss": 0.06007715463638306, "step": 4175 }, { "epoch": 0.039341176470588234, "grad_norm": 1.1984168407448883, "learning_rate": 1.0086963338141977e-05, "loss": 0.06238747239112854, "step": 4180 }, { "epoch": 0.03938823529411765, "grad_norm": 1.1616395349730355, "learning_rate": 1.0080934431179293e-05, "loss": 0.06991440057754517, "step": 4185 }, { "epoch": 0.03943529411764706, "grad_norm": 1.3648490644355853, "learning_rate": 1.007491632161591e-05, "loss": 0.06032066345214844, "step": 4190 }, { "epoch": 0.03948235294117647, "grad_norm": 1.3176939569625834, "learning_rate": 1.0068908977261145e-05, "loss": 0.06600756645202636, "step": 4195 }, { "epoch": 0.03952941176470588, "grad_norm": 1.042900402573277, "learning_rate": 1.0062912366058501e-05, "loss": 0.061822181940078734, "step": 4200 }, { "epoch": 0.039576470588235296, "grad_norm": 0.9554473821275008, "learning_rate": 1.0056926456084971e-05, "loss": 0.05878391265869141, "step": 4205 }, { "epoch": 0.0396235294117647, "grad_norm": 0.8758797579210799, "learning_rate": 1.0050951215550314e-05, "loss": 0.048637676239013675, "step": 4210 }, { "epoch": 0.03967058823529412, "grad_norm": 0.8135946952854507, "learning_rate": 1.0044986612796341e-05, "loss": 0.062234127521514894, "step": 4215 }, { "epoch": 0.03971764705882353, "grad_norm": 1.0250321281325405, "learning_rate": 1.003903261629623e-05, "loss": 0.06140114068984985, "step": 4220 }, { "epoch": 0.03976470588235294, "grad_norm": 0.9126840446679175, "learning_rate": 1.0033089194653798e-05, "loss": 0.05517896413803101, "step": 4225 }, { "epoch": 0.03981176470588235, "grad_norm": 1.5506952166568286, "learning_rate": 1.0027156316602832e-05, "loss": 0.06372593641281128, "step": 4230 }, { "epoch": 0.039858823529411766, "grad_norm": 0.8412894543336357, "learning_rate": 1.0021233951006386e-05, "loss": 0.04829548895359039, "step": 4235 }, { "epoch": 0.03990588235294118, "grad_norm": 1.007646788544431, "learning_rate": 1.0015322066856086e-05, "loss": 0.06261690855026245, "step": 4240 }, { "epoch": 0.03995294117647059, "grad_norm": 1.2505456248747746, "learning_rate": 1.000942063327147e-05, "loss": 0.054658740758895874, "step": 4245 }, { "epoch": 0.04, "grad_norm": 0.9036967819746139, "learning_rate": 1.0003529619499289e-05, "loss": 0.05817039012908935, "step": 4250 }, { "epoch": 0.040047058823529415, "grad_norm": 1.1363148758373054, "learning_rate": 9.99764899491285e-06, "loss": 0.06161369681358338, "step": 4255 }, { "epoch": 0.04009411764705882, "grad_norm": 0.7403244968090665, "learning_rate": 9.991778729011337e-06, "loss": 0.0565372109413147, "step": 4260 }, { "epoch": 0.040141176470588236, "grad_norm": 1.1248881825135784, "learning_rate": 9.985918791419145e-06, "loss": 0.06785852909088134, "step": 4265 }, { "epoch": 0.04018823529411765, "grad_norm": 1.024556374791107, "learning_rate": 9.980069151885238e-06, "loss": 0.060152757167816165, "step": 4270 }, { "epoch": 0.04023529411764706, "grad_norm": 1.111963059699288, "learning_rate": 9.974229780282471e-06, "loss": 0.05185211896896362, "step": 4275 }, { "epoch": 0.04028235294117647, "grad_norm": 1.2749040824374123, "learning_rate": 9.968400646606943e-06, "loss": 0.056243371963500974, "step": 4280 }, { "epoch": 0.040329411764705884, "grad_norm": 1.1592830430295085, "learning_rate": 9.962581720977358e-06, "loss": 0.06011704206466675, "step": 4285 }, { "epoch": 0.04037647058823529, "grad_norm": 1.456184623448957, "learning_rate": 9.956772973634387e-06, "loss": 0.05977914333343506, "step": 4290 }, { "epoch": 0.040423529411764705, "grad_norm": 1.5505904875234557, "learning_rate": 9.950974374940007e-06, "loss": 0.06664139032363892, "step": 4295 }, { "epoch": 0.04047058823529412, "grad_norm": 2.8102160726797907, "learning_rate": 9.945185895376878e-06, "loss": 0.05494034290313721, "step": 4300 }, { "epoch": 0.040517647058823526, "grad_norm": 1.8681720443249044, "learning_rate": 9.939407505547717e-06, "loss": 0.059802889823913574, "step": 4305 }, { "epoch": 0.04056470588235294, "grad_norm": 1.0789821502662182, "learning_rate": 9.933639176174668e-06, "loss": 0.06155753135681152, "step": 4310 }, { "epoch": 0.040611764705882354, "grad_norm": 0.949645175691571, "learning_rate": 9.92788087809867e-06, "loss": 0.05862140655517578, "step": 4315 }, { "epoch": 0.04065882352941177, "grad_norm": 1.185950171659009, "learning_rate": 9.922132582278848e-06, "loss": 0.06020110249519348, "step": 4320 }, { "epoch": 0.040705882352941175, "grad_norm": 0.9628327269557283, "learning_rate": 9.916394259791897e-06, "loss": 0.06612221002578736, "step": 4325 }, { "epoch": 0.04075294117647059, "grad_norm": 0.8968454416977849, "learning_rate": 9.910665881831465e-06, "loss": 0.06746795177459716, "step": 4330 }, { "epoch": 0.0408, "grad_norm": 1.1859081614455813, "learning_rate": 9.904947419707549e-06, "loss": 0.05524238348007202, "step": 4335 }, { "epoch": 0.04084705882352941, "grad_norm": 1.1301874935437894, "learning_rate": 9.899238844845894e-06, "loss": 0.0660233199596405, "step": 4340 }, { "epoch": 0.040894117647058824, "grad_norm": 0.923925298935039, "learning_rate": 9.893540128787388e-06, "loss": 0.05691664218902588, "step": 4345 }, { "epoch": 0.04094117647058824, "grad_norm": 0.8942781242724532, "learning_rate": 9.88785124318747e-06, "loss": 0.05664651393890381, "step": 4350 }, { "epoch": 0.040988235294117645, "grad_norm": 0.8822088302329233, "learning_rate": 9.88217215981554e-06, "loss": 0.05752508640289307, "step": 4355 }, { "epoch": 0.04103529411764706, "grad_norm": 0.9063792448969158, "learning_rate": 9.876502850554366e-06, "loss": 0.0580705463886261, "step": 4360 }, { "epoch": 0.04108235294117647, "grad_norm": 0.9648717035673869, "learning_rate": 9.870843287399493e-06, "loss": 0.05670918226242065, "step": 4365 }, { "epoch": 0.04112941176470588, "grad_norm": 0.9734817388732515, "learning_rate": 9.865193442458682e-06, "loss": 0.07000411748886108, "step": 4370 }, { "epoch": 0.041176470588235294, "grad_norm": 1.4808489707775043, "learning_rate": 9.859553287951313e-06, "loss": 0.05839406251907349, "step": 4375 }, { "epoch": 0.04122352941176471, "grad_norm": 1.108460110587188, "learning_rate": 9.85392279620782e-06, "loss": 0.06093655228614807, "step": 4380 }, { "epoch": 0.041270588235294114, "grad_norm": 0.8643106450660311, "learning_rate": 9.848301939669118e-06, "loss": 0.06753495931625367, "step": 4385 }, { "epoch": 0.04131764705882353, "grad_norm": 1.1026409715744663, "learning_rate": 9.842690690886044e-06, "loss": 0.06561399698257446, "step": 4390 }, { "epoch": 0.04136470588235294, "grad_norm": 1.104212836567226, "learning_rate": 9.83708902251878e-06, "loss": 0.06896764039993286, "step": 4395 }, { "epoch": 0.041411764705882356, "grad_norm": 0.879755328362252, "learning_rate": 9.83149690733631e-06, "loss": 0.05912497043609619, "step": 4400 }, { "epoch": 0.04145882352941176, "grad_norm": 0.6946001760499024, "learning_rate": 9.825914318215847e-06, "loss": 0.05215945839881897, "step": 4405 }, { "epoch": 0.04150588235294118, "grad_norm": 1.0958760534307004, "learning_rate": 9.820341228142295e-06, "loss": 0.05990347862243652, "step": 4410 }, { "epoch": 0.04155294117647059, "grad_norm": 0.7881926170682889, "learning_rate": 9.81477761020769e-06, "loss": 0.05466141700744629, "step": 4415 }, { "epoch": 0.0416, "grad_norm": 1.5559965331981933, "learning_rate": 9.809223437610664e-06, "loss": 0.05846085548400879, "step": 4420 }, { "epoch": 0.04164705882352941, "grad_norm": 0.8661264324800962, "learning_rate": 9.803678683655891e-06, "loss": 0.06131205558776855, "step": 4425 }, { "epoch": 0.041694117647058826, "grad_norm": 0.9514703908271477, "learning_rate": 9.798143321753559e-06, "loss": 0.05898464322090149, "step": 4430 }, { "epoch": 0.04174117647058823, "grad_norm": 1.1952009634381282, "learning_rate": 9.792617325418823e-06, "loss": 0.06829565763473511, "step": 4435 }, { "epoch": 0.04178823529411765, "grad_norm": 1.115621125536362, "learning_rate": 9.78710066827129e-06, "loss": 0.06937699913978576, "step": 4440 }, { "epoch": 0.04183529411764706, "grad_norm": 1.0331581438352653, "learning_rate": 9.78159332403447e-06, "loss": 0.06953320503234864, "step": 4445 }, { "epoch": 0.04188235294117647, "grad_norm": 0.8556185978832272, "learning_rate": 9.776095266535262e-06, "loss": 0.06148541569709778, "step": 4450 }, { "epoch": 0.04192941176470588, "grad_norm": 1.6163482550183013, "learning_rate": 9.770606469703431e-06, "loss": 0.06992535591125489, "step": 4455 }, { "epoch": 0.041976470588235296, "grad_norm": 0.8548895457112116, "learning_rate": 9.765126907571087e-06, "loss": 0.05690547823905945, "step": 4460 }, { "epoch": 0.0420235294117647, "grad_norm": 1.21012834175052, "learning_rate": 9.759656554272169e-06, "loss": 0.07339398860931397, "step": 4465 }, { "epoch": 0.04207058823529412, "grad_norm": 0.9091381192052691, "learning_rate": 9.75419538404193e-06, "loss": 0.057302224636077884, "step": 4470 }, { "epoch": 0.04211764705882353, "grad_norm": 0.9372647935451318, "learning_rate": 9.748743371216436e-06, "loss": 0.07137056589126586, "step": 4475 }, { "epoch": 0.042164705882352944, "grad_norm": 1.2040807259043047, "learning_rate": 9.743300490232047e-06, "loss": 0.05551154613494873, "step": 4480 }, { "epoch": 0.04221176470588235, "grad_norm": 0.9945329892606768, "learning_rate": 9.737866715624919e-06, "loss": 0.074474036693573, "step": 4485 }, { "epoch": 0.042258823529411765, "grad_norm": 1.0284774031051414, "learning_rate": 9.732442022030512e-06, "loss": 0.05395995378494263, "step": 4490 }, { "epoch": 0.04230588235294118, "grad_norm": 1.0407166170607582, "learning_rate": 9.727026384183075e-06, "loss": 0.06770193576812744, "step": 4495 }, { "epoch": 0.042352941176470586, "grad_norm": 0.9141268694876965, "learning_rate": 9.721619776915172e-06, "loss": 0.050776755809783934, "step": 4500 }, { "epoch": 0.0424, "grad_norm": 1.1574472439912102, "learning_rate": 9.716222175157174e-06, "loss": 0.06164177656173706, "step": 4505 }, { "epoch": 0.042447058823529414, "grad_norm": 0.9003219325589678, "learning_rate": 9.710833553936776e-06, "loss": 0.06189924478530884, "step": 4510 }, { "epoch": 0.04249411764705882, "grad_norm": 0.9107557802712806, "learning_rate": 9.705453888378521e-06, "loss": 0.06031798124313355, "step": 4515 }, { "epoch": 0.042541176470588235, "grad_norm": 0.9689501022007273, "learning_rate": 9.700083153703302e-06, "loss": 0.05239434838294983, "step": 4520 }, { "epoch": 0.04258823529411765, "grad_norm": 0.7834142424405834, "learning_rate": 9.694721325227895e-06, "loss": 0.054245603084564206, "step": 4525 }, { "epoch": 0.042635294117647056, "grad_norm": 2.1648896829605246, "learning_rate": 9.689368378364473e-06, "loss": 0.04631655216217041, "step": 4530 }, { "epoch": 0.04268235294117647, "grad_norm": 1.325733550220918, "learning_rate": 9.684024288620146e-06, "loss": 0.05420587062835693, "step": 4535 }, { "epoch": 0.042729411764705884, "grad_norm": 1.0718736651573881, "learning_rate": 9.678689031596475e-06, "loss": 0.05981850624084473, "step": 4540 }, { "epoch": 0.04277647058823529, "grad_norm": 0.9822199163597578, "learning_rate": 9.673362582989012e-06, "loss": 0.06081575155258179, "step": 4545 }, { "epoch": 0.042823529411764705, "grad_norm": 0.8315320658455515, "learning_rate": 9.668044918586835e-06, "loss": 0.05954242944717407, "step": 4550 }, { "epoch": 0.04287058823529412, "grad_norm": 0.7766069737696083, "learning_rate": 9.662736014272083e-06, "loss": 0.054858869314193724, "step": 4555 }, { "epoch": 0.04291764705882353, "grad_norm": 1.3602799346445293, "learning_rate": 9.657435846019505e-06, "loss": 0.057552337646484375, "step": 4560 }, { "epoch": 0.04296470588235294, "grad_norm": 1.7335757653028077, "learning_rate": 9.652144389895985e-06, "loss": 0.05742691159248352, "step": 4565 }, { "epoch": 0.043011764705882354, "grad_norm": 0.834619110976854, "learning_rate": 9.646861622060108e-06, "loss": 0.05439275503158569, "step": 4570 }, { "epoch": 0.04305882352941177, "grad_norm": 1.305171107560965, "learning_rate": 9.641587518761702e-06, "loss": 0.05520985722541809, "step": 4575 }, { "epoch": 0.043105882352941174, "grad_norm": 0.7869839520031437, "learning_rate": 9.636322056341379e-06, "loss": 0.05729159116744995, "step": 4580 }, { "epoch": 0.04315294117647059, "grad_norm": 1.1973544791035076, "learning_rate": 9.631065211230108e-06, "loss": 0.05079241991043091, "step": 4585 }, { "epoch": 0.0432, "grad_norm": 0.8671984444385591, "learning_rate": 9.625816959948759e-06, "loss": 0.054439973831176755, "step": 4590 }, { "epoch": 0.04324705882352941, "grad_norm": 0.9333854661851386, "learning_rate": 9.62057727910766e-06, "loss": 0.056635069847106936, "step": 4595 }, { "epoch": 0.04329411764705882, "grad_norm": 0.992320551535254, "learning_rate": 9.615346145406175e-06, "loss": 0.08353567123413086, "step": 4600 }, { "epoch": 0.04334117647058824, "grad_norm": 0.7958440445225794, "learning_rate": 9.610123535632254e-06, "loss": 0.04849244058132172, "step": 4605 }, { "epoch": 0.043388235294117644, "grad_norm": 1.0942193632483121, "learning_rate": 9.604909426662001e-06, "loss": 0.05304941534996033, "step": 4610 }, { "epoch": 0.04343529411764706, "grad_norm": 1.4243694344359374, "learning_rate": 9.599703795459256e-06, "loss": 0.06545316576957702, "step": 4615 }, { "epoch": 0.04348235294117647, "grad_norm": 1.0573196081977778, "learning_rate": 9.594506619075155e-06, "loss": 0.05430942177772522, "step": 4620 }, { "epoch": 0.04352941176470588, "grad_norm": 0.9093175172431368, "learning_rate": 9.58931787464771e-06, "loss": 0.05216946601867676, "step": 4625 }, { "epoch": 0.04357647058823529, "grad_norm": 1.3581480906688543, "learning_rate": 9.584137539401389e-06, "loss": 0.05918387174606323, "step": 4630 }, { "epoch": 0.04362352941176471, "grad_norm": 1.0882189085239897, "learning_rate": 9.57896559064669e-06, "loss": 0.0628853440284729, "step": 4635 }, { "epoch": 0.04367058823529412, "grad_norm": 0.7703806390387709, "learning_rate": 9.57380200577973e-06, "loss": 0.05484623312950134, "step": 4640 }, { "epoch": 0.04371764705882353, "grad_norm": 1.2620529350204084, "learning_rate": 9.568646762281827e-06, "loss": 0.0715370535850525, "step": 4645 }, { "epoch": 0.04376470588235294, "grad_norm": 0.746539656245257, "learning_rate": 9.563499837719085e-06, "loss": 0.051120531558990476, "step": 4650 }, { "epoch": 0.043811764705882356, "grad_norm": 0.7020183807149597, "learning_rate": 9.558361209741987e-06, "loss": 0.05743147730827332, "step": 4655 }, { "epoch": 0.04385882352941176, "grad_norm": 0.970501697102066, "learning_rate": 9.553230856084996e-06, "loss": 0.048539507389068606, "step": 4660 }, { "epoch": 0.043905882352941177, "grad_norm": 0.9486947739907902, "learning_rate": 9.548108754566127e-06, "loss": 0.05960168838500977, "step": 4665 }, { "epoch": 0.04395294117647059, "grad_norm": 1.0625863399984576, "learning_rate": 9.542994883086568e-06, "loss": 0.05414172410964966, "step": 4670 }, { "epoch": 0.044, "grad_norm": 0.9993312553919933, "learning_rate": 9.537889219630267e-06, "loss": 0.06060645580291748, "step": 4675 }, { "epoch": 0.04404705882352941, "grad_norm": 1.179977797298802, "learning_rate": 9.532791742263535e-06, "loss": 0.06540307998657227, "step": 4680 }, { "epoch": 0.044094117647058825, "grad_norm": 0.8924477088101903, "learning_rate": 9.527702429134652e-06, "loss": 0.050313806533813475, "step": 4685 }, { "epoch": 0.04414117647058823, "grad_norm": 0.982825664696499, "learning_rate": 9.52262125847348e-06, "loss": 0.05020564794540405, "step": 4690 }, { "epoch": 0.044188235294117646, "grad_norm": 1.0698467112487644, "learning_rate": 9.517548208591051e-06, "loss": 0.0542986273765564, "step": 4695 }, { "epoch": 0.04423529411764706, "grad_norm": 0.839264218828007, "learning_rate": 9.512483257879209e-06, "loss": 0.058183145523071286, "step": 4700 }, { "epoch": 0.04428235294117647, "grad_norm": 1.053102590458233, "learning_rate": 9.507426384810194e-06, "loss": 0.06179782748222351, "step": 4705 }, { "epoch": 0.04432941176470588, "grad_norm": 1.0933986849488733, "learning_rate": 9.502377567936282e-06, "loss": 0.05959266424179077, "step": 4710 }, { "epoch": 0.044376470588235295, "grad_norm": 1.087536417026509, "learning_rate": 9.497336785889377e-06, "loss": 0.06873283982276916, "step": 4715 }, { "epoch": 0.04442352941176471, "grad_norm": 0.7070789589423983, "learning_rate": 9.492304017380662e-06, "loss": 0.056410545110702516, "step": 4720 }, { "epoch": 0.044470588235294116, "grad_norm": 1.202244351746068, "learning_rate": 9.48727924120019e-06, "loss": 0.061277192831039426, "step": 4725 }, { "epoch": 0.04451764705882353, "grad_norm": 0.9957125775750622, "learning_rate": 9.482262436216537e-06, "loss": 0.053910136222839355, "step": 4730 }, { "epoch": 0.044564705882352944, "grad_norm": 0.8442860052128459, "learning_rate": 9.477253581376404e-06, "loss": 0.053446012735366824, "step": 4735 }, { "epoch": 0.04461176470588235, "grad_norm": 0.8610153533494069, "learning_rate": 9.472252655704265e-06, "loss": 0.058822262287139895, "step": 4740 }, { "epoch": 0.044658823529411765, "grad_norm": 0.9431200920166897, "learning_rate": 9.46725963830199e-06, "loss": 0.04923393428325653, "step": 4745 }, { "epoch": 0.04470588235294118, "grad_norm": 1.0695066936429851, "learning_rate": 9.462274508348473e-06, "loss": 0.05902501344680786, "step": 4750 }, { "epoch": 0.044752941176470586, "grad_norm": 1.0832710765170503, "learning_rate": 9.457297245099278e-06, "loss": 0.06002548933029175, "step": 4755 }, { "epoch": 0.0448, "grad_norm": 1.1036345280749233, "learning_rate": 9.452327827886269e-06, "loss": 0.05743227005004883, "step": 4760 }, { "epoch": 0.044847058823529413, "grad_norm": 0.9260314428302399, "learning_rate": 9.447366236117255e-06, "loss": 0.0549584150314331, "step": 4765 }, { "epoch": 0.04489411764705882, "grad_norm": 0.9578358065321025, "learning_rate": 9.442412449275624e-06, "loss": 0.05576552748680115, "step": 4770 }, { "epoch": 0.044941176470588234, "grad_norm": 2.2848299123159395, "learning_rate": 9.437466446919993e-06, "loss": 0.05801724195480347, "step": 4775 }, { "epoch": 0.04498823529411765, "grad_norm": 1.0000533227191815, "learning_rate": 9.432528208683854e-06, "loss": 0.053192907571792604, "step": 4780 }, { "epoch": 0.04503529411764706, "grad_norm": 1.1698684986958439, "learning_rate": 9.427597714275221e-06, "loss": 0.06259573698043823, "step": 4785 }, { "epoch": 0.04508235294117647, "grad_norm": 0.845302591255772, "learning_rate": 9.422674943476272e-06, "loss": 0.05468448400497437, "step": 4790 }, { "epoch": 0.04512941176470588, "grad_norm": 1.1140657053701017, "learning_rate": 9.41775987614301e-06, "loss": 0.07646671533584595, "step": 4795 }, { "epoch": 0.0451764705882353, "grad_norm": 1.1033498002492308, "learning_rate": 9.412852492204918e-06, "loss": 0.058733236789703366, "step": 4800 }, { "epoch": 0.045223529411764704, "grad_norm": 0.9249144702396037, "learning_rate": 9.407952771664609e-06, "loss": 0.05205525159835815, "step": 4805 }, { "epoch": 0.04527058823529412, "grad_norm": 1.0021741922456304, "learning_rate": 9.403060694597482e-06, "loss": 0.055048692226409915, "step": 4810 }, { "epoch": 0.04531764705882353, "grad_norm": 1.1638721095663058, "learning_rate": 9.398176241151388e-06, "loss": 0.053053104877471925, "step": 4815 }, { "epoch": 0.04536470588235294, "grad_norm": 1.391457812557203, "learning_rate": 9.393299391546292e-06, "loss": 0.06156247854232788, "step": 4820 }, { "epoch": 0.04541176470588235, "grad_norm": 0.9169824701200381, "learning_rate": 9.388430126073926e-06, "loss": 0.05614110231399536, "step": 4825 }, { "epoch": 0.04545882352941177, "grad_norm": 1.0360009537384631, "learning_rate": 9.383568425097473e-06, "loss": 0.06477376222610473, "step": 4830 }, { "epoch": 0.045505882352941174, "grad_norm": 0.7680411094264232, "learning_rate": 9.37871426905121e-06, "loss": 0.04581314921379089, "step": 4835 }, { "epoch": 0.04555294117647059, "grad_norm": 0.983668350916781, "learning_rate": 9.373867638440203e-06, "loss": 0.0528582751750946, "step": 4840 }, { "epoch": 0.0456, "grad_norm": 0.9851050105899277, "learning_rate": 9.369028513839959e-06, "loss": 0.056116366386413576, "step": 4845 }, { "epoch": 0.04564705882352941, "grad_norm": 0.9149164632450172, "learning_rate": 9.364196875896106e-06, "loss": 0.0476639986038208, "step": 4850 }, { "epoch": 0.04569411764705882, "grad_norm": 0.8955257155707765, "learning_rate": 9.359372705324072e-06, "loss": 0.056936174631118774, "step": 4855 }, { "epoch": 0.045741176470588236, "grad_norm": 0.8329390037722261, "learning_rate": 9.354555982908754e-06, "loss": 0.06066344976425171, "step": 4860 }, { "epoch": 0.04578823529411765, "grad_norm": 1.0250659872656886, "learning_rate": 9.3497466895042e-06, "loss": 0.056872892379760745, "step": 4865 }, { "epoch": 0.04583529411764706, "grad_norm": 1.1982824707528152, "learning_rate": 9.344944806033291e-06, "loss": 0.05651407241821289, "step": 4870 }, { "epoch": 0.04588235294117647, "grad_norm": 0.9596534184663736, "learning_rate": 9.340150313487423e-06, "loss": 0.05594099164009094, "step": 4875 }, { "epoch": 0.045929411764705885, "grad_norm": 0.9829809644352521, "learning_rate": 9.335363192926185e-06, "loss": 0.061138248443603514, "step": 4880 }, { "epoch": 0.04597647058823529, "grad_norm": 1.1452038150127164, "learning_rate": 9.330583425477052e-06, "loss": 0.05325714349746704, "step": 4885 }, { "epoch": 0.046023529411764706, "grad_norm": 1.0601132028138713, "learning_rate": 9.325810992335072e-06, "loss": 0.058096885681152344, "step": 4890 }, { "epoch": 0.04607058823529412, "grad_norm": 1.2341362044487423, "learning_rate": 9.321045874762547e-06, "loss": 0.06767117977142334, "step": 4895 }, { "epoch": 0.04611764705882353, "grad_norm": 1.1580165759059373, "learning_rate": 9.316288054088737e-06, "loss": 0.055376315116882326, "step": 4900 }, { "epoch": 0.04616470588235294, "grad_norm": 1.0574112289816877, "learning_rate": 9.31153751170954e-06, "loss": 0.056944739818572995, "step": 4905 }, { "epoch": 0.046211764705882355, "grad_norm": 1.3512327600347391, "learning_rate": 9.3067942290872e-06, "loss": 0.0636708378791809, "step": 4910 }, { "epoch": 0.04625882352941176, "grad_norm": 0.7475336683944821, "learning_rate": 9.302058187749986e-06, "loss": 0.05658641457557678, "step": 4915 }, { "epoch": 0.046305882352941176, "grad_norm": 1.0288490280655918, "learning_rate": 9.297329369291904e-06, "loss": 0.056850337982177736, "step": 4920 }, { "epoch": 0.04635294117647059, "grad_norm": 0.872127541379661, "learning_rate": 9.292607755372394e-06, "loss": 0.04733706712722778, "step": 4925 }, { "epoch": 0.0464, "grad_norm": 1.2704748862822814, "learning_rate": 9.287893327716026e-06, "loss": 0.06639980673789977, "step": 4930 }, { "epoch": 0.04644705882352941, "grad_norm": 0.926129429877069, "learning_rate": 9.283186068112206e-06, "loss": 0.05338034629821777, "step": 4935 }, { "epoch": 0.046494117647058825, "grad_norm": 0.881712292744094, "learning_rate": 9.278485958414883e-06, "loss": 0.04954003691673279, "step": 4940 }, { "epoch": 0.04654117647058824, "grad_norm": 0.7917639683600223, "learning_rate": 9.27379298054225e-06, "loss": 0.06257432699203491, "step": 4945 }, { "epoch": 0.046588235294117646, "grad_norm": 0.8324238925775844, "learning_rate": 9.269107116476458e-06, "loss": 0.04753593504428864, "step": 4950 }, { "epoch": 0.04663529411764706, "grad_norm": 0.9290058993925516, "learning_rate": 9.264428348263318e-06, "loss": 0.05173888206481934, "step": 4955 }, { "epoch": 0.04668235294117647, "grad_norm": 0.8297137612989003, "learning_rate": 9.259756658012018e-06, "loss": 0.060052239894866945, "step": 4960 }, { "epoch": 0.04672941176470588, "grad_norm": 0.873800417896456, "learning_rate": 9.255092027894838e-06, "loss": 0.06122896671295166, "step": 4965 }, { "epoch": 0.046776470588235294, "grad_norm": 0.9345884028126338, "learning_rate": 9.250434440146854e-06, "loss": 0.059136247634887694, "step": 4970 }, { "epoch": 0.04682352941176471, "grad_norm": 0.7831512444418487, "learning_rate": 9.245783877065661e-06, "loss": 0.05404677391052246, "step": 4975 }, { "epoch": 0.046870588235294115, "grad_norm": 0.924328588972874, "learning_rate": 9.241140321011092e-06, "loss": 0.06851239800453186, "step": 4980 }, { "epoch": 0.04691764705882353, "grad_norm": 0.8340840321512926, "learning_rate": 9.236503754404935e-06, "loss": 0.049431759119033816, "step": 4985 }, { "epoch": 0.04696470588235294, "grad_norm": 0.794974771509881, "learning_rate": 9.231874159730647e-06, "loss": 0.053896307945251465, "step": 4990 }, { "epoch": 0.04701176470588235, "grad_norm": 0.921908718298277, "learning_rate": 9.22725151953309e-06, "loss": 0.0573560357093811, "step": 4995 }, { "epoch": 0.047058823529411764, "grad_norm": 0.9932166095001307, "learning_rate": 9.222635816418237e-06, "loss": 0.05719166398048401, "step": 5000 }, { "epoch": 0.04710588235294118, "grad_norm": 1.026523975436637, "learning_rate": 9.218027033052916e-06, "loss": 0.058307313919067384, "step": 5005 }, { "epoch": 0.047152941176470585, "grad_norm": 1.0154527509680453, "learning_rate": 9.213425152164523e-06, "loss": 0.05172334313392639, "step": 5010 }, { "epoch": 0.0472, "grad_norm": 0.9214206250977764, "learning_rate": 9.208830156540753e-06, "loss": 0.05420504212379455, "step": 5015 }, { "epoch": 0.04724705882352941, "grad_norm": 1.1184564177955454, "learning_rate": 9.204242029029333e-06, "loss": 0.05299224853515625, "step": 5020 }, { "epoch": 0.04729411764705883, "grad_norm": 0.8449289557788005, "learning_rate": 9.199660752537752e-06, "loss": 0.05056322813034057, "step": 5025 }, { "epoch": 0.047341176470588234, "grad_norm": 0.9699409066804136, "learning_rate": 9.195086310032988e-06, "loss": 0.05885199904441833, "step": 5030 }, { "epoch": 0.04738823529411765, "grad_norm": 0.7899295876231814, "learning_rate": 9.190518684541252e-06, "loss": 0.054781532287597655, "step": 5035 }, { "epoch": 0.04743529411764706, "grad_norm": 0.6954986571924613, "learning_rate": 9.185957859147718e-06, "loss": 0.05703824758529663, "step": 5040 }, { "epoch": 0.04748235294117647, "grad_norm": 1.075928320743827, "learning_rate": 9.181403816996254e-06, "loss": 0.05333094000816345, "step": 5045 }, { "epoch": 0.04752941176470588, "grad_norm": 0.7580472883671994, "learning_rate": 9.176856541289173e-06, "loss": 0.05111567378044128, "step": 5050 }, { "epoch": 0.047576470588235296, "grad_norm": 0.8224011225383928, "learning_rate": 9.172316015286963e-06, "loss": 0.048982429504394534, "step": 5055 }, { "epoch": 0.0476235294117647, "grad_norm": 0.8744422244115115, "learning_rate": 9.167782222308028e-06, "loss": 0.054561066627502444, "step": 5060 }, { "epoch": 0.04767058823529412, "grad_norm": 0.7326080818420245, "learning_rate": 9.163255145728445e-06, "loss": 0.05611671805381775, "step": 5065 }, { "epoch": 0.04771764705882353, "grad_norm": 1.0074201421889861, "learning_rate": 9.158734768981682e-06, "loss": 0.051882427930831906, "step": 5070 }, { "epoch": 0.04776470588235294, "grad_norm": 0.8848473551181966, "learning_rate": 9.154221075558373e-06, "loss": 0.04998658895492554, "step": 5075 }, { "epoch": 0.04781176470588235, "grad_norm": 0.9325878418263798, "learning_rate": 9.14971404900604e-06, "loss": 0.04884618520736694, "step": 5080 }, { "epoch": 0.047858823529411766, "grad_norm": 1.1904286616644564, "learning_rate": 9.145213672928851e-06, "loss": 0.05732145309448242, "step": 5085 }, { "epoch": 0.04790588235294117, "grad_norm": 1.065999928565113, "learning_rate": 9.140719930987376e-06, "loss": 0.053017842769622806, "step": 5090 }, { "epoch": 0.04795294117647059, "grad_norm": 1.02592447738706, "learning_rate": 9.136232806898329e-06, "loss": 0.05549513101577759, "step": 5095 }, { "epoch": 0.048, "grad_norm": 1.0414313575807113, "learning_rate": 9.131752284434323e-06, "loss": 0.05129291415214539, "step": 5100 }, { "epoch": 0.048047058823529415, "grad_norm": 1.150319595855008, "learning_rate": 9.127278347423616e-06, "loss": 0.06488362550735474, "step": 5105 }, { "epoch": 0.04809411764705882, "grad_norm": 1.0054390041774315, "learning_rate": 9.122810979749885e-06, "loss": 0.05039316415786743, "step": 5110 }, { "epoch": 0.048141176470588236, "grad_norm": 0.895565921196501, "learning_rate": 9.118350165351965e-06, "loss": 0.04966666996479034, "step": 5115 }, { "epoch": 0.04818823529411765, "grad_norm": 0.7701807511170534, "learning_rate": 9.113895888223608e-06, "loss": 0.04153298139572144, "step": 5120 }, { "epoch": 0.04823529411764706, "grad_norm": 1.1585027022413334, "learning_rate": 9.10944813241325e-06, "loss": 0.06184702515602112, "step": 5125 }, { "epoch": 0.04828235294117647, "grad_norm": 0.7662902349488897, "learning_rate": 9.105006882023766e-06, "loss": 0.05743580460548401, "step": 5130 }, { "epoch": 0.048329411764705885, "grad_norm": 0.9602790872190071, "learning_rate": 9.100572121212231e-06, "loss": 0.05202406644821167, "step": 5135 }, { "epoch": 0.04837647058823529, "grad_norm": 1.2401457818400907, "learning_rate": 9.096143834189683e-06, "loss": 0.05692102313041687, "step": 5140 }, { "epoch": 0.048423529411764706, "grad_norm": 1.160677876922498, "learning_rate": 9.091722005220887e-06, "loss": 0.05837029218673706, "step": 5145 }, { "epoch": 0.04847058823529412, "grad_norm": 0.8321848631687214, "learning_rate": 9.087306618624106e-06, "loss": 0.05232032537460327, "step": 5150 }, { "epoch": 0.048517647058823526, "grad_norm": 0.710913309682717, "learning_rate": 9.082897658770855e-06, "loss": 0.0558817982673645, "step": 5155 }, { "epoch": 0.04856470588235294, "grad_norm": 0.7686656873408378, "learning_rate": 9.07849511008568e-06, "loss": 0.05416983366012573, "step": 5160 }, { "epoch": 0.048611764705882354, "grad_norm": 0.9826451259126561, "learning_rate": 9.074098957045918e-06, "loss": 0.05606234073638916, "step": 5165 }, { "epoch": 0.04865882352941176, "grad_norm": 0.85668280259481, "learning_rate": 9.069709184181479e-06, "loss": 0.04638736248016358, "step": 5170 }, { "epoch": 0.048705882352941175, "grad_norm": 1.4109077898448434, "learning_rate": 9.065325776074603e-06, "loss": 0.052173519134521486, "step": 5175 }, { "epoch": 0.04875294117647059, "grad_norm": 0.7464332416718015, "learning_rate": 9.060948717359645e-06, "loss": 0.05062476396560669, "step": 5180 }, { "epoch": 0.0488, "grad_norm": 0.7715709239538376, "learning_rate": 9.056577992722836e-06, "loss": 0.05579806566238403, "step": 5185 }, { "epoch": 0.04884705882352941, "grad_norm": 0.8022868241952377, "learning_rate": 9.052213586902079e-06, "loss": 0.04855314493179321, "step": 5190 }, { "epoch": 0.048894117647058824, "grad_norm": 1.0514352079325138, "learning_rate": 9.047855484686696e-06, "loss": 0.05135869383811951, "step": 5195 }, { "epoch": 0.04894117647058824, "grad_norm": 1.1562919309146655, "learning_rate": 9.043503670917232e-06, "loss": 0.056084060668945314, "step": 5200 }, { "epoch": 0.048988235294117645, "grad_norm": 0.9785163071655442, "learning_rate": 9.039158130485217e-06, "loss": 0.06066577434539795, "step": 5205 }, { "epoch": 0.04903529411764706, "grad_norm": 1.0555741248091437, "learning_rate": 9.034818848332951e-06, "loss": 0.05225840210914612, "step": 5210 }, { "epoch": 0.04908235294117647, "grad_norm": 0.8970770479800074, "learning_rate": 9.03048580945329e-06, "loss": 0.04747841060161591, "step": 5215 }, { "epoch": 0.04912941176470588, "grad_norm": 0.9604312134940284, "learning_rate": 9.026158998889419e-06, "loss": 0.055688774585723876, "step": 5220 }, { "epoch": 0.049176470588235294, "grad_norm": 0.9534289323332487, "learning_rate": 9.021838401734636e-06, "loss": 0.05189491510391235, "step": 5225 }, { "epoch": 0.04922352941176471, "grad_norm": 1.2319117561106914, "learning_rate": 9.017524003132149e-06, "loss": 0.06227871775627136, "step": 5230 }, { "epoch": 0.049270588235294115, "grad_norm": 0.8007804922311148, "learning_rate": 9.013215788274842e-06, "loss": 0.05370932221412659, "step": 5235 }, { "epoch": 0.04931764705882353, "grad_norm": 0.7129442470233069, "learning_rate": 9.008913742405078e-06, "loss": 0.05920398235321045, "step": 5240 }, { "epoch": 0.04936470588235294, "grad_norm": 0.7355362139163817, "learning_rate": 9.004617850814477e-06, "loss": 0.042733049392700194, "step": 5245 }, { "epoch": 0.04941176470588235, "grad_norm": 0.951634717725911, "learning_rate": 9.000328098843708e-06, "loss": 0.05046279430389404, "step": 5250 }, { "epoch": 0.04945882352941176, "grad_norm": 0.751434236974177, "learning_rate": 8.996044471882282e-06, "loss": 0.05149431228637695, "step": 5255 }, { "epoch": 0.04950588235294118, "grad_norm": 0.9916891182304618, "learning_rate": 8.991766955368335e-06, "loss": 0.04724765419960022, "step": 5260 }, { "epoch": 0.04955294117647059, "grad_norm": 0.9816375029117077, "learning_rate": 8.98749553478843e-06, "loss": 0.053609049320220946, "step": 5265 }, { "epoch": 0.0496, "grad_norm": 1.1362240844352822, "learning_rate": 8.98323019567734e-06, "loss": 0.06258370876312255, "step": 5270 }, { "epoch": 0.04964705882352941, "grad_norm": 0.9407248374950825, "learning_rate": 8.978970923617854e-06, "loss": 0.04961287379264832, "step": 5275 }, { "epoch": 0.049694117647058826, "grad_norm": 1.0928594026925786, "learning_rate": 8.974717704240557e-06, "loss": 0.04528616070747375, "step": 5280 }, { "epoch": 0.04974117647058823, "grad_norm": 1.2986721344864895, "learning_rate": 8.970470523223642e-06, "loss": 0.05431901216506958, "step": 5285 }, { "epoch": 0.04978823529411765, "grad_norm": 0.9749814143665109, "learning_rate": 8.9662293662927e-06, "loss": 0.05256264805793762, "step": 5290 }, { "epoch": 0.04983529411764706, "grad_norm": 0.8537521936373835, "learning_rate": 8.961994219220514e-06, "loss": 0.04916108250617981, "step": 5295 }, { "epoch": 0.04988235294117647, "grad_norm": 0.8676253600790981, "learning_rate": 8.957765067826871e-06, "loss": 0.057344257831573486, "step": 5300 }, { "epoch": 0.04992941176470588, "grad_norm": 0.955184482158896, "learning_rate": 8.953541897978349e-06, "loss": 0.05558730959892273, "step": 5305 }, { "epoch": 0.049976470588235296, "grad_norm": 0.8051477597676827, "learning_rate": 8.949324695588125e-06, "loss": 0.05644655227661133, "step": 5310 }, { "epoch": 0.0500235294117647, "grad_norm": 0.7081105935936324, "learning_rate": 8.945113446615784e-06, "loss": 0.04919471442699432, "step": 5315 }, { "epoch": 0.05007058823529412, "grad_norm": 1.032176082873376, "learning_rate": 8.940908137067108e-06, "loss": 0.05105513334274292, "step": 5320 }, { "epoch": 0.05011764705882353, "grad_norm": 0.7508292878769429, "learning_rate": 8.936708752993897e-06, "loss": 0.05345001220703125, "step": 5325 }, { "epoch": 0.050164705882352945, "grad_norm": 1.5339889813262497, "learning_rate": 8.932515280493758e-06, "loss": 0.054362809658050536, "step": 5330 }, { "epoch": 0.05021176470588235, "grad_norm": 0.8032467629630343, "learning_rate": 8.928327705709924e-06, "loss": 0.05244559645652771, "step": 5335 }, { "epoch": 0.050258823529411766, "grad_norm": 0.8799938966360781, "learning_rate": 8.924146014831061e-06, "loss": 0.05392885208129883, "step": 5340 }, { "epoch": 0.05030588235294118, "grad_norm": 0.6410570855795444, "learning_rate": 8.919970194091068e-06, "loss": 0.04568852782249451, "step": 5345 }, { "epoch": 0.050352941176470586, "grad_norm": 0.8184236551382592, "learning_rate": 8.915800229768899e-06, "loss": 0.05361774563789368, "step": 5350 }, { "epoch": 0.0504, "grad_norm": 0.7240418216126692, "learning_rate": 8.911636108188359e-06, "loss": 0.04499505758285523, "step": 5355 }, { "epoch": 0.050447058823529414, "grad_norm": 1.3419423414539686, "learning_rate": 8.907477815717927e-06, "loss": 0.04830817580223083, "step": 5360 }, { "epoch": 0.05049411764705882, "grad_norm": 0.8400923686358776, "learning_rate": 8.903325338770568e-06, "loss": 0.04725008904933929, "step": 5365 }, { "epoch": 0.050541176470588235, "grad_norm": 0.7879981324129122, "learning_rate": 8.89917866380354e-06, "loss": 0.04605258405208588, "step": 5370 }, { "epoch": 0.05058823529411765, "grad_norm": 1.0938005938363018, "learning_rate": 8.895037777318212e-06, "loss": 0.06101258993148804, "step": 5375 }, { "epoch": 0.050635294117647056, "grad_norm": 0.9750113462795236, "learning_rate": 8.890902665859885e-06, "loss": 0.05304529666900635, "step": 5380 }, { "epoch": 0.05068235294117647, "grad_norm": 0.8840475095538707, "learning_rate": 8.886773316017593e-06, "loss": 0.051707673072814944, "step": 5385 }, { "epoch": 0.050729411764705884, "grad_norm": 1.2636677700747256, "learning_rate": 8.882649714423938e-06, "loss": 0.051524817943573, "step": 5390 }, { "epoch": 0.05077647058823529, "grad_norm": 0.9317896217267109, "learning_rate": 8.878531847754902e-06, "loss": 0.06413198709487915, "step": 5395 }, { "epoch": 0.050823529411764705, "grad_norm": 0.7476104149389644, "learning_rate": 8.87441970272966e-06, "loss": 0.05303781032562256, "step": 5400 }, { "epoch": 0.05087058823529412, "grad_norm": 1.1533931854460118, "learning_rate": 8.870313266110399e-06, "loss": 0.05697816014289856, "step": 5405 }, { "epoch": 0.05091764705882353, "grad_norm": 0.9386825525421233, "learning_rate": 8.866212524702158e-06, "loss": 0.052917075157165525, "step": 5410 }, { "epoch": 0.05096470588235294, "grad_norm": 0.7795382116149762, "learning_rate": 8.862117465352633e-06, "loss": 0.04973183274269104, "step": 5415 }, { "epoch": 0.051011764705882354, "grad_norm": 1.2451746412035671, "learning_rate": 8.858028074951996e-06, "loss": 0.052921134233474734, "step": 5420 }, { "epoch": 0.05105882352941177, "grad_norm": 1.0088284258033975, "learning_rate": 8.853944340432733e-06, "loss": 0.05510483980178833, "step": 5425 }, { "epoch": 0.051105882352941175, "grad_norm": 0.8248511921715805, "learning_rate": 8.849866248769462e-06, "loss": 0.05235458016395569, "step": 5430 }, { "epoch": 0.05115294117647059, "grad_norm": 1.2061043646706848, "learning_rate": 8.845793786978755e-06, "loss": 0.047932443022727964, "step": 5435 }, { "epoch": 0.0512, "grad_norm": 1.2028203124689862, "learning_rate": 8.84172694211897e-06, "loss": 0.05447284579277038, "step": 5440 }, { "epoch": 0.05124705882352941, "grad_norm": 0.9612308835758209, "learning_rate": 8.837665701290078e-06, "loss": 0.04547366499900818, "step": 5445 }, { "epoch": 0.05129411764705882, "grad_norm": 0.9974438826576741, "learning_rate": 8.83361005163348e-06, "loss": 0.042829209566116334, "step": 5450 }, { "epoch": 0.05134117647058824, "grad_norm": 1.4747160415279634, "learning_rate": 8.829559980331861e-06, "loss": 0.060974007844924925, "step": 5455 }, { "epoch": 0.051388235294117644, "grad_norm": 1.3723066820627658, "learning_rate": 8.825515474608991e-06, "loss": 0.04418237209320068, "step": 5460 }, { "epoch": 0.05143529411764706, "grad_norm": 0.8765080774289914, "learning_rate": 8.821476521729572e-06, "loss": 0.048278403282165525, "step": 5465 }, { "epoch": 0.05148235294117647, "grad_norm": 0.8986639323771106, "learning_rate": 8.81744310899907e-06, "loss": 0.051570141315460206, "step": 5470 }, { "epoch": 0.05152941176470588, "grad_norm": 1.0237379701617866, "learning_rate": 8.81341522376354e-06, "loss": 0.05593241453170776, "step": 5475 }, { "epoch": 0.05157647058823529, "grad_norm": 1.1147006555085472, "learning_rate": 8.809392853409466e-06, "loss": 0.05405416488647461, "step": 5480 }, { "epoch": 0.05162352941176471, "grad_norm": 0.679170846427563, "learning_rate": 8.80537598536359e-06, "loss": 0.04074937105178833, "step": 5485 }, { "epoch": 0.05167058823529412, "grad_norm": 0.837994214966857, "learning_rate": 8.801364607092749e-06, "loss": 0.05515358448028564, "step": 5490 }, { "epoch": 0.05171764705882353, "grad_norm": 1.2609485801850697, "learning_rate": 8.797358706103712e-06, "loss": 0.04903667867183685, "step": 5495 }, { "epoch": 0.05176470588235294, "grad_norm": 1.0818578212074184, "learning_rate": 8.793358269943015e-06, "loss": 0.05337224006652832, "step": 5500 }, { "epoch": 0.051811764705882356, "grad_norm": 0.6201030171792075, "learning_rate": 8.7893632861968e-06, "loss": 0.05150498747825623, "step": 5505 }, { "epoch": 0.05185882352941176, "grad_norm": 0.897913269539895, "learning_rate": 8.785373742490649e-06, "loss": 0.050084471702575684, "step": 5510 }, { "epoch": 0.05190588235294118, "grad_norm": 0.772827779484793, "learning_rate": 8.781389626489432e-06, "loss": 0.06321983337402344, "step": 5515 }, { "epoch": 0.05195294117647059, "grad_norm": 0.6749894212461224, "learning_rate": 8.777410925897133e-06, "loss": 0.05589754581451416, "step": 5520 }, { "epoch": 0.052, "grad_norm": 1.3199364462196828, "learning_rate": 8.773437628456704e-06, "loss": 0.04928069114685059, "step": 5525 }, { "epoch": 0.05204705882352941, "grad_norm": 0.9041286714601304, "learning_rate": 8.769469721949901e-06, "loss": 0.05671446919441223, "step": 5530 }, { "epoch": 0.052094117647058825, "grad_norm": 0.9586132159011023, "learning_rate": 8.765507194197123e-06, "loss": 0.038549506664276124, "step": 5535 }, { "epoch": 0.05214117647058823, "grad_norm": 0.943095725680445, "learning_rate": 8.76155003305726e-06, "loss": 0.05039359331130981, "step": 5540 }, { "epoch": 0.052188235294117646, "grad_norm": 1.0371494971395705, "learning_rate": 8.757598226427536e-06, "loss": 0.04351224601268768, "step": 5545 }, { "epoch": 0.05223529411764706, "grad_norm": 1.0502927663945814, "learning_rate": 8.753651762243348e-06, "loss": 0.0606536865234375, "step": 5550 }, { "epoch": 0.05228235294117647, "grad_norm": 0.8594834719257122, "learning_rate": 8.749710628478122e-06, "loss": 0.05014607310295105, "step": 5555 }, { "epoch": 0.05232941176470588, "grad_norm": 0.8272966532305489, "learning_rate": 8.745774813143146e-06, "loss": 0.055352413654327394, "step": 5560 }, { "epoch": 0.052376470588235295, "grad_norm": 0.8227646108452631, "learning_rate": 8.741844304287432e-06, "loss": 0.04190017580986023, "step": 5565 }, { "epoch": 0.05242352941176471, "grad_norm": 1.1842916682843874, "learning_rate": 8.737919089997546e-06, "loss": 0.05067229866981506, "step": 5570 }, { "epoch": 0.052470588235294116, "grad_norm": 0.9411431722496242, "learning_rate": 8.733999158397469e-06, "loss": 0.04605227708816528, "step": 5575 }, { "epoch": 0.05251764705882353, "grad_norm": 1.1087184910504932, "learning_rate": 8.730084497648447e-06, "loss": 0.05422499179840088, "step": 5580 }, { "epoch": 0.052564705882352944, "grad_norm": 0.8360582319522784, "learning_rate": 8.72617509594883e-06, "loss": 0.05948336124420166, "step": 5585 }, { "epoch": 0.05261176470588235, "grad_norm": 0.6978950233144853, "learning_rate": 8.722270941533937e-06, "loss": 0.04081909358501434, "step": 5590 }, { "epoch": 0.052658823529411765, "grad_norm": 0.9827716368568628, "learning_rate": 8.718372022675886e-06, "loss": 0.05621950030326843, "step": 5595 }, { "epoch": 0.05270588235294118, "grad_norm": 0.8957818833370016, "learning_rate": 8.714478327683473e-06, "loss": 0.054260486364364625, "step": 5600 }, { "epoch": 0.052752941176470586, "grad_norm": 0.9430886992337426, "learning_rate": 8.710589844902005e-06, "loss": 0.05478161573410034, "step": 5605 }, { "epoch": 0.0528, "grad_norm": 0.8073626594425586, "learning_rate": 8.70670656271316e-06, "loss": 0.05106542110443115, "step": 5610 }, { "epoch": 0.052847058823529414, "grad_norm": 0.9526114831294744, "learning_rate": 8.702828469534838e-06, "loss": 0.059408044815063475, "step": 5615 }, { "epoch": 0.05289411764705882, "grad_norm": 0.891030888735009, "learning_rate": 8.698955553821023e-06, "loss": 0.05613881945610046, "step": 5620 }, { "epoch": 0.052941176470588235, "grad_norm": 1.0327009394656455, "learning_rate": 8.695087804061636e-06, "loss": 0.059497737884521486, "step": 5625 }, { "epoch": 0.05298823529411765, "grad_norm": 0.9142698303619715, "learning_rate": 8.691225208782384e-06, "loss": 0.05252029895782471, "step": 5630 }, { "epoch": 0.053035294117647055, "grad_norm": 0.9721092140426839, "learning_rate": 8.687367756544625e-06, "loss": 0.0478814572095871, "step": 5635 }, { "epoch": 0.05308235294117647, "grad_norm": 0.8640397074930004, "learning_rate": 8.683515435945224e-06, "loss": 0.05765978693962097, "step": 5640 }, { "epoch": 0.05312941176470588, "grad_norm": 0.9234419867538567, "learning_rate": 8.679668235616414e-06, "loss": 0.045256626605987546, "step": 5645 }, { "epoch": 0.0531764705882353, "grad_norm": 0.84563475254899, "learning_rate": 8.675826144225644e-06, "loss": 0.05213707685470581, "step": 5650 }, { "epoch": 0.053223529411764704, "grad_norm": 1.168614716899313, "learning_rate": 8.671989150475451e-06, "loss": 0.046965858340263365, "step": 5655 }, { "epoch": 0.05327058823529412, "grad_norm": 1.0869491645317955, "learning_rate": 8.668157243103318e-06, "loss": 0.047701025009155275, "step": 5660 }, { "epoch": 0.05331764705882353, "grad_norm": 1.0666331589236278, "learning_rate": 8.664330410881534e-06, "loss": 0.0606535017490387, "step": 5665 }, { "epoch": 0.05336470588235294, "grad_norm": 0.8279875216550493, "learning_rate": 8.660508642617051e-06, "loss": 0.04395399689674377, "step": 5670 }, { "epoch": 0.05341176470588235, "grad_norm": 0.8767543005391596, "learning_rate": 8.65669192715135e-06, "loss": 0.05035778284072876, "step": 5675 }, { "epoch": 0.05345882352941177, "grad_norm": 1.5222012683508181, "learning_rate": 8.652880253360311e-06, "loss": 0.057291197776794436, "step": 5680 }, { "epoch": 0.053505882352941174, "grad_norm": 0.8926339588580788, "learning_rate": 8.649073610154066e-06, "loss": 0.05980679988861084, "step": 5685 }, { "epoch": 0.05355294117647059, "grad_norm": 0.8072682737718888, "learning_rate": 8.64527198647687e-06, "loss": 0.06227710247039795, "step": 5690 }, { "epoch": 0.0536, "grad_norm": 1.4232836324315092, "learning_rate": 8.641475371306964e-06, "loss": 0.059049397706985474, "step": 5695 }, { "epoch": 0.05364705882352941, "grad_norm": 0.8300823553293007, "learning_rate": 8.637683753656439e-06, "loss": 0.05585963726043701, "step": 5700 }, { "epoch": 0.05369411764705882, "grad_norm": 0.8065185527660718, "learning_rate": 8.63389712257111e-06, "loss": 0.046714401245117186, "step": 5705 }, { "epoch": 0.05374117647058824, "grad_norm": 0.743206842254941, "learning_rate": 8.63011546713037e-06, "loss": 0.04675543904304504, "step": 5710 }, { "epoch": 0.053788235294117644, "grad_norm": 0.5803874328485218, "learning_rate": 8.626338776447069e-06, "loss": 0.043314167857170106, "step": 5715 }, { "epoch": 0.05383529411764706, "grad_norm": 1.0739167992163603, "learning_rate": 8.622567039667382e-06, "loss": 0.05039234161376953, "step": 5720 }, { "epoch": 0.05388235294117647, "grad_norm": 1.0629884703846533, "learning_rate": 8.618800245970671e-06, "loss": 0.049304628372192384, "step": 5725 }, { "epoch": 0.053929411764705885, "grad_norm": 1.1627960030681619, "learning_rate": 8.615038384569359e-06, "loss": 0.054071635007858276, "step": 5730 }, { "epoch": 0.05397647058823529, "grad_norm": 0.85972807925695, "learning_rate": 8.611281444708799e-06, "loss": 0.057187867164611814, "step": 5735 }, { "epoch": 0.054023529411764706, "grad_norm": 0.6900990099392259, "learning_rate": 8.607529415667146e-06, "loss": 0.04407526850700379, "step": 5740 }, { "epoch": 0.05407058823529412, "grad_norm": 0.7916571583921332, "learning_rate": 8.603782286755228e-06, "loss": 0.04663759171962738, "step": 5745 }, { "epoch": 0.05411764705882353, "grad_norm": 0.9051054346449778, "learning_rate": 8.600040047316418e-06, "loss": 0.05105094909667969, "step": 5750 }, { "epoch": 0.05416470588235294, "grad_norm": 0.9996517906985234, "learning_rate": 8.596302686726507e-06, "loss": 0.0500701904296875, "step": 5755 }, { "epoch": 0.054211764705882355, "grad_norm": 0.9120065131496978, "learning_rate": 8.592570194393576e-06, "loss": 0.05096786618232727, "step": 5760 }, { "epoch": 0.05425882352941176, "grad_norm": 0.9164825953863874, "learning_rate": 8.58884255975787e-06, "loss": 0.046885830163955686, "step": 5765 }, { "epoch": 0.054305882352941176, "grad_norm": 0.7320600700407623, "learning_rate": 8.585119772291679e-06, "loss": 0.055017507076263426, "step": 5770 }, { "epoch": 0.05435294117647059, "grad_norm": 0.907912683259948, "learning_rate": 8.581401821499202e-06, "loss": 0.05011894702911377, "step": 5775 }, { "epoch": 0.0544, "grad_norm": 1.1705824574264143, "learning_rate": 8.577688696916427e-06, "loss": 0.06781928539276123, "step": 5780 }, { "epoch": 0.05444705882352941, "grad_norm": 0.8380932616689235, "learning_rate": 8.573980388111016e-06, "loss": 0.048784464597702026, "step": 5785 }, { "epoch": 0.054494117647058825, "grad_norm": 0.9947919977924322, "learning_rate": 8.570276884682167e-06, "loss": 0.05211031436920166, "step": 5790 }, { "epoch": 0.05454117647058824, "grad_norm": 0.7566385362796085, "learning_rate": 8.566578176260504e-06, "loss": 0.045276933908462526, "step": 5795 }, { "epoch": 0.054588235294117646, "grad_norm": 0.8395984069402485, "learning_rate": 8.562884252507949e-06, "loss": 0.047098970413208006, "step": 5800 }, { "epoch": 0.05463529411764706, "grad_norm": 1.2085215394798732, "learning_rate": 8.559195103117599e-06, "loss": 0.05974348783493042, "step": 5805 }, { "epoch": 0.054682352941176474, "grad_norm": 1.0281661043579473, "learning_rate": 8.55551071781361e-06, "loss": 0.04791990220546723, "step": 5810 }, { "epoch": 0.05472941176470588, "grad_norm": 0.964899469864378, "learning_rate": 8.551831086351073e-06, "loss": 0.05207066535949707, "step": 5815 }, { "epoch": 0.054776470588235295, "grad_norm": 0.9900127395064654, "learning_rate": 8.548156198515902e-06, "loss": 0.058225595951080324, "step": 5820 }, { "epoch": 0.05482352941176471, "grad_norm": 0.9253616314587143, "learning_rate": 8.5444860441247e-06, "loss": 0.04553566575050354, "step": 5825 }, { "epoch": 0.054870588235294115, "grad_norm": 1.261596199735791, "learning_rate": 8.540820613024657e-06, "loss": 0.06569744348526001, "step": 5830 }, { "epoch": 0.05491764705882353, "grad_norm": 0.8861984666205212, "learning_rate": 8.53715989509342e-06, "loss": 0.06487873792648316, "step": 5835 }, { "epoch": 0.05496470588235294, "grad_norm": 0.8501078205475938, "learning_rate": 8.533503880238984e-06, "loss": 0.05103581547737122, "step": 5840 }, { "epoch": 0.05501176470588235, "grad_norm": 0.9576106021479504, "learning_rate": 8.529852558399565e-06, "loss": 0.050140345096588136, "step": 5845 }, { "epoch": 0.055058823529411764, "grad_norm": 1.1343765448582062, "learning_rate": 8.526205919543496e-06, "loss": 0.05175929665565491, "step": 5850 }, { "epoch": 0.05510588235294118, "grad_norm": 1.2285851204850762, "learning_rate": 8.522563953669102e-06, "loss": 0.04795929789543152, "step": 5855 }, { "epoch": 0.055152941176470585, "grad_norm": 1.0023845218574694, "learning_rate": 8.51892665080459e-06, "loss": 0.047503143548965454, "step": 5860 }, { "epoch": 0.0552, "grad_norm": 0.8728698276586965, "learning_rate": 8.51529400100793e-06, "loss": 0.04841610789299011, "step": 5865 }, { "epoch": 0.05524705882352941, "grad_norm": 2.493686308846383, "learning_rate": 8.511665994366745e-06, "loss": 0.04646971821784973, "step": 5870 }, { "epoch": 0.05529411764705883, "grad_norm": 0.8869362895804578, "learning_rate": 8.508042620998196e-06, "loss": 0.04840872287750244, "step": 5875 }, { "epoch": 0.055341176470588234, "grad_norm": 1.0686057736599877, "learning_rate": 8.504423871048863e-06, "loss": 0.05281339287757873, "step": 5880 }, { "epoch": 0.05538823529411765, "grad_norm": 0.7928733886003791, "learning_rate": 8.500809734694647e-06, "loss": 0.059424185752868654, "step": 5885 }, { "epoch": 0.05543529411764706, "grad_norm": 0.8868830745252144, "learning_rate": 8.497200202140642e-06, "loss": 0.043534868955612184, "step": 5890 }, { "epoch": 0.05548235294117647, "grad_norm": 0.9099081434852108, "learning_rate": 8.493595263621033e-06, "loss": 0.05462719202041626, "step": 5895 }, { "epoch": 0.05552941176470588, "grad_norm": 0.8018605586317176, "learning_rate": 8.489994909398982e-06, "loss": 0.05847443342208862, "step": 5900 }, { "epoch": 0.0555764705882353, "grad_norm": 0.8371058927668092, "learning_rate": 8.486399129766518e-06, "loss": 0.045787274837493896, "step": 5905 }, { "epoch": 0.055623529411764704, "grad_norm": 0.9913841733491023, "learning_rate": 8.482807915044426e-06, "loss": 0.049982213973999025, "step": 5910 }, { "epoch": 0.05567058823529412, "grad_norm": 0.8158716788249954, "learning_rate": 8.479221255582143e-06, "loss": 0.05500664710998535, "step": 5915 }, { "epoch": 0.05571764705882353, "grad_norm": 0.7813587220125158, "learning_rate": 8.475639141757637e-06, "loss": 0.049361830949783324, "step": 5920 }, { "epoch": 0.05576470588235294, "grad_norm": 0.7093847623759612, "learning_rate": 8.472061563977314e-06, "loss": 0.04965191781520843, "step": 5925 }, { "epoch": 0.05581176470588235, "grad_norm": 0.6605741172512638, "learning_rate": 8.4684885126759e-06, "loss": 0.04913351833820343, "step": 5930 }, { "epoch": 0.055858823529411766, "grad_norm": 0.858915773183397, "learning_rate": 8.464919978316332e-06, "loss": 0.053041040897369385, "step": 5935 }, { "epoch": 0.05590588235294117, "grad_norm": 0.9241772886989035, "learning_rate": 8.46135595138966e-06, "loss": 0.04592163562774658, "step": 5940 }, { "epoch": 0.05595294117647059, "grad_norm": 0.6865409063550446, "learning_rate": 8.45779642241493e-06, "loss": 0.05568005442619324, "step": 5945 }, { "epoch": 0.056, "grad_norm": 0.9753615592845369, "learning_rate": 8.454241381939096e-06, "loss": 0.05047893524169922, "step": 5950 }, { "epoch": 0.056047058823529415, "grad_norm": 0.677130862097504, "learning_rate": 8.450690820536884e-06, "loss": 0.047448340058326724, "step": 5955 }, { "epoch": 0.05609411764705882, "grad_norm": 0.6891631088419412, "learning_rate": 8.447144728810722e-06, "loss": 0.04058254361152649, "step": 5960 }, { "epoch": 0.056141176470588236, "grad_norm": 1.2022786908458507, "learning_rate": 8.443603097390608e-06, "loss": 0.060691225528717044, "step": 5965 }, { "epoch": 0.05618823529411765, "grad_norm": 1.0723757851459024, "learning_rate": 8.440065916934023e-06, "loss": 0.051495879888534546, "step": 5970 }, { "epoch": 0.05623529411764706, "grad_norm": 0.9729597554091151, "learning_rate": 8.436533178125816e-06, "loss": 0.055090171098709104, "step": 5975 }, { "epoch": 0.05628235294117647, "grad_norm": 0.8541436435213156, "learning_rate": 8.433004871678113e-06, "loss": 0.05595778226852417, "step": 5980 }, { "epoch": 0.056329411764705885, "grad_norm": 0.7566710768601963, "learning_rate": 8.429480988330203e-06, "loss": 0.04364965558052063, "step": 5985 }, { "epoch": 0.05637647058823529, "grad_norm": 0.6766541468216617, "learning_rate": 8.42596151884844e-06, "loss": 0.04749383926391602, "step": 5990 }, { "epoch": 0.056423529411764706, "grad_norm": 1.0237079948180312, "learning_rate": 8.422446454026148e-06, "loss": 0.05332149267196655, "step": 5995 }, { "epoch": 0.05647058823529412, "grad_norm": 0.8842287489964114, "learning_rate": 8.418935784683503e-06, "loss": 0.042420053482055665, "step": 6000 }, { "epoch": 0.05651764705882353, "grad_norm": 1.1332999055330126, "learning_rate": 8.415429501667457e-06, "loss": 0.05226143598556519, "step": 6005 }, { "epoch": 0.05656470588235294, "grad_norm": 1.0327554151451668, "learning_rate": 8.411927595851612e-06, "loss": 0.061706829071044925, "step": 6010 }, { "epoch": 0.056611764705882354, "grad_norm": 0.6637925578284748, "learning_rate": 8.408430058136141e-06, "loss": 0.05207816362380981, "step": 6015 }, { "epoch": 0.05665882352941176, "grad_norm": 0.8031054383964359, "learning_rate": 8.404936879447673e-06, "loss": 0.05385408401489258, "step": 6020 }, { "epoch": 0.056705882352941175, "grad_norm": 0.7805812412879618, "learning_rate": 8.401448050739207e-06, "loss": 0.05483897924423218, "step": 6025 }, { "epoch": 0.05675294117647059, "grad_norm": 0.9191053321953049, "learning_rate": 8.39796356299e-06, "loss": 0.04745014309883118, "step": 6030 }, { "epoch": 0.0568, "grad_norm": 0.8238147635273351, "learning_rate": 8.394483407205486e-06, "loss": 0.05643470883369446, "step": 6035 }, { "epoch": 0.05684705882352941, "grad_norm": 0.7807656572884372, "learning_rate": 8.39100757441716e-06, "loss": 0.050235384702682497, "step": 6040 }, { "epoch": 0.056894117647058824, "grad_norm": 1.0171854763908528, "learning_rate": 8.3875360556825e-06, "loss": 0.04613131284713745, "step": 6045 }, { "epoch": 0.05694117647058824, "grad_norm": 0.9370374025570739, "learning_rate": 8.384068842084851e-06, "loss": 0.05300304889678955, "step": 6050 }, { "epoch": 0.056988235294117645, "grad_norm": 1.0493905870602038, "learning_rate": 8.380605924733342e-06, "loss": 0.06813046336174011, "step": 6055 }, { "epoch": 0.05703529411764706, "grad_norm": 1.1599671671998604, "learning_rate": 8.37714729476279e-06, "loss": 0.050168824195861814, "step": 6060 }, { "epoch": 0.05708235294117647, "grad_norm": 1.081735832393309, "learning_rate": 8.373692943333596e-06, "loss": 0.05081000328063965, "step": 6065 }, { "epoch": 0.05712941176470588, "grad_norm": 0.7383716394243669, "learning_rate": 8.370242861631655e-06, "loss": 0.05330377817153931, "step": 6070 }, { "epoch": 0.057176470588235294, "grad_norm": 0.7043292179878256, "learning_rate": 8.366797040868267e-06, "loss": 0.050927352905273435, "step": 6075 }, { "epoch": 0.05722352941176471, "grad_norm": 0.9473540403131722, "learning_rate": 8.363355472280036e-06, "loss": 0.04967974424362183, "step": 6080 }, { "epoch": 0.057270588235294115, "grad_norm": 0.8537689606440889, "learning_rate": 8.359918147128773e-06, "loss": 0.05129367113113403, "step": 6085 }, { "epoch": 0.05731764705882353, "grad_norm": 0.786157387813875, "learning_rate": 8.356485056701415e-06, "loss": 0.047284495830535886, "step": 6090 }, { "epoch": 0.05736470588235294, "grad_norm": 0.8572362469212605, "learning_rate": 8.353056192309921e-06, "loss": 0.053248584270477295, "step": 6095 }, { "epoch": 0.05741176470588235, "grad_norm": 1.0573154847513857, "learning_rate": 8.349631545291187e-06, "loss": 0.052351081371307374, "step": 6100 }, { "epoch": 0.057458823529411764, "grad_norm": 0.8942051361922291, "learning_rate": 8.346211107006944e-06, "loss": 0.04907844960689545, "step": 6105 }, { "epoch": 0.05750588235294118, "grad_norm": 0.9109021955500486, "learning_rate": 8.342794868843684e-06, "loss": 0.04675759077072143, "step": 6110 }, { "epoch": 0.05755294117647059, "grad_norm": 1.2521567110066536, "learning_rate": 8.339382822212553e-06, "loss": 0.047817122936248777, "step": 6115 }, { "epoch": 0.0576, "grad_norm": 0.7110382959112761, "learning_rate": 8.335974958549263e-06, "loss": 0.04816058874130249, "step": 6120 }, { "epoch": 0.05764705882352941, "grad_norm": 0.8207069406194546, "learning_rate": 8.33257126931401e-06, "loss": 0.04352743625640869, "step": 6125 }, { "epoch": 0.057694117647058826, "grad_norm": 1.0319938520566418, "learning_rate": 8.32917174599137e-06, "loss": 0.04596584439277649, "step": 6130 }, { "epoch": 0.05774117647058823, "grad_norm": 0.9266997344069365, "learning_rate": 8.325776380090228e-06, "loss": 0.049817335605621335, "step": 6135 }, { "epoch": 0.05778823529411765, "grad_norm": 0.8345835637434753, "learning_rate": 8.322385163143674e-06, "loss": 0.046652215719223025, "step": 6140 }, { "epoch": 0.05783529411764706, "grad_norm": 2.001048068770478, "learning_rate": 8.318998086708919e-06, "loss": 0.045704382658004764, "step": 6145 }, { "epoch": 0.05788235294117647, "grad_norm": 1.0479628633210825, "learning_rate": 8.31561514236721e-06, "loss": 0.05514185428619385, "step": 6150 }, { "epoch": 0.05792941176470588, "grad_norm": 0.9026561899520112, "learning_rate": 8.312236321723733e-06, "loss": 0.04911520183086395, "step": 6155 }, { "epoch": 0.057976470588235296, "grad_norm": 0.9070190084147048, "learning_rate": 8.308861616407539e-06, "loss": 0.04661111533641815, "step": 6160 }, { "epoch": 0.0580235294117647, "grad_norm": 0.971069422499175, "learning_rate": 8.305491018071442e-06, "loss": 0.0522200345993042, "step": 6165 }, { "epoch": 0.05807058823529412, "grad_norm": 1.1361741300779553, "learning_rate": 8.30212451839195e-06, "loss": 0.05742270946502685, "step": 6170 }, { "epoch": 0.05811764705882353, "grad_norm": 0.7810297465904954, "learning_rate": 8.298762109069155e-06, "loss": 0.04292632043361664, "step": 6175 }, { "epoch": 0.05816470588235294, "grad_norm": 0.7870819787231187, "learning_rate": 8.295403781826672e-06, "loss": 0.04279356300830841, "step": 6180 }, { "epoch": 0.05821176470588235, "grad_norm": 1.0022953224117954, "learning_rate": 8.292049528411538e-06, "loss": 0.05106249451637268, "step": 6185 }, { "epoch": 0.058258823529411766, "grad_norm": 1.0149536643528154, "learning_rate": 8.28869934059413e-06, "loss": 0.04770632088184357, "step": 6190 }, { "epoch": 0.05830588235294118, "grad_norm": 2.3145271557087255, "learning_rate": 8.28535321016808e-06, "loss": 0.04815227091312409, "step": 6195 }, { "epoch": 0.05835294117647059, "grad_norm": 0.7408430199256983, "learning_rate": 8.282011128950193e-06, "loss": 0.04952155351638794, "step": 6200 }, { "epoch": 0.0584, "grad_norm": 0.7364149940238738, "learning_rate": 8.278673088780364e-06, "loss": 0.04813927710056305, "step": 6205 }, { "epoch": 0.058447058823529414, "grad_norm": 0.9413641698123109, "learning_rate": 8.275339081521487e-06, "loss": 0.05054261684417725, "step": 6210 }, { "epoch": 0.05849411764705882, "grad_norm": 0.870583506848652, "learning_rate": 8.27200909905938e-06, "loss": 0.04844705462455749, "step": 6215 }, { "epoch": 0.058541176470588235, "grad_norm": 0.7879441407024661, "learning_rate": 8.268683133302696e-06, "loss": 0.0492041677236557, "step": 6220 }, { "epoch": 0.05858823529411765, "grad_norm": 1.0339899396164387, "learning_rate": 8.265361176182845e-06, "loss": 0.05559816360473633, "step": 6225 }, { "epoch": 0.058635294117647056, "grad_norm": 0.7897402983168916, "learning_rate": 8.26204321965391e-06, "loss": 0.04272550344467163, "step": 6230 }, { "epoch": 0.05868235294117647, "grad_norm": 0.9784330353123941, "learning_rate": 8.258729255692562e-06, "loss": 0.051071131229400636, "step": 6235 }, { "epoch": 0.058729411764705884, "grad_norm": 0.8155111236244047, "learning_rate": 8.255419276297986e-06, "loss": 0.05380455255508423, "step": 6240 }, { "epoch": 0.05877647058823529, "grad_norm": 0.606756945460779, "learning_rate": 8.252113273491791e-06, "loss": 0.04101290106773377, "step": 6245 }, { "epoch": 0.058823529411764705, "grad_norm": 0.7190311765142511, "learning_rate": 8.248811239317936e-06, "loss": 0.05615334510803223, "step": 6250 }, { "epoch": 0.05887058823529412, "grad_norm": 0.8406632936090913, "learning_rate": 8.245513165842647e-06, "loss": 0.05502117276191711, "step": 6255 }, { "epoch": 0.058917647058823526, "grad_norm": 0.9960549792408586, "learning_rate": 8.242219045154334e-06, "loss": 0.04707815647125244, "step": 6260 }, { "epoch": 0.05896470588235294, "grad_norm": 0.9628698524224838, "learning_rate": 8.238928869363522e-06, "loss": 0.04915645718574524, "step": 6265 }, { "epoch": 0.059011764705882354, "grad_norm": 0.7290291402802391, "learning_rate": 8.235642630602752e-06, "loss": 0.050112968683242796, "step": 6270 }, { "epoch": 0.05905882352941177, "grad_norm": 1.075981972035044, "learning_rate": 8.232360321026529e-06, "loss": 0.04577712118625641, "step": 6275 }, { "epoch": 0.059105882352941175, "grad_norm": 0.6647465561072707, "learning_rate": 8.229081932811212e-06, "loss": 0.04285544157028198, "step": 6280 }, { "epoch": 0.05915294117647059, "grad_norm": 2.0263990665445375, "learning_rate": 8.225807458154964e-06, "loss": 0.04741703271865845, "step": 6285 }, { "epoch": 0.0592, "grad_norm": 0.7900682567572624, "learning_rate": 8.222536889277656e-06, "loss": 0.042841899394989016, "step": 6290 }, { "epoch": 0.05924705882352941, "grad_norm": 0.7979253813355666, "learning_rate": 8.219270218420802e-06, "loss": 0.05929479598999023, "step": 6295 }, { "epoch": 0.059294117647058824, "grad_norm": 0.9361183567726062, "learning_rate": 8.216007437847468e-06, "loss": 0.061690300703048706, "step": 6300 }, { "epoch": 0.05934117647058824, "grad_norm": 0.8786213626235809, "learning_rate": 8.212748539842202e-06, "loss": 0.04676767885684967, "step": 6305 }, { "epoch": 0.059388235294117644, "grad_norm": 1.1229842914902204, "learning_rate": 8.209493516710963e-06, "loss": 0.05632607340812683, "step": 6310 }, { "epoch": 0.05943529411764706, "grad_norm": 0.8022004436624581, "learning_rate": 8.206242360781035e-06, "loss": 0.04318778514862061, "step": 6315 }, { "epoch": 0.05948235294117647, "grad_norm": 1.0178902880328162, "learning_rate": 8.20299506440096e-06, "loss": 0.04592931866645813, "step": 6320 }, { "epoch": 0.05952941176470588, "grad_norm": 0.7384016792790267, "learning_rate": 8.19975161994045e-06, "loss": 0.04877558350563049, "step": 6325 }, { "epoch": 0.05957647058823529, "grad_norm": 0.9019370165093751, "learning_rate": 8.196512019790323e-06, "loss": 0.06127631664276123, "step": 6330 }, { "epoch": 0.05962352941176471, "grad_norm": 1.0243688591347508, "learning_rate": 8.193276256362429e-06, "loss": 0.05354619026184082, "step": 6335 }, { "epoch": 0.05967058823529412, "grad_norm": 0.8085659797394995, "learning_rate": 8.190044322089562e-06, "loss": 0.042973273992538454, "step": 6340 }, { "epoch": 0.05971764705882353, "grad_norm": 0.8164568815683131, "learning_rate": 8.1868162094254e-06, "loss": 0.04673422873020172, "step": 6345 }, { "epoch": 0.05976470588235294, "grad_norm": 0.8295195650306754, "learning_rate": 8.183591910844424e-06, "loss": 0.044617480039596556, "step": 6350 }, { "epoch": 0.059811764705882356, "grad_norm": 0.7629725406813801, "learning_rate": 8.180371418841849e-06, "loss": 0.04905260801315307, "step": 6355 }, { "epoch": 0.05985882352941176, "grad_norm": 0.8713486789581275, "learning_rate": 8.177154725933539e-06, "loss": 0.04847681522369385, "step": 6360 }, { "epoch": 0.05990588235294118, "grad_norm": 0.7605126879401918, "learning_rate": 8.173941824655952e-06, "loss": 0.04892257452011108, "step": 6365 }, { "epoch": 0.05995294117647059, "grad_norm": 0.8424397871777084, "learning_rate": 8.170732707566046e-06, "loss": 0.048999863862991336, "step": 6370 }, { "epoch": 0.06, "grad_norm": 0.763467614480443, "learning_rate": 8.167527367241229e-06, "loss": 0.04810271859169006, "step": 6375 }, { "epoch": 0.06004705882352941, "grad_norm": 0.959459328750256, "learning_rate": 8.164325796279273e-06, "loss": 0.04907872974872589, "step": 6380 }, { "epoch": 0.060094117647058826, "grad_norm": 1.051092197583926, "learning_rate": 8.161127987298243e-06, "loss": 0.06160105466842651, "step": 6385 }, { "epoch": 0.06014117647058823, "grad_norm": 0.6902743491609424, "learning_rate": 8.157933932936427e-06, "loss": 0.04432865679264068, "step": 6390 }, { "epoch": 0.06018823529411765, "grad_norm": 1.420272088771326, "learning_rate": 8.154743625852268e-06, "loss": 0.04592020511627197, "step": 6395 }, { "epoch": 0.06023529411764706, "grad_norm": 0.8296535197363074, "learning_rate": 8.151557058724288e-06, "loss": 0.04713435173034668, "step": 6400 }, { "epoch": 0.06028235294117647, "grad_norm": 0.7209744376949923, "learning_rate": 8.148374224251026e-06, "loss": 0.046643304824829104, "step": 6405 }, { "epoch": 0.06032941176470588, "grad_norm": 0.8691869178770356, "learning_rate": 8.145195115150952e-06, "loss": 0.04695774018764496, "step": 6410 }, { "epoch": 0.060376470588235295, "grad_norm": 0.8687113929997744, "learning_rate": 8.142019724162417e-06, "loss": 0.04725030064582825, "step": 6415 }, { "epoch": 0.06042352941176471, "grad_norm": 0.7505114114056418, "learning_rate": 8.138848044043567e-06, "loss": 0.045259779691696166, "step": 6420 }, { "epoch": 0.060470588235294116, "grad_norm": 0.8264244713097836, "learning_rate": 8.135680067572281e-06, "loss": 0.0513832688331604, "step": 6425 }, { "epoch": 0.06051764705882353, "grad_norm": 0.6316608308317675, "learning_rate": 8.132515787546101e-06, "loss": 0.04821864366531372, "step": 6430 }, { "epoch": 0.060564705882352944, "grad_norm": 0.9188528845288957, "learning_rate": 8.129355196782165e-06, "loss": 0.04632043242454529, "step": 6435 }, { "epoch": 0.06061176470588235, "grad_norm": 0.6437237895987963, "learning_rate": 8.126198288117132e-06, "loss": 0.042613250017166135, "step": 6440 }, { "epoch": 0.060658823529411765, "grad_norm": 0.9028947149590185, "learning_rate": 8.123045054407123e-06, "loss": 0.05213314890861511, "step": 6445 }, { "epoch": 0.06070588235294118, "grad_norm": 0.9577412257906017, "learning_rate": 8.119895488527644e-06, "loss": 0.04607585370540619, "step": 6450 }, { "epoch": 0.060752941176470586, "grad_norm": 0.7203160566102496, "learning_rate": 8.116749583373526e-06, "loss": 0.0490864098072052, "step": 6455 }, { "epoch": 0.0608, "grad_norm": 0.8157096223345541, "learning_rate": 8.113607331858854e-06, "loss": 0.04447671175003052, "step": 6460 }, { "epoch": 0.060847058823529414, "grad_norm": 0.5883408489119355, "learning_rate": 8.1104687269169e-06, "loss": 0.04774403870105744, "step": 6465 }, { "epoch": 0.06089411764705882, "grad_norm": 0.8067104906615159, "learning_rate": 8.107333761500056e-06, "loss": 0.045851320028305054, "step": 6470 }, { "epoch": 0.060941176470588235, "grad_norm": 0.9236067041239623, "learning_rate": 8.104202428579769e-06, "loss": 0.05352299809455872, "step": 6475 }, { "epoch": 0.06098823529411765, "grad_norm": 0.8169448502634551, "learning_rate": 8.101074721146472e-06, "loss": 0.04454232156276703, "step": 6480 }, { "epoch": 0.061035294117647056, "grad_norm": 0.9075550105379604, "learning_rate": 8.097950632209526e-06, "loss": 0.04269312024116516, "step": 6485 }, { "epoch": 0.06108235294117647, "grad_norm": 0.7217841930566616, "learning_rate": 8.094830154797141e-06, "loss": 0.05304018259048462, "step": 6490 }, { "epoch": 0.061129411764705884, "grad_norm": 0.7830616518187129, "learning_rate": 8.091713281956322e-06, "loss": 0.04549948871135712, "step": 6495 }, { "epoch": 0.0611764705882353, "grad_norm": 0.6963193366831701, "learning_rate": 8.088600006752803e-06, "loss": 0.04503432810306549, "step": 6500 }, { "epoch": 0.061223529411764704, "grad_norm": 0.7725412091121703, "learning_rate": 8.085490322270971e-06, "loss": 0.04464373886585236, "step": 6505 }, { "epoch": 0.06127058823529412, "grad_norm": 0.9728201586684246, "learning_rate": 8.082384221613818e-06, "loss": 0.05133069157600403, "step": 6510 }, { "epoch": 0.06131764705882353, "grad_norm": 0.7129664701117695, "learning_rate": 8.079281697902863e-06, "loss": 0.03639741241931915, "step": 6515 }, { "epoch": 0.06136470588235294, "grad_norm": 1.6030157524636697, "learning_rate": 8.076182744278098e-06, "loss": 0.04611032009124756, "step": 6520 }, { "epoch": 0.06141176470588235, "grad_norm": 0.9028438877836901, "learning_rate": 8.073087353897918e-06, "loss": 0.04849598705768585, "step": 6525 }, { "epoch": 0.06145882352941177, "grad_norm": 0.8466538178632796, "learning_rate": 8.069995519939057e-06, "loss": 0.04964205026626587, "step": 6530 }, { "epoch": 0.061505882352941174, "grad_norm": 0.8798189148446433, "learning_rate": 8.066907235596529e-06, "loss": 0.045395946502685545, "step": 6535 }, { "epoch": 0.06155294117647059, "grad_norm": 0.589078232012704, "learning_rate": 8.063822494083567e-06, "loss": 0.048678749799728395, "step": 6540 }, { "epoch": 0.0616, "grad_norm": 0.8345858410109828, "learning_rate": 8.060741288631548e-06, "loss": 0.04342857599258423, "step": 6545 }, { "epoch": 0.06164705882352941, "grad_norm": 1.0917530376658706, "learning_rate": 8.05766361248995e-06, "loss": 0.047490951418876645, "step": 6550 }, { "epoch": 0.06169411764705882, "grad_norm": 0.8750619763327243, "learning_rate": 8.05458945892627e-06, "loss": 0.051048123836517335, "step": 6555 }, { "epoch": 0.06174117647058824, "grad_norm": 1.0929788115707968, "learning_rate": 8.05151882122598e-06, "loss": 0.04916675388813019, "step": 6560 }, { "epoch": 0.061788235294117644, "grad_norm": 0.7631898707314079, "learning_rate": 8.048451692692447e-06, "loss": 0.046158325672149655, "step": 6565 }, { "epoch": 0.06183529411764706, "grad_norm": 0.7007788113717943, "learning_rate": 8.045388066646888e-06, "loss": 0.0442328155040741, "step": 6570 }, { "epoch": 0.06188235294117647, "grad_norm": 0.9435269072878384, "learning_rate": 8.0423279364283e-06, "loss": 0.039794260263442995, "step": 6575 }, { "epoch": 0.061929411764705886, "grad_norm": 0.7576096718419788, "learning_rate": 8.039271295393404e-06, "loss": 0.04627712666988373, "step": 6580 }, { "epoch": 0.06197647058823529, "grad_norm": 0.8186166484453754, "learning_rate": 8.036218136916576e-06, "loss": 0.03908923864364624, "step": 6585 }, { "epoch": 0.062023529411764707, "grad_norm": 1.0876720421447743, "learning_rate": 8.033168454389802e-06, "loss": 0.042777955532073975, "step": 6590 }, { "epoch": 0.06207058823529412, "grad_norm": 0.827544859361499, "learning_rate": 8.0301222412226e-06, "loss": 0.051509320735931396, "step": 6595 }, { "epoch": 0.06211764705882353, "grad_norm": 0.7861959898961169, "learning_rate": 8.027079490841972e-06, "loss": 0.04177828431129456, "step": 6600 }, { "epoch": 0.06216470588235294, "grad_norm": 0.6512555480336669, "learning_rate": 8.02404019669234e-06, "loss": 0.048101669549942015, "step": 6605 }, { "epoch": 0.062211764705882355, "grad_norm": 2.26392245344674, "learning_rate": 8.021004352235494e-06, "loss": 0.053478121757507324, "step": 6610 }, { "epoch": 0.06225882352941176, "grad_norm": 0.8486957520621659, "learning_rate": 8.017971950950516e-06, "loss": 0.04666596055030823, "step": 6615 }, { "epoch": 0.062305882352941176, "grad_norm": 0.679375108006696, "learning_rate": 8.01494298633374e-06, "loss": 0.05086297988891601, "step": 6620 }, { "epoch": 0.06235294117647059, "grad_norm": 1.0102513049448916, "learning_rate": 8.011917451898683e-06, "loss": 0.044361060857772826, "step": 6625 }, { "epoch": 0.0624, "grad_norm": 0.7971772745165422, "learning_rate": 8.008895341175986e-06, "loss": 0.05193319320678711, "step": 6630 }, { "epoch": 0.06244705882352941, "grad_norm": 0.7527634058540538, "learning_rate": 8.005876647713366e-06, "loss": 0.04702990055084229, "step": 6635 }, { "epoch": 0.062494117647058825, "grad_norm": 0.9268374678597557, "learning_rate": 8.002861365075541e-06, "loss": 0.045073673129081726, "step": 6640 }, { "epoch": 0.06254117647058824, "grad_norm": 0.5761443442098394, "learning_rate": 7.999849486844197e-06, "loss": 0.040089771151542664, "step": 6645 }, { "epoch": 0.06258823529411765, "grad_norm": 1.2335679546087375, "learning_rate": 7.9968410066179e-06, "loss": 0.04537184834480286, "step": 6650 }, { "epoch": 0.06263529411764705, "grad_norm": 0.8192823682933885, "learning_rate": 7.993835918012066e-06, "loss": 0.04912831783294678, "step": 6655 }, { "epoch": 0.06268235294117647, "grad_norm": 0.7335623899651175, "learning_rate": 7.990834214658885e-06, "loss": 0.0440189391374588, "step": 6660 }, { "epoch": 0.06272941176470588, "grad_norm": 0.9139803232874147, "learning_rate": 7.987835890207281e-06, "loss": 0.05228636860847473, "step": 6665 }, { "epoch": 0.0627764705882353, "grad_norm": 0.8072385137699039, "learning_rate": 7.984840938322843e-06, "loss": 0.040100932121276855, "step": 6670 }, { "epoch": 0.06282352941176471, "grad_norm": 0.837884033521222, "learning_rate": 7.981849352687766e-06, "loss": 0.048631531000137326, "step": 6675 }, { "epoch": 0.06287058823529412, "grad_norm": 0.8769463935935577, "learning_rate": 7.978861127000812e-06, "loss": 0.05318729877471924, "step": 6680 }, { "epoch": 0.06291764705882352, "grad_norm": 0.7117389878712573, "learning_rate": 7.975876254977241e-06, "loss": 0.05044156312942505, "step": 6685 }, { "epoch": 0.06296470588235294, "grad_norm": 0.9475166349498143, "learning_rate": 7.972894730348753e-06, "loss": 0.04821394085884094, "step": 6690 }, { "epoch": 0.06301176470588235, "grad_norm": 0.8348721279405442, "learning_rate": 7.969916546863444e-06, "loss": 0.04895939826965332, "step": 6695 }, { "epoch": 0.06305882352941176, "grad_norm": 0.8036219549432221, "learning_rate": 7.966941698285746e-06, "loss": 0.04526369571685791, "step": 6700 }, { "epoch": 0.06310588235294118, "grad_norm": 0.8109268253621144, "learning_rate": 7.963970178396367e-06, "loss": 0.055337274074554445, "step": 6705 }, { "epoch": 0.06315294117647059, "grad_norm": 0.8139053855153311, "learning_rate": 7.96100198099224e-06, "loss": 0.047301122546195985, "step": 6710 }, { "epoch": 0.0632, "grad_norm": 0.8160539992846447, "learning_rate": 7.958037099886472e-06, "loss": 0.051296287775039674, "step": 6715 }, { "epoch": 0.0632470588235294, "grad_norm": 1.0394355137749316, "learning_rate": 7.955075528908292e-06, "loss": 0.044166365265846254, "step": 6720 }, { "epoch": 0.06329411764705882, "grad_norm": 0.8857092601614848, "learning_rate": 7.95211726190298e-06, "loss": 0.042069971561431885, "step": 6725 }, { "epoch": 0.06334117647058823, "grad_norm": 0.7066534246409776, "learning_rate": 7.949162292731835e-06, "loss": 0.03688594698905945, "step": 6730 }, { "epoch": 0.06338823529411765, "grad_norm": 0.9702517589757147, "learning_rate": 7.94621061527211e-06, "loss": 0.05462148189544678, "step": 6735 }, { "epoch": 0.06343529411764706, "grad_norm": 0.7523433103406422, "learning_rate": 7.943262223416958e-06, "loss": 0.044864320755004884, "step": 6740 }, { "epoch": 0.06348235294117648, "grad_norm": 0.7744213460322642, "learning_rate": 7.940317111075383e-06, "loss": 0.04641602039337158, "step": 6745 }, { "epoch": 0.06352941176470588, "grad_norm": 0.9531362066099063, "learning_rate": 7.937375272172192e-06, "loss": 0.05273948311805725, "step": 6750 }, { "epoch": 0.06357647058823529, "grad_norm": 0.703871985715468, "learning_rate": 7.934436700647924e-06, "loss": 0.05591185092926025, "step": 6755 }, { "epoch": 0.0636235294117647, "grad_norm": 0.7492140593249799, "learning_rate": 7.931501390458815e-06, "loss": 0.051226407289505005, "step": 6760 }, { "epoch": 0.06367058823529412, "grad_norm": 0.7702113860372327, "learning_rate": 7.928569335576748e-06, "loss": 0.05048145055770874, "step": 6765 }, { "epoch": 0.06371764705882353, "grad_norm": 0.801201931453342, "learning_rate": 7.925640529989181e-06, "loss": 0.0452703595161438, "step": 6770 }, { "epoch": 0.06376470588235295, "grad_norm": 1.0109064876746487, "learning_rate": 7.922714967699117e-06, "loss": 0.052932453155517575, "step": 6775 }, { "epoch": 0.06381176470588236, "grad_norm": 0.8713451373715068, "learning_rate": 7.919792642725038e-06, "loss": 0.04041685461997986, "step": 6780 }, { "epoch": 0.06385882352941176, "grad_norm": 0.8158424423755769, "learning_rate": 7.916873549100858e-06, "loss": 0.04403741359710693, "step": 6785 }, { "epoch": 0.06390588235294117, "grad_norm": 0.7563415699587719, "learning_rate": 7.913957680875881e-06, "loss": 0.04720597267150879, "step": 6790 }, { "epoch": 0.06395294117647059, "grad_norm": 0.8715909642900556, "learning_rate": 7.911045032114733e-06, "loss": 0.04702601432800293, "step": 6795 }, { "epoch": 0.064, "grad_norm": 1.040153415391021, "learning_rate": 7.90813559689732e-06, "loss": 0.0555222749710083, "step": 6800 }, { "epoch": 0.06404705882352942, "grad_norm": 0.6855486287192379, "learning_rate": 7.905229369318784e-06, "loss": 0.0481214851140976, "step": 6805 }, { "epoch": 0.06409411764705883, "grad_norm": 0.7842321774894823, "learning_rate": 7.902326343489441e-06, "loss": 0.049106287956237796, "step": 6810 }, { "epoch": 0.06414117647058823, "grad_norm": 0.9311843940573644, "learning_rate": 7.899426513534736e-06, "loss": 0.04347133040428162, "step": 6815 }, { "epoch": 0.06418823529411764, "grad_norm": 1.161575172722067, "learning_rate": 7.896529873595195e-06, "loss": 0.04208458662033081, "step": 6820 }, { "epoch": 0.06423529411764706, "grad_norm": 0.7941783432254943, "learning_rate": 7.893636417826369e-06, "loss": 0.04267190098762512, "step": 6825 }, { "epoch": 0.06428235294117647, "grad_norm": 0.818236940997158, "learning_rate": 7.890746140398796e-06, "loss": 0.05091539621353149, "step": 6830 }, { "epoch": 0.06432941176470588, "grad_norm": 1.1547813683655752, "learning_rate": 7.887859035497939e-06, "loss": 0.050417423248291016, "step": 6835 }, { "epoch": 0.0643764705882353, "grad_norm": 0.8949882042609182, "learning_rate": 7.884975097324141e-06, "loss": 0.050833648443222045, "step": 6840 }, { "epoch": 0.06442352941176471, "grad_norm": 0.947752679696825, "learning_rate": 7.882094320092584e-06, "loss": 0.0631711483001709, "step": 6845 }, { "epoch": 0.06447058823529411, "grad_norm": 0.7962719797702629, "learning_rate": 7.879216698033228e-06, "loss": 0.05106891989707947, "step": 6850 }, { "epoch": 0.06451764705882353, "grad_norm": 0.5768730270971667, "learning_rate": 7.876342225390769e-06, "loss": 0.05651359558105469, "step": 6855 }, { "epoch": 0.06456470588235294, "grad_norm": 0.8198909043055324, "learning_rate": 7.87347089642459e-06, "loss": 0.0517051100730896, "step": 6860 }, { "epoch": 0.06461176470588235, "grad_norm": 0.8054941557951936, "learning_rate": 7.870602705408713e-06, "loss": 0.041665786504745485, "step": 6865 }, { "epoch": 0.06465882352941177, "grad_norm": 1.2428304638138745, "learning_rate": 7.86773764663175e-06, "loss": 0.054990077018737794, "step": 6870 }, { "epoch": 0.06470588235294118, "grad_norm": 0.8637652095847608, "learning_rate": 7.86487571439686e-06, "loss": 0.04749939441680908, "step": 6875 }, { "epoch": 0.06475294117647058, "grad_norm": 0.725358946481711, "learning_rate": 7.86201690302169e-06, "loss": 0.04918504059314728, "step": 6880 }, { "epoch": 0.0648, "grad_norm": 0.7815767775341909, "learning_rate": 7.859161206838337e-06, "loss": 0.04086994528770447, "step": 6885 }, { "epoch": 0.06484705882352941, "grad_norm": 0.7197384311902245, "learning_rate": 7.856308620193304e-06, "loss": 0.04612023830413818, "step": 6890 }, { "epoch": 0.06489411764705882, "grad_norm": 0.69512349843619, "learning_rate": 7.853459137447442e-06, "loss": 0.03932723701000214, "step": 6895 }, { "epoch": 0.06494117647058824, "grad_norm": 0.6234475127389708, "learning_rate": 7.850612752975912e-06, "loss": 0.04662355482578277, "step": 6900 }, { "epoch": 0.06498823529411765, "grad_norm": 0.9690713692457589, "learning_rate": 7.847769461168128e-06, "loss": 0.04755313098430634, "step": 6905 }, { "epoch": 0.06503529411764705, "grad_norm": 0.7981469257708849, "learning_rate": 7.844929256427732e-06, "loss": 0.04643131196498871, "step": 6910 }, { "epoch": 0.06508235294117647, "grad_norm": 0.6119859100014949, "learning_rate": 7.842092133172521e-06, "loss": 0.03911852240562439, "step": 6915 }, { "epoch": 0.06512941176470588, "grad_norm": 0.8573613624803591, "learning_rate": 7.839258085834419e-06, "loss": 0.050963950157165525, "step": 6920 }, { "epoch": 0.0651764705882353, "grad_norm": 0.8125673853286254, "learning_rate": 7.836427108859418e-06, "loss": 0.04528974294662476, "step": 6925 }, { "epoch": 0.06522352941176471, "grad_norm": 0.7458470835346394, "learning_rate": 7.833599196707555e-06, "loss": 0.052500152587890626, "step": 6930 }, { "epoch": 0.06527058823529412, "grad_norm": 0.8746713319762686, "learning_rate": 7.830774343852837e-06, "loss": 0.044158488512039185, "step": 6935 }, { "epoch": 0.06531764705882354, "grad_norm": 0.7681814432638355, "learning_rate": 7.827952544783217e-06, "loss": 0.046980041265487674, "step": 6940 }, { "epoch": 0.06536470588235294, "grad_norm": 0.7841598939579631, "learning_rate": 7.825133794000536e-06, "loss": 0.053898018598556516, "step": 6945 }, { "epoch": 0.06541176470588235, "grad_norm": 0.7017194594957101, "learning_rate": 7.822318086020492e-06, "loss": 0.04479287266731262, "step": 6950 }, { "epoch": 0.06545882352941176, "grad_norm": 0.8584931786484056, "learning_rate": 7.819505415372581e-06, "loss": 0.0459755539894104, "step": 6955 }, { "epoch": 0.06550588235294118, "grad_norm": 0.9520464048609649, "learning_rate": 7.816695776600061e-06, "loss": 0.04745836853981018, "step": 6960 }, { "epoch": 0.06555294117647059, "grad_norm": 0.7700291888383194, "learning_rate": 7.813889164259902e-06, "loss": 0.04483268857002258, "step": 6965 }, { "epoch": 0.0656, "grad_norm": 0.8319786856097361, "learning_rate": 7.811085572922748e-06, "loss": 0.04714553952217102, "step": 6970 }, { "epoch": 0.0656470588235294, "grad_norm": 0.7120620186768651, "learning_rate": 7.808284997172865e-06, "loss": 0.04796645343303681, "step": 6975 }, { "epoch": 0.06569411764705882, "grad_norm": 0.7527884694819138, "learning_rate": 7.805487431608108e-06, "loss": 0.05355405807495117, "step": 6980 }, { "epoch": 0.06574117647058823, "grad_norm": 0.824058760947598, "learning_rate": 7.802692870839862e-06, "loss": 0.048086348176002505, "step": 6985 }, { "epoch": 0.06578823529411765, "grad_norm": 0.6329186923020561, "learning_rate": 7.799901309493017e-06, "loss": 0.03868172764778137, "step": 6990 }, { "epoch": 0.06583529411764706, "grad_norm": 0.7395342497426673, "learning_rate": 7.797112742205908e-06, "loss": 0.039650171995162964, "step": 6995 }, { "epoch": 0.06588235294117648, "grad_norm": 0.9566328384313526, "learning_rate": 7.79432716363028e-06, "loss": 0.050187861919403075, "step": 7000 }, { "epoch": 0.06592941176470589, "grad_norm": 0.7018780259177543, "learning_rate": 7.791544568431241e-06, "loss": 0.046261394023895265, "step": 7005 }, { "epoch": 0.06597647058823529, "grad_norm": 0.7532652040561428, "learning_rate": 7.788764951287228e-06, "loss": 0.04164751768112183, "step": 7010 }, { "epoch": 0.0660235294117647, "grad_norm": 0.9172447559723574, "learning_rate": 7.785988306889954e-06, "loss": 0.05360597968101501, "step": 7015 }, { "epoch": 0.06607058823529412, "grad_norm": 0.7665271889849838, "learning_rate": 7.783214629944368e-06, "loss": 0.05270863175392151, "step": 7020 }, { "epoch": 0.06611764705882353, "grad_norm": 0.8050910071787595, "learning_rate": 7.780443915168613e-06, "loss": 0.04945349395275116, "step": 7025 }, { "epoch": 0.06616470588235294, "grad_norm": 0.8974700079474576, "learning_rate": 7.777676157293987e-06, "loss": 0.045093482732772826, "step": 7030 }, { "epoch": 0.06621176470588236, "grad_norm": 0.9827025656350916, "learning_rate": 7.774911351064901e-06, "loss": 0.047122296690940854, "step": 7035 }, { "epoch": 0.06625882352941176, "grad_norm": 0.7851630330937928, "learning_rate": 7.772149491238827e-06, "loss": 0.046960973739624025, "step": 7040 }, { "epoch": 0.06630588235294117, "grad_norm": 0.7682306147651133, "learning_rate": 7.76939057258627e-06, "loss": 0.04914337396621704, "step": 7045 }, { "epoch": 0.06635294117647059, "grad_norm": 1.0604869368370633, "learning_rate": 7.766634589890716e-06, "loss": 0.042911380529403687, "step": 7050 }, { "epoch": 0.0664, "grad_norm": 0.6018633326582492, "learning_rate": 7.7638815379486e-06, "loss": 0.03854638040065765, "step": 7055 }, { "epoch": 0.06644705882352941, "grad_norm": 0.8025427200625336, "learning_rate": 7.761131411569256e-06, "loss": 0.04068224430084229, "step": 7060 }, { "epoch": 0.06649411764705883, "grad_norm": 0.8038769017689485, "learning_rate": 7.758384205574877e-06, "loss": 0.040446072816848755, "step": 7065 }, { "epoch": 0.06654117647058823, "grad_norm": 0.9267871933658918, "learning_rate": 7.75563991480048e-06, "loss": 0.051042020320892334, "step": 7070 }, { "epoch": 0.06658823529411764, "grad_norm": 1.1100681342835805, "learning_rate": 7.752898534093863e-06, "loss": 0.05517172813415527, "step": 7075 }, { "epoch": 0.06663529411764706, "grad_norm": 0.7485761378919078, "learning_rate": 7.750160058315558e-06, "loss": 0.04578961431980133, "step": 7080 }, { "epoch": 0.06668235294117647, "grad_norm": 0.6423133248266676, "learning_rate": 7.7474244823388e-06, "loss": 0.04690332412719726, "step": 7085 }, { "epoch": 0.06672941176470588, "grad_norm": 0.9266563571788425, "learning_rate": 7.744691801049483e-06, "loss": 0.04214382767677307, "step": 7090 }, { "epoch": 0.0667764705882353, "grad_norm": 1.0453007546344881, "learning_rate": 7.74196200934611e-06, "loss": 0.03713680505752563, "step": 7095 }, { "epoch": 0.06682352941176471, "grad_norm": 0.8562128907116241, "learning_rate": 7.739235102139774e-06, "loss": 0.050072526931762694, "step": 7100 }, { "epoch": 0.06687058823529411, "grad_norm": 1.0050397633170276, "learning_rate": 7.7365110743541e-06, "loss": 0.04350548982620239, "step": 7105 }, { "epoch": 0.06691764705882353, "grad_norm": 0.8060109901682371, "learning_rate": 7.733789920925212e-06, "loss": 0.04305790662765503, "step": 7110 }, { "epoch": 0.06696470588235294, "grad_norm": 0.8185325772142367, "learning_rate": 7.731071636801691e-06, "loss": 0.04321301281452179, "step": 7115 }, { "epoch": 0.06701176470588235, "grad_norm": 0.7243891010176191, "learning_rate": 7.728356216944545e-06, "loss": 0.047197335958480836, "step": 7120 }, { "epoch": 0.06705882352941177, "grad_norm": 0.7185962307355666, "learning_rate": 7.725643656327156e-06, "loss": 0.04017984867095947, "step": 7125 }, { "epoch": 0.06710588235294118, "grad_norm": 0.6830797786246798, "learning_rate": 7.722933949935247e-06, "loss": 0.038732612133026124, "step": 7130 }, { "epoch": 0.06715294117647058, "grad_norm": 0.8585590573886085, "learning_rate": 7.720227092766847e-06, "loss": 0.04141038358211517, "step": 7135 }, { "epoch": 0.0672, "grad_norm": 0.730927588901938, "learning_rate": 7.717523079832245e-06, "loss": 0.043223094940185544, "step": 7140 }, { "epoch": 0.06724705882352941, "grad_norm": 0.8280125895368314, "learning_rate": 7.71482190615396e-06, "loss": 0.0437760591506958, "step": 7145 }, { "epoch": 0.06729411764705882, "grad_norm": 0.8169251531941044, "learning_rate": 7.712123566766692e-06, "loss": 0.04162079989910126, "step": 7150 }, { "epoch": 0.06734117647058824, "grad_norm": 0.8584692345033823, "learning_rate": 7.709428056717292e-06, "loss": 0.03989492654800415, "step": 7155 }, { "epoch": 0.06738823529411765, "grad_norm": 0.9096730545507387, "learning_rate": 7.70673537106472e-06, "loss": 0.04212767779827118, "step": 7160 }, { "epoch": 0.06743529411764707, "grad_norm": 0.787541569175836, "learning_rate": 7.704045504880008e-06, "loss": 0.05083588361740112, "step": 7165 }, { "epoch": 0.06748235294117647, "grad_norm": 0.6807892511499272, "learning_rate": 7.701358453246224e-06, "loss": 0.04281621277332306, "step": 7170 }, { "epoch": 0.06752941176470588, "grad_norm": 0.5922775402324579, "learning_rate": 7.698674211258432e-06, "loss": 0.049913936853408815, "step": 7175 }, { "epoch": 0.0675764705882353, "grad_norm": 2.4622189544768553, "learning_rate": 7.695992774023649e-06, "loss": 0.048590284585952756, "step": 7180 }, { "epoch": 0.06762352941176471, "grad_norm": 1.0465706185421928, "learning_rate": 7.69331413666082e-06, "loss": 0.04122371971607208, "step": 7185 }, { "epoch": 0.06767058823529412, "grad_norm": 0.758170066240648, "learning_rate": 7.690638294300772e-06, "loss": 0.03834942877292633, "step": 7190 }, { "epoch": 0.06771764705882354, "grad_norm": 0.9178340488877101, "learning_rate": 7.687965242086176e-06, "loss": 0.04116382598876953, "step": 7195 }, { "epoch": 0.06776470588235294, "grad_norm": 0.987745457965624, "learning_rate": 7.685294975171518e-06, "loss": 0.05004991888999939, "step": 7200 }, { "epoch": 0.06781176470588235, "grad_norm": 0.8968924228824131, "learning_rate": 7.682627488723054e-06, "loss": 0.04677339792251587, "step": 7205 }, { "epoch": 0.06785882352941176, "grad_norm": 0.9186516990813579, "learning_rate": 7.679962777918769e-06, "loss": 0.03873664140701294, "step": 7210 }, { "epoch": 0.06790588235294118, "grad_norm": 0.890348452631425, "learning_rate": 7.677300837948363e-06, "loss": 0.0493941068649292, "step": 7215 }, { "epoch": 0.06795294117647059, "grad_norm": 0.7376172348311963, "learning_rate": 7.674641664013185e-06, "loss": 0.04503190517425537, "step": 7220 }, { "epoch": 0.068, "grad_norm": 0.745661146619638, "learning_rate": 7.671985251326218e-06, "loss": 0.035159599781036374, "step": 7225 }, { "epoch": 0.0680470588235294, "grad_norm": 0.9453424166939002, "learning_rate": 7.669331595112031e-06, "loss": 0.050525987148284913, "step": 7230 }, { "epoch": 0.06809411764705882, "grad_norm": 0.6892190540663172, "learning_rate": 7.666680690606752e-06, "loss": 0.041788288950920106, "step": 7235 }, { "epoch": 0.06814117647058823, "grad_norm": 0.6421323261553946, "learning_rate": 7.664032533058024e-06, "loss": 0.04500813186168671, "step": 7240 }, { "epoch": 0.06818823529411765, "grad_norm": 0.9736233657707466, "learning_rate": 7.661387117724974e-06, "loss": 0.05108722448348999, "step": 7245 }, { "epoch": 0.06823529411764706, "grad_norm": 0.7790248093878458, "learning_rate": 7.658744439878176e-06, "loss": 0.040089654922485354, "step": 7250 }, { "epoch": 0.06828235294117647, "grad_norm": 0.7037896631289814, "learning_rate": 7.656104494799616e-06, "loss": 0.04554274082183838, "step": 7255 }, { "epoch": 0.06832941176470589, "grad_norm": 0.8887278215741929, "learning_rate": 7.653467277782654e-06, "loss": 0.038513433933258054, "step": 7260 }, { "epoch": 0.06837647058823529, "grad_norm": 0.8774816951037578, "learning_rate": 7.650832784131993e-06, "loss": 0.049951159954071046, "step": 7265 }, { "epoch": 0.0684235294117647, "grad_norm": 0.7663623166834392, "learning_rate": 7.648201009163641e-06, "loss": 0.04949943423271179, "step": 7270 }, { "epoch": 0.06847058823529412, "grad_norm": 0.8014463849376364, "learning_rate": 7.645571948204879e-06, "loss": 0.04598194062709808, "step": 7275 }, { "epoch": 0.06851764705882353, "grad_norm": 0.6655150887431591, "learning_rate": 7.64294559659422e-06, "loss": 0.04181466996669769, "step": 7280 }, { "epoch": 0.06856470588235294, "grad_norm": 0.8407981466474493, "learning_rate": 7.640321949681382e-06, "loss": 0.04469131827354431, "step": 7285 }, { "epoch": 0.06861176470588236, "grad_norm": 1.0153587932462487, "learning_rate": 7.637701002827244e-06, "loss": 0.04255268573760986, "step": 7290 }, { "epoch": 0.06865882352941176, "grad_norm": 0.9520676028846596, "learning_rate": 7.635082751403825e-06, "loss": 0.047272291779518125, "step": 7295 }, { "epoch": 0.06870588235294117, "grad_norm": 0.8965676621381921, "learning_rate": 7.632467190794239e-06, "loss": 0.044813477993011476, "step": 7300 }, { "epoch": 0.06875294117647059, "grad_norm": 0.8955906136461773, "learning_rate": 7.62985431639266e-06, "loss": 0.04338140785694122, "step": 7305 }, { "epoch": 0.0688, "grad_norm": 0.930379637251282, "learning_rate": 7.627244123604294e-06, "loss": 0.04786311984062195, "step": 7310 }, { "epoch": 0.06884705882352941, "grad_norm": 0.8491158088558596, "learning_rate": 7.624636607845346e-06, "loss": 0.04048349261283875, "step": 7315 }, { "epoch": 0.06889411764705883, "grad_norm": 1.2105521406354156, "learning_rate": 7.622031764542978e-06, "loss": 0.05329177379608154, "step": 7320 }, { "epoch": 0.06894117647058824, "grad_norm": 0.9563604296680449, "learning_rate": 7.619429589135285e-06, "loss": 0.04632124900817871, "step": 7325 }, { "epoch": 0.06898823529411764, "grad_norm": 0.7722566442520984, "learning_rate": 7.616830077071249e-06, "loss": 0.04385439157485962, "step": 7330 }, { "epoch": 0.06903529411764706, "grad_norm": 0.7380388056393932, "learning_rate": 7.614233223810723e-06, "loss": 0.04833526015281677, "step": 7335 }, { "epoch": 0.06908235294117647, "grad_norm": 0.8751662205482666, "learning_rate": 7.611639024824382e-06, "loss": 0.04502001702785492, "step": 7340 }, { "epoch": 0.06912941176470588, "grad_norm": 0.6950913258210802, "learning_rate": 7.6090474755936946e-06, "loss": 0.04136750102043152, "step": 7345 }, { "epoch": 0.0691764705882353, "grad_norm": 1.0040410629920409, "learning_rate": 7.606458571610898e-06, "loss": 0.04214646816253662, "step": 7350 }, { "epoch": 0.06922352941176471, "grad_norm": 0.8066929674329698, "learning_rate": 7.6038723083789465e-06, "loss": 0.04415796995162964, "step": 7355 }, { "epoch": 0.06927058823529411, "grad_norm": 0.8458154511062774, "learning_rate": 7.601288681411505e-06, "loss": 0.042672178149223326, "step": 7360 }, { "epoch": 0.06931764705882353, "grad_norm": 0.7715976245441879, "learning_rate": 7.598707686232889e-06, "loss": 0.0368788480758667, "step": 7365 }, { "epoch": 0.06936470588235294, "grad_norm": 0.8346668050441985, "learning_rate": 7.596129318378052e-06, "loss": 0.042068445682525636, "step": 7370 }, { "epoch": 0.06941176470588235, "grad_norm": 0.983556609961905, "learning_rate": 7.593553573392543e-06, "loss": 0.04623185992240906, "step": 7375 }, { "epoch": 0.06945882352941177, "grad_norm": 0.674650880599421, "learning_rate": 7.590980446832477e-06, "loss": 0.045015543699264526, "step": 7380 }, { "epoch": 0.06950588235294118, "grad_norm": 0.8846372454651041, "learning_rate": 7.588409934264504e-06, "loss": 0.043837904930114746, "step": 7385 }, { "epoch": 0.0695529411764706, "grad_norm": 0.7611601705003942, "learning_rate": 7.585842031265775e-06, "loss": 0.04585274457931519, "step": 7390 }, { "epoch": 0.0696, "grad_norm": 0.812471765279978, "learning_rate": 7.58327673342391e-06, "loss": 0.041480553150177, "step": 7395 }, { "epoch": 0.06964705882352941, "grad_norm": 1.0320759383389713, "learning_rate": 7.580714036336968e-06, "loss": 0.04638607501983642, "step": 7400 }, { "epoch": 0.06969411764705882, "grad_norm": 1.285473649281198, "learning_rate": 7.578153935613414e-06, "loss": 0.04548290967941284, "step": 7405 }, { "epoch": 0.06974117647058824, "grad_norm": 0.6829967694694967, "learning_rate": 7.575596426872085e-06, "loss": 0.04332936406135559, "step": 7410 }, { "epoch": 0.06978823529411765, "grad_norm": 0.8133857091432077, "learning_rate": 7.573041505742165e-06, "loss": 0.04117406606674194, "step": 7415 }, { "epoch": 0.06983529411764706, "grad_norm": 0.7751780805245035, "learning_rate": 7.570489167863148e-06, "loss": 0.03464306592941284, "step": 7420 }, { "epoch": 0.06988235294117646, "grad_norm": 1.0386318853376015, "learning_rate": 7.567939408884808e-06, "loss": 0.04228093028068543, "step": 7425 }, { "epoch": 0.06992941176470588, "grad_norm": 0.8509288269035986, "learning_rate": 7.5653922244671685e-06, "loss": 0.044032835960388185, "step": 7430 }, { "epoch": 0.06997647058823529, "grad_norm": 0.6962555671768075, "learning_rate": 7.562847610280468e-06, "loss": 0.04071049094200134, "step": 7435 }, { "epoch": 0.0700235294117647, "grad_norm": 0.6310853496880509, "learning_rate": 7.560305562005142e-06, "loss": 0.03976244628429413, "step": 7440 }, { "epoch": 0.07007058823529412, "grad_norm": 0.6800137946019099, "learning_rate": 7.557766075331771e-06, "loss": 0.03413670659065247, "step": 7445 }, { "epoch": 0.07011764705882353, "grad_norm": 0.7003873613812562, "learning_rate": 7.5552291459610676e-06, "loss": 0.03671037256717682, "step": 7450 }, { "epoch": 0.07016470588235293, "grad_norm": 1.4085645396336783, "learning_rate": 7.5526947696038385e-06, "loss": 0.04544505178928375, "step": 7455 }, { "epoch": 0.07021176470588235, "grad_norm": 0.8476382863597874, "learning_rate": 7.550162941980956e-06, "loss": 0.046199989318847653, "step": 7460 }, { "epoch": 0.07025882352941176, "grad_norm": 0.6971165004871703, "learning_rate": 7.5476336588233265e-06, "loss": 0.04062325358390808, "step": 7465 }, { "epoch": 0.07030588235294118, "grad_norm": 0.6904304277469994, "learning_rate": 7.545106915871859e-06, "loss": 0.04174319207668305, "step": 7470 }, { "epoch": 0.07035294117647059, "grad_norm": 1.5897134955344328, "learning_rate": 7.542582708877436e-06, "loss": 0.0442396342754364, "step": 7475 }, { "epoch": 0.0704, "grad_norm": 1.1174881408088622, "learning_rate": 7.5400610336008915e-06, "loss": 0.043513703346252444, "step": 7480 }, { "epoch": 0.07044705882352942, "grad_norm": 0.8972448697432823, "learning_rate": 7.537541885812962e-06, "loss": 0.03900115489959717, "step": 7485 }, { "epoch": 0.07049411764705882, "grad_norm": 1.023594197727412, "learning_rate": 7.535025261294279e-06, "loss": 0.048474347591400145, "step": 7490 }, { "epoch": 0.07054117647058823, "grad_norm": 0.8304450408522885, "learning_rate": 7.53251115583532e-06, "loss": 0.04313503503799439, "step": 7495 }, { "epoch": 0.07058823529411765, "grad_norm": 0.7075218461544961, "learning_rate": 7.5299995652363975e-06, "loss": 0.0455164909362793, "step": 7500 }, { "epoch": 0.07063529411764706, "grad_norm": 1.056295629054905, "learning_rate": 7.527490485307611e-06, "loss": 0.042007225751876834, "step": 7505 }, { "epoch": 0.07068235294117647, "grad_norm": 0.8602946935261953, "learning_rate": 7.5249839118688285e-06, "loss": 0.043867054581642154, "step": 7510 }, { "epoch": 0.07072941176470589, "grad_norm": 0.8573724827135702, "learning_rate": 7.522479840749656e-06, "loss": 0.04459763765335083, "step": 7515 }, { "epoch": 0.07077647058823529, "grad_norm": 0.7175693245642404, "learning_rate": 7.519978267789409e-06, "loss": 0.04681876599788666, "step": 7520 }, { "epoch": 0.0708235294117647, "grad_norm": 0.7081630269554474, "learning_rate": 7.517479188837081e-06, "loss": 0.03994632661342621, "step": 7525 }, { "epoch": 0.07087058823529412, "grad_norm": 0.9192038833405124, "learning_rate": 7.514982599751314e-06, "loss": 0.04014921188354492, "step": 7530 }, { "epoch": 0.07091764705882353, "grad_norm": 0.6746236933050357, "learning_rate": 7.5124884964003716e-06, "loss": 0.0425815224647522, "step": 7535 }, { "epoch": 0.07096470588235294, "grad_norm": 0.8354961838532864, "learning_rate": 7.509996874662111e-06, "loss": 0.03858628273010254, "step": 7540 }, { "epoch": 0.07101176470588236, "grad_norm": 0.7060214218919527, "learning_rate": 7.507507730423952e-06, "loss": 0.04021809101104736, "step": 7545 }, { "epoch": 0.07105882352941177, "grad_norm": 1.1289545338251925, "learning_rate": 7.505021059582851e-06, "loss": 0.049331533908844, "step": 7550 }, { "epoch": 0.07110588235294117, "grad_norm": 0.9132682983215017, "learning_rate": 7.50253685804527e-06, "loss": 0.0634386420249939, "step": 7555 }, { "epoch": 0.07115294117647059, "grad_norm": 0.6892277719821657, "learning_rate": 7.500055121727149e-06, "loss": 0.03451612591743469, "step": 7560 }, { "epoch": 0.0712, "grad_norm": 0.8593082994764869, "learning_rate": 7.497575846553881e-06, "loss": 0.04625406265258789, "step": 7565 }, { "epoch": 0.07124705882352941, "grad_norm": 0.9690864555658798, "learning_rate": 7.49509902846028e-06, "loss": 0.04605524241924286, "step": 7570 }, { "epoch": 0.07129411764705883, "grad_norm": 0.7646082025386621, "learning_rate": 7.492624663390552e-06, "loss": 0.04004932045936584, "step": 7575 }, { "epoch": 0.07134117647058824, "grad_norm": 0.9412975212104391, "learning_rate": 7.490152747298274e-06, "loss": 0.047846543788909915, "step": 7580 }, { "epoch": 0.07138823529411764, "grad_norm": 0.9555888511022933, "learning_rate": 7.487683276146358e-06, "loss": 0.04371194839477539, "step": 7585 }, { "epoch": 0.07143529411764706, "grad_norm": 0.8980305715922163, "learning_rate": 7.485216245907029e-06, "loss": 0.04429514408111572, "step": 7590 }, { "epoch": 0.07148235294117647, "grad_norm": 0.880884030390341, "learning_rate": 7.482751652561794e-06, "loss": 0.03864992260932922, "step": 7595 }, { "epoch": 0.07152941176470588, "grad_norm": 0.7834687398972128, "learning_rate": 7.480289492101416e-06, "loss": 0.04630504250526428, "step": 7600 }, { "epoch": 0.0715764705882353, "grad_norm": 0.8339044546484092, "learning_rate": 7.4778297605258865e-06, "loss": 0.042115846276283266, "step": 7605 }, { "epoch": 0.07162352941176471, "grad_norm": 0.7136555003922018, "learning_rate": 7.475372453844398e-06, "loss": 0.04286922812461853, "step": 7610 }, { "epoch": 0.07167058823529411, "grad_norm": 0.717145127680025, "learning_rate": 7.472917568075321e-06, "loss": 0.04502480030059815, "step": 7615 }, { "epoch": 0.07171764705882352, "grad_norm": 0.9235766559101491, "learning_rate": 7.470465099246164e-06, "loss": 0.0400701105594635, "step": 7620 }, { "epoch": 0.07176470588235294, "grad_norm": 0.9581691807001581, "learning_rate": 7.468015043393564e-06, "loss": 0.04599066972732544, "step": 7625 }, { "epoch": 0.07181176470588235, "grad_norm": 1.0644742589789875, "learning_rate": 7.465567396563247e-06, "loss": 0.045197689533233644, "step": 7630 }, { "epoch": 0.07185882352941177, "grad_norm": 0.7231499450818589, "learning_rate": 7.463122154810006e-06, "loss": 0.04093585610389709, "step": 7635 }, { "epoch": 0.07190588235294118, "grad_norm": 0.9265450120067261, "learning_rate": 7.460679314197672e-06, "loss": 0.04083575904369354, "step": 7640 }, { "epoch": 0.0719529411764706, "grad_norm": 0.980503167313605, "learning_rate": 7.458238870799089e-06, "loss": 0.043649935722351076, "step": 7645 }, { "epoch": 0.072, "grad_norm": 0.723455234923159, "learning_rate": 7.455800820696094e-06, "loss": 0.04171210527420044, "step": 7650 }, { "epoch": 0.07204705882352941, "grad_norm": 0.9872206740353697, "learning_rate": 7.453365159979473e-06, "loss": 0.04788214266300202, "step": 7655 }, { "epoch": 0.07209411764705882, "grad_norm": 0.5777919798994127, "learning_rate": 7.450931884748952e-06, "loss": 0.04784711003303528, "step": 7660 }, { "epoch": 0.07214117647058824, "grad_norm": 0.7361320379779838, "learning_rate": 7.448500991113164e-06, "loss": 0.04179099202156067, "step": 7665 }, { "epoch": 0.07218823529411765, "grad_norm": 0.8515110361579976, "learning_rate": 7.446072475189623e-06, "loss": 0.04504277110099793, "step": 7670 }, { "epoch": 0.07223529411764706, "grad_norm": 0.762777564381799, "learning_rate": 7.443646333104696e-06, "loss": 0.040731889009475705, "step": 7675 }, { "epoch": 0.07228235294117646, "grad_norm": 0.6732778901086581, "learning_rate": 7.441222560993582e-06, "loss": 0.035531526803970336, "step": 7680 }, { "epoch": 0.07232941176470588, "grad_norm": 1.0185552953938661, "learning_rate": 7.438801155000283e-06, "loss": 0.046747487783432004, "step": 7685 }, { "epoch": 0.07237647058823529, "grad_norm": 0.7804943900086404, "learning_rate": 7.436382111277576e-06, "loss": 0.05332390069961548, "step": 7690 }, { "epoch": 0.0724235294117647, "grad_norm": 2.351673842097977, "learning_rate": 7.433965425986992e-06, "loss": 0.05169922113418579, "step": 7695 }, { "epoch": 0.07247058823529412, "grad_norm": 1.0051667786403502, "learning_rate": 7.431551095298789e-06, "loss": 0.040037918090820315, "step": 7700 }, { "epoch": 0.07251764705882353, "grad_norm": 0.8714745510002158, "learning_rate": 7.429139115391926e-06, "loss": 0.04124288260936737, "step": 7705 }, { "epoch": 0.07256470588235295, "grad_norm": 0.6565586012318185, "learning_rate": 7.426729482454034e-06, "loss": 0.04609893560409546, "step": 7710 }, { "epoch": 0.07261176470588235, "grad_norm": 0.8621858462684041, "learning_rate": 7.424322192681398e-06, "loss": 0.04439888000488281, "step": 7715 }, { "epoch": 0.07265882352941176, "grad_norm": 0.757469633482959, "learning_rate": 7.421917242278927e-06, "loss": 0.0405612051486969, "step": 7720 }, { "epoch": 0.07270588235294118, "grad_norm": 0.9334062045307175, "learning_rate": 7.4195146274601274e-06, "loss": 0.043050524592399594, "step": 7725 }, { "epoch": 0.07275294117647059, "grad_norm": 0.8722283571909698, "learning_rate": 7.417114344447083e-06, "loss": 0.0413454532623291, "step": 7730 }, { "epoch": 0.0728, "grad_norm": 0.5834854980147899, "learning_rate": 7.414716389470428e-06, "loss": 0.0400424599647522, "step": 7735 }, { "epoch": 0.07284705882352942, "grad_norm": 0.6988716230718698, "learning_rate": 7.412320758769318e-06, "loss": 0.03833651542663574, "step": 7740 }, { "epoch": 0.07289411764705882, "grad_norm": 2.110121283145734, "learning_rate": 7.409927448591413e-06, "loss": 0.03443239331245422, "step": 7745 }, { "epoch": 0.07294117647058823, "grad_norm": 0.8729713666216514, "learning_rate": 7.407536455192845e-06, "loss": 0.051136314868927, "step": 7750 }, { "epoch": 0.07298823529411765, "grad_norm": 0.5638434965644747, "learning_rate": 7.4051477748382e-06, "loss": 0.045931801199913025, "step": 7755 }, { "epoch": 0.07303529411764706, "grad_norm": 0.7893280468091048, "learning_rate": 7.402761403800487e-06, "loss": 0.03959587216377258, "step": 7760 }, { "epoch": 0.07308235294117647, "grad_norm": 0.8346902246059742, "learning_rate": 7.400377338361119e-06, "loss": 0.04093787670135498, "step": 7765 }, { "epoch": 0.07312941176470589, "grad_norm": 0.9232031325360439, "learning_rate": 7.397995574809888e-06, "loss": 0.042835193872451785, "step": 7770 }, { "epoch": 0.07317647058823529, "grad_norm": 0.8148185552679875, "learning_rate": 7.395616109444936e-06, "loss": 0.0417802095413208, "step": 7775 }, { "epoch": 0.0732235294117647, "grad_norm": 0.6864712017477943, "learning_rate": 7.393238938572737e-06, "loss": 0.04839060306549072, "step": 7780 }, { "epoch": 0.07327058823529412, "grad_norm": 0.979493808494642, "learning_rate": 7.390864058508068e-06, "loss": 0.04435421228408813, "step": 7785 }, { "epoch": 0.07331764705882353, "grad_norm": 2.7862801813823035, "learning_rate": 7.3884914655739905e-06, "loss": 0.04375256896018982, "step": 7790 }, { "epoch": 0.07336470588235294, "grad_norm": 0.9067484404227062, "learning_rate": 7.386121156101817e-06, "loss": 0.041307687759399414, "step": 7795 }, { "epoch": 0.07341176470588236, "grad_norm": 0.7947867373888133, "learning_rate": 7.3837531264311e-06, "loss": 0.04289669394493103, "step": 7800 }, { "epoch": 0.07345882352941177, "grad_norm": 0.7700695581957295, "learning_rate": 7.381387372909599e-06, "loss": 0.04331514835357666, "step": 7805 }, { "epoch": 0.07350588235294117, "grad_norm": 0.833070329805482, "learning_rate": 7.379023891893258e-06, "loss": 0.039678958058357236, "step": 7810 }, { "epoch": 0.07355294117647058, "grad_norm": 0.9897119940862544, "learning_rate": 7.376662679746186e-06, "loss": 0.05005145072937012, "step": 7815 }, { "epoch": 0.0736, "grad_norm": 0.6763422669380925, "learning_rate": 7.37430373284063e-06, "loss": 0.044966450333595274, "step": 7820 }, { "epoch": 0.07364705882352941, "grad_norm": 0.5345439484480099, "learning_rate": 7.371947047556951e-06, "loss": 0.041560354828834536, "step": 7825 }, { "epoch": 0.07369411764705883, "grad_norm": 0.9465503955534804, "learning_rate": 7.369592620283604e-06, "loss": 0.040966248512268065, "step": 7830 }, { "epoch": 0.07374117647058824, "grad_norm": 0.9343169524660867, "learning_rate": 7.367240447417116e-06, "loss": 0.041668424010276796, "step": 7835 }, { "epoch": 0.07378823529411764, "grad_norm": 0.6660277910631572, "learning_rate": 7.36489052536205e-06, "loss": 0.03654949069023132, "step": 7840 }, { "epoch": 0.07383529411764705, "grad_norm": 0.8040095382111648, "learning_rate": 7.362542850531e-06, "loss": 0.047833177447319034, "step": 7845 }, { "epoch": 0.07388235294117647, "grad_norm": 0.8231644237797044, "learning_rate": 7.3601974193445615e-06, "loss": 0.043325701355934144, "step": 7850 }, { "epoch": 0.07392941176470588, "grad_norm": 0.7013382559168615, "learning_rate": 7.357854228231299e-06, "loss": 0.04134718775749206, "step": 7855 }, { "epoch": 0.0739764705882353, "grad_norm": 0.8600479189772752, "learning_rate": 7.355513273627736e-06, "loss": 0.03952181339263916, "step": 7860 }, { "epoch": 0.07402352941176471, "grad_norm": 1.0157195933180485, "learning_rate": 7.353174551978326e-06, "loss": 0.04343522787094116, "step": 7865 }, { "epoch": 0.07407058823529412, "grad_norm": 0.8707007707271405, "learning_rate": 7.350838059735431e-06, "loss": 0.04599972665309906, "step": 7870 }, { "epoch": 0.07411764705882352, "grad_norm": 0.7191326544458396, "learning_rate": 7.348503793359298e-06, "loss": 0.045662683248519895, "step": 7875 }, { "epoch": 0.07416470588235294, "grad_norm": 0.8672235255318338, "learning_rate": 7.346171749318041e-06, "loss": 0.03881556987762451, "step": 7880 }, { "epoch": 0.07421176470588235, "grad_norm": 0.8842691017316899, "learning_rate": 7.34384192408761e-06, "loss": 0.04342695474624634, "step": 7885 }, { "epoch": 0.07425882352941177, "grad_norm": 0.8156229449637028, "learning_rate": 7.341514314151776e-06, "loss": 0.03830942809581757, "step": 7890 }, { "epoch": 0.07430588235294118, "grad_norm": 0.9893105134274784, "learning_rate": 7.339188916002108e-06, "loss": 0.04146823585033417, "step": 7895 }, { "epoch": 0.0743529411764706, "grad_norm": 0.861228970851014, "learning_rate": 7.336865726137943e-06, "loss": 0.0470429003238678, "step": 7900 }, { "epoch": 0.0744, "grad_norm": 0.6131142695233257, "learning_rate": 7.334544741066377e-06, "loss": 0.03548722267150879, "step": 7905 }, { "epoch": 0.07444705882352941, "grad_norm": 0.8685417662704223, "learning_rate": 7.332225957302235e-06, "loss": 0.04694281220436096, "step": 7910 }, { "epoch": 0.07449411764705882, "grad_norm": 0.9344115872260766, "learning_rate": 7.329909371368043e-06, "loss": 0.04787932336330414, "step": 7915 }, { "epoch": 0.07454117647058824, "grad_norm": 0.666287926931086, "learning_rate": 7.32759497979402e-06, "loss": 0.04231418371200561, "step": 7920 }, { "epoch": 0.07458823529411765, "grad_norm": 0.8549880258933168, "learning_rate": 7.325282779118047e-06, "loss": 0.039889085292816165, "step": 7925 }, { "epoch": 0.07463529411764706, "grad_norm": 0.9965368617867786, "learning_rate": 7.322972765885648e-06, "loss": 0.03972766697406769, "step": 7930 }, { "epoch": 0.07468235294117648, "grad_norm": 0.9097420529623422, "learning_rate": 7.320664936649967e-06, "loss": 0.04402828812599182, "step": 7935 }, { "epoch": 0.07472941176470588, "grad_norm": 0.8278979460601418, "learning_rate": 7.318359287971746e-06, "loss": 0.03863018751144409, "step": 7940 }, { "epoch": 0.07477647058823529, "grad_norm": 0.8409802286758961, "learning_rate": 7.316055816419305e-06, "loss": 0.042000961303710935, "step": 7945 }, { "epoch": 0.0748235294117647, "grad_norm": 0.8656583328699708, "learning_rate": 7.313754518568524e-06, "loss": 0.043586736917495726, "step": 7950 }, { "epoch": 0.07487058823529412, "grad_norm": 0.6689942370688281, "learning_rate": 7.311455391002813e-06, "loss": 0.04699338674545288, "step": 7955 }, { "epoch": 0.07491764705882353, "grad_norm": 0.6813524479197506, "learning_rate": 7.309158430313097e-06, "loss": 0.04446381330490112, "step": 7960 }, { "epoch": 0.07496470588235295, "grad_norm": 0.6536020649061466, "learning_rate": 7.306863633097795e-06, "loss": 0.04289618134498596, "step": 7965 }, { "epoch": 0.07501176470588235, "grad_norm": 1.0458982893267004, "learning_rate": 7.304570995962793e-06, "loss": 0.05195906162261963, "step": 7970 }, { "epoch": 0.07505882352941176, "grad_norm": 0.6881256825435238, "learning_rate": 7.302280515521432e-06, "loss": 0.03777743875980377, "step": 7975 }, { "epoch": 0.07510588235294118, "grad_norm": 0.9429640389582998, "learning_rate": 7.299992188394477e-06, "loss": 0.044588404893875125, "step": 7980 }, { "epoch": 0.07515294117647059, "grad_norm": 0.7180322044484185, "learning_rate": 7.297706011210106e-06, "loss": 0.03614823520183563, "step": 7985 }, { "epoch": 0.0752, "grad_norm": 0.7803515022752, "learning_rate": 7.2954219806038805e-06, "loss": 0.04165690541267395, "step": 7990 }, { "epoch": 0.07524705882352942, "grad_norm": 0.7210204646076973, "learning_rate": 7.29314009321873e-06, "loss": 0.0350339949131012, "step": 7995 }, { "epoch": 0.07529411764705882, "grad_norm": 0.6550214537115661, "learning_rate": 7.2908603457049275e-06, "loss": 0.033763465285301206, "step": 8000 }, { "epoch": 0.07534117647058823, "grad_norm": 0.8887348818841523, "learning_rate": 7.288582734720076e-06, "loss": 0.04631681442260742, "step": 8005 }, { "epoch": 0.07538823529411764, "grad_norm": 0.9888795314037692, "learning_rate": 7.2863072569290775e-06, "loss": 0.04193318486213684, "step": 8010 }, { "epoch": 0.07543529411764706, "grad_norm": 0.7993810314983075, "learning_rate": 7.28403390900412e-06, "loss": 0.037175697088241574, "step": 8015 }, { "epoch": 0.07548235294117647, "grad_norm": 0.6032131091482987, "learning_rate": 7.281762687624657e-06, "loss": 0.04407871663570404, "step": 8020 }, { "epoch": 0.07552941176470589, "grad_norm": 0.7464741185689534, "learning_rate": 7.27949358947738e-06, "loss": 0.041248619556427, "step": 8025 }, { "epoch": 0.0755764705882353, "grad_norm": 0.8837617943703194, "learning_rate": 7.277226611256209e-06, "loss": 0.048611363768577574, "step": 8030 }, { "epoch": 0.0756235294117647, "grad_norm": 0.9947594523415992, "learning_rate": 7.274961749662265e-06, "loss": 0.051585066318511966, "step": 8035 }, { "epoch": 0.07567058823529411, "grad_norm": 0.6007287950040552, "learning_rate": 7.272699001403848e-06, "loss": 0.04429323673248291, "step": 8040 }, { "epoch": 0.07571764705882353, "grad_norm": 0.7777643824163886, "learning_rate": 7.270438363196425e-06, "loss": 0.05204473733901978, "step": 8045 }, { "epoch": 0.07576470588235294, "grad_norm": 0.7981523291435701, "learning_rate": 7.268179831762599e-06, "loss": 0.04339389801025391, "step": 8050 }, { "epoch": 0.07581176470588236, "grad_norm": 0.5809048446478834, "learning_rate": 7.2659234038321045e-06, "loss": 0.032998257875442506, "step": 8055 }, { "epoch": 0.07585882352941177, "grad_norm": 0.7535544829955374, "learning_rate": 7.263669076141769e-06, "loss": 0.03899219632148743, "step": 8060 }, { "epoch": 0.07590588235294117, "grad_norm": 0.8788563617465514, "learning_rate": 7.26141684543551e-06, "loss": 0.04265542626380921, "step": 8065 }, { "epoch": 0.07595294117647058, "grad_norm": 0.9039342944617103, "learning_rate": 7.259166708464304e-06, "loss": 0.04107366800308228, "step": 8070 }, { "epoch": 0.076, "grad_norm": 0.7857104184544429, "learning_rate": 7.256918661986173e-06, "loss": 0.03960933983325958, "step": 8075 }, { "epoch": 0.07604705882352941, "grad_norm": 0.6145859870230289, "learning_rate": 7.254672702766158e-06, "loss": 0.0362784743309021, "step": 8080 }, { "epoch": 0.07609411764705883, "grad_norm": 0.9257288349389885, "learning_rate": 7.25242882757631e-06, "loss": 0.05394405126571655, "step": 8085 }, { "epoch": 0.07614117647058824, "grad_norm": 0.7222568835562903, "learning_rate": 7.250187033195658e-06, "loss": 0.04036918580532074, "step": 8090 }, { "epoch": 0.07618823529411765, "grad_norm": 0.8377163671133705, "learning_rate": 7.247947316410204e-06, "loss": 0.042873018980026247, "step": 8095 }, { "epoch": 0.07623529411764705, "grad_norm": 1.2730931673753014, "learning_rate": 7.2457096740128885e-06, "loss": 0.04177888631820679, "step": 8100 }, { "epoch": 0.07628235294117647, "grad_norm": 0.7786498419385429, "learning_rate": 7.243474102803585e-06, "loss": 0.032366061210632326, "step": 8105 }, { "epoch": 0.07632941176470588, "grad_norm": 0.5945243491466947, "learning_rate": 7.2412405995890655e-06, "loss": 0.03741127848625183, "step": 8110 }, { "epoch": 0.0763764705882353, "grad_norm": 0.931123036124849, "learning_rate": 7.239009161183001e-06, "loss": 0.039473307132720944, "step": 8115 }, { "epoch": 0.07642352941176471, "grad_norm": 0.6813849488911059, "learning_rate": 7.236779784405922e-06, "loss": 0.04252462387084961, "step": 8120 }, { "epoch": 0.07647058823529412, "grad_norm": 0.7076390052001268, "learning_rate": 7.234552466085214e-06, "loss": 0.036012887954711914, "step": 8125 }, { "epoch": 0.07651764705882352, "grad_norm": 0.8645945053653026, "learning_rate": 7.232327203055092e-06, "loss": 0.045015114545822146, "step": 8130 }, { "epoch": 0.07656470588235294, "grad_norm": 1.3670162056806865, "learning_rate": 7.230103992156586e-06, "loss": 0.05060557723045349, "step": 8135 }, { "epoch": 0.07661176470588235, "grad_norm": 1.2771760050276468, "learning_rate": 7.227882830237513e-06, "loss": 0.047195997834205625, "step": 8140 }, { "epoch": 0.07665882352941177, "grad_norm": 0.7270567340122086, "learning_rate": 7.225663714152469e-06, "loss": 0.04384077489376068, "step": 8145 }, { "epoch": 0.07670588235294118, "grad_norm": 0.8386951731268543, "learning_rate": 7.223446640762808e-06, "loss": 0.04122030735015869, "step": 8150 }, { "epoch": 0.0767529411764706, "grad_norm": 0.8340888676115019, "learning_rate": 7.221231606936615e-06, "loss": 0.04431716203689575, "step": 8155 }, { "epoch": 0.0768, "grad_norm": 0.8593098421357279, "learning_rate": 7.219018609548699e-06, "loss": 0.03862275779247284, "step": 8160 }, { "epoch": 0.07684705882352941, "grad_norm": 0.8146405747816116, "learning_rate": 7.216807645480566e-06, "loss": 0.037995511293411256, "step": 8165 }, { "epoch": 0.07689411764705882, "grad_norm": 0.8390852833347161, "learning_rate": 7.214598711620403e-06, "loss": 0.05134750604629516, "step": 8170 }, { "epoch": 0.07694117647058824, "grad_norm": 0.6009750313142495, "learning_rate": 7.212391804863065e-06, "loss": 0.04437579810619354, "step": 8175 }, { "epoch": 0.07698823529411765, "grad_norm": 0.703656122684004, "learning_rate": 7.210186922110046e-06, "loss": 0.03752304315567016, "step": 8180 }, { "epoch": 0.07703529411764706, "grad_norm": 0.7844174919084009, "learning_rate": 7.20798406026947e-06, "loss": 0.03770493268966675, "step": 8185 }, { "epoch": 0.07708235294117648, "grad_norm": 0.7752626153533831, "learning_rate": 7.205783216256067e-06, "loss": 0.04377322793006897, "step": 8190 }, { "epoch": 0.07712941176470588, "grad_norm": 0.8118627719638726, "learning_rate": 7.2035843869911605e-06, "loss": 0.04113427996635437, "step": 8195 }, { "epoch": 0.07717647058823529, "grad_norm": 0.6889519332766441, "learning_rate": 7.201387569402645e-06, "loss": 0.034543472528457644, "step": 8200 }, { "epoch": 0.0772235294117647, "grad_norm": 0.8718032428032348, "learning_rate": 7.199192760424967e-06, "loss": 0.04164567589759827, "step": 8205 }, { "epoch": 0.07727058823529412, "grad_norm": 0.6513534506271287, "learning_rate": 7.196999956999111e-06, "loss": 0.04234464168548584, "step": 8210 }, { "epoch": 0.07731764705882353, "grad_norm": 0.6850719461163374, "learning_rate": 7.19480915607258e-06, "loss": 0.042114120721817014, "step": 8215 }, { "epoch": 0.07736470588235295, "grad_norm": 0.5098470635009489, "learning_rate": 7.192620354599377e-06, "loss": 0.038785803318023684, "step": 8220 }, { "epoch": 0.07741176470588235, "grad_norm": 0.7228114677035813, "learning_rate": 7.1904335495399855e-06, "loss": 0.040649837255477904, "step": 8225 }, { "epoch": 0.07745882352941176, "grad_norm": 0.6370549213494126, "learning_rate": 7.18824873786136e-06, "loss": 0.035694155097007754, "step": 8230 }, { "epoch": 0.07750588235294117, "grad_norm": 0.651928972192843, "learning_rate": 7.186065916536894e-06, "loss": 0.04350908994674683, "step": 8235 }, { "epoch": 0.07755294117647059, "grad_norm": 0.9879091650365782, "learning_rate": 7.183885082546416e-06, "loss": 0.044039851427078246, "step": 8240 }, { "epoch": 0.0776, "grad_norm": 0.8503585497980153, "learning_rate": 7.181706232876167e-06, "loss": 0.04772854447364807, "step": 8245 }, { "epoch": 0.07764705882352942, "grad_norm": 0.5630741813840255, "learning_rate": 7.179529364518778e-06, "loss": 0.03738888502120972, "step": 8250 }, { "epoch": 0.07769411764705883, "grad_norm": 0.878356691953935, "learning_rate": 7.177354474473262e-06, "loss": 0.03735349178314209, "step": 8255 }, { "epoch": 0.07774117647058823, "grad_norm": 1.0165759621722381, "learning_rate": 7.175181559744988e-06, "loss": 0.04325188398361206, "step": 8260 }, { "epoch": 0.07778823529411764, "grad_norm": 0.8293620355637801, "learning_rate": 7.173010617345671e-06, "loss": 0.043177998065948485, "step": 8265 }, { "epoch": 0.07783529411764706, "grad_norm": 0.891267547909355, "learning_rate": 7.170841644293349e-06, "loss": 0.046544206142425534, "step": 8270 }, { "epoch": 0.07788235294117647, "grad_norm": 0.7300317178331363, "learning_rate": 7.168674637612369e-06, "loss": 0.03902711272239685, "step": 8275 }, { "epoch": 0.07792941176470589, "grad_norm": 0.9547770602383897, "learning_rate": 7.1665095943333665e-06, "loss": 0.03570728898048401, "step": 8280 }, { "epoch": 0.0779764705882353, "grad_norm": 0.774212686856292, "learning_rate": 7.164346511493253e-06, "loss": 0.0464309424161911, "step": 8285 }, { "epoch": 0.0780235294117647, "grad_norm": 0.9545218147583261, "learning_rate": 7.1621853861351966e-06, "loss": 0.04601489305496216, "step": 8290 }, { "epoch": 0.07807058823529411, "grad_norm": 0.5703591372477005, "learning_rate": 7.160026215308603e-06, "loss": 0.037084007263183595, "step": 8295 }, { "epoch": 0.07811764705882353, "grad_norm": 0.752656166136118, "learning_rate": 7.157868996069105e-06, "loss": 0.0413888156414032, "step": 8300 }, { "epoch": 0.07816470588235294, "grad_norm": 1.2643177657987315, "learning_rate": 7.155713725478538e-06, "loss": 0.046397429704666135, "step": 8305 }, { "epoch": 0.07821176470588236, "grad_norm": 0.634140168251467, "learning_rate": 7.153560400604926e-06, "loss": 0.03924456834793091, "step": 8310 }, { "epoch": 0.07825882352941177, "grad_norm": 1.0280537291702994, "learning_rate": 7.151409018522467e-06, "loss": 0.03885660171508789, "step": 8315 }, { "epoch": 0.07830588235294117, "grad_norm": 0.6589847226150422, "learning_rate": 7.149259576311516e-06, "loss": 0.043415650725364685, "step": 8320 }, { "epoch": 0.07835294117647058, "grad_norm": 0.9336473163724023, "learning_rate": 7.147112071058563e-06, "loss": 0.04121080040931702, "step": 8325 }, { "epoch": 0.0784, "grad_norm": 0.8605268458693813, "learning_rate": 7.1449664998562266e-06, "loss": 0.0408750057220459, "step": 8330 }, { "epoch": 0.07844705882352941, "grad_norm": 0.7909255756105096, "learning_rate": 7.142822859803222e-06, "loss": 0.04394915103912354, "step": 8335 }, { "epoch": 0.07849411764705883, "grad_norm": 0.7125063663930676, "learning_rate": 7.140681148004365e-06, "loss": 0.04102092981338501, "step": 8340 }, { "epoch": 0.07854117647058824, "grad_norm": 0.7085035203569956, "learning_rate": 7.138541361570536e-06, "loss": 0.04747799038887024, "step": 8345 }, { "epoch": 0.07858823529411765, "grad_norm": 0.8063730774450537, "learning_rate": 7.136403497618676e-06, "loss": 0.03631921112537384, "step": 8350 }, { "epoch": 0.07863529411764705, "grad_norm": 0.6633009145176585, "learning_rate": 7.134267553271763e-06, "loss": 0.03915725946426392, "step": 8355 }, { "epoch": 0.07868235294117647, "grad_norm": 0.9817552137413684, "learning_rate": 7.1321335256588055e-06, "loss": 0.036415308713912964, "step": 8360 }, { "epoch": 0.07872941176470588, "grad_norm": 0.965144962036075, "learning_rate": 7.130001411914813e-06, "loss": 0.0455479621887207, "step": 8365 }, { "epoch": 0.0787764705882353, "grad_norm": 0.6020060726738129, "learning_rate": 7.127871209180791e-06, "loss": 0.03637139201164245, "step": 8370 }, { "epoch": 0.07882352941176471, "grad_norm": 0.9807769643199599, "learning_rate": 7.1257429146037175e-06, "loss": 0.03857831954956055, "step": 8375 }, { "epoch": 0.07887058823529412, "grad_norm": 0.7999972376044003, "learning_rate": 7.123616525336533e-06, "loss": 0.03530334830284119, "step": 8380 }, { "epoch": 0.07891764705882352, "grad_norm": 0.9092983918677281, "learning_rate": 7.1214920385381205e-06, "loss": 0.040949147939682004, "step": 8385 }, { "epoch": 0.07896470588235294, "grad_norm": 0.8409611731537803, "learning_rate": 7.11936945137329e-06, "loss": 0.040784198045730594, "step": 8390 }, { "epoch": 0.07901176470588235, "grad_norm": 0.948511434089615, "learning_rate": 7.1172487610127635e-06, "loss": 0.04632658362388611, "step": 8395 }, { "epoch": 0.07905882352941176, "grad_norm": 0.9575977432666817, "learning_rate": 7.11512996463316e-06, "loss": 0.044055843353271486, "step": 8400 }, { "epoch": 0.07910588235294118, "grad_norm": 0.6358520381943858, "learning_rate": 7.113013059416977e-06, "loss": 0.044653195142745974, "step": 8405 }, { "epoch": 0.07915294117647059, "grad_norm": 0.656937791193163, "learning_rate": 7.1108980425525775e-06, "loss": 0.03802726864814758, "step": 8410 }, { "epoch": 0.0792, "grad_norm": 0.87461669899763, "learning_rate": 7.108784911234172e-06, "loss": 0.0425342321395874, "step": 8415 }, { "epoch": 0.0792470588235294, "grad_norm": 0.9634749973723336, "learning_rate": 7.106673662661806e-06, "loss": 0.04362159371376038, "step": 8420 }, { "epoch": 0.07929411764705882, "grad_norm": 1.0636493709451256, "learning_rate": 7.104564294041342e-06, "loss": 0.03941559195518494, "step": 8425 }, { "epoch": 0.07934117647058823, "grad_norm": 0.6336099428297054, "learning_rate": 7.102456802584442e-06, "loss": 0.03276706337928772, "step": 8430 }, { "epoch": 0.07938823529411765, "grad_norm": 0.6376324232508643, "learning_rate": 7.100351185508556e-06, "loss": 0.03377408385276794, "step": 8435 }, { "epoch": 0.07943529411764706, "grad_norm": 0.5793462433109633, "learning_rate": 7.098247440036908e-06, "loss": 0.03452861905097961, "step": 8440 }, { "epoch": 0.07948235294117648, "grad_norm": 0.9535302924463467, "learning_rate": 7.096145563398475e-06, "loss": 0.03683260679244995, "step": 8445 }, { "epoch": 0.07952941176470588, "grad_norm": 1.1706618021936313, "learning_rate": 7.094045552827971e-06, "loss": 0.04103206992149353, "step": 8450 }, { "epoch": 0.07957647058823529, "grad_norm": 0.7275605793087143, "learning_rate": 7.09194740556584e-06, "loss": 0.039508044719696045, "step": 8455 }, { "epoch": 0.0796235294117647, "grad_norm": 0.8655095540327624, "learning_rate": 7.089851118858235e-06, "loss": 0.03993093967437744, "step": 8460 }, { "epoch": 0.07967058823529412, "grad_norm": 0.6233132805340497, "learning_rate": 7.0877566899570024e-06, "loss": 0.03597037792205811, "step": 8465 }, { "epoch": 0.07971764705882353, "grad_norm": 1.107279631949208, "learning_rate": 7.08566411611967e-06, "loss": 0.04584409594535828, "step": 8470 }, { "epoch": 0.07976470588235295, "grad_norm": 1.1612648818079152, "learning_rate": 7.083573394609427e-06, "loss": 0.04378222227096558, "step": 8475 }, { "epoch": 0.07981176470588236, "grad_norm": 1.6063300463416332, "learning_rate": 7.081484522695114e-06, "loss": 0.044491493701934816, "step": 8480 }, { "epoch": 0.07985882352941176, "grad_norm": 0.7354273673597019, "learning_rate": 7.079397497651204e-06, "loss": 0.04181376099586487, "step": 8485 }, { "epoch": 0.07990588235294117, "grad_norm": 0.8145919169369346, "learning_rate": 7.077312316757794e-06, "loss": 0.03975628316402435, "step": 8490 }, { "epoch": 0.07995294117647059, "grad_norm": 0.7542494419102953, "learning_rate": 7.0752289773005815e-06, "loss": 0.044614487886428834, "step": 8495 }, { "epoch": 0.08, "grad_norm": 0.7765306510589547, "learning_rate": 7.073147476570853e-06, "loss": 0.04300686717033386, "step": 8500 }, { "epoch": 0.08004705882352942, "grad_norm": 0.7546280944863217, "learning_rate": 7.0710678118654756e-06, "loss": 0.04356490671634674, "step": 8505 }, { "epoch": 0.08009411764705883, "grad_norm": 0.7906135186302503, "learning_rate": 7.068989980486871e-06, "loss": 0.04230688214302063, "step": 8510 }, { "epoch": 0.08014117647058823, "grad_norm": 0.6705020903501012, "learning_rate": 7.066913979743009e-06, "loss": 0.04075765609741211, "step": 8515 }, { "epoch": 0.08018823529411764, "grad_norm": 0.6650442620541822, "learning_rate": 7.064839806947388e-06, "loss": 0.04505075216293335, "step": 8520 }, { "epoch": 0.08023529411764706, "grad_norm": 0.9920721077418477, "learning_rate": 7.062767459419024e-06, "loss": 0.039851906895637515, "step": 8525 }, { "epoch": 0.08028235294117647, "grad_norm": 0.7031064198800454, "learning_rate": 7.060696934482439e-06, "loss": 0.04508151113986969, "step": 8530 }, { "epoch": 0.08032941176470589, "grad_norm": 1.6423695556572049, "learning_rate": 7.058628229467632e-06, "loss": 0.04776774644851685, "step": 8535 }, { "epoch": 0.0803764705882353, "grad_norm": 0.7367093599421216, "learning_rate": 7.056561341710086e-06, "loss": 0.03401623070240021, "step": 8540 }, { "epoch": 0.0804235294117647, "grad_norm": 0.9844184372995334, "learning_rate": 7.054496268550737e-06, "loss": 0.04834948778152466, "step": 8545 }, { "epoch": 0.08047058823529411, "grad_norm": 0.5457358640898662, "learning_rate": 7.052433007335962e-06, "loss": 0.03943677246570587, "step": 8550 }, { "epoch": 0.08051764705882353, "grad_norm": 0.5951899043331406, "learning_rate": 7.0503715554175725e-06, "loss": 0.04416053891181946, "step": 8555 }, { "epoch": 0.08056470588235294, "grad_norm": 0.8221663512576957, "learning_rate": 7.0483119101527956e-06, "loss": 0.038949528336524965, "step": 8560 }, { "epoch": 0.08061176470588235, "grad_norm": 1.019564108046131, "learning_rate": 7.046254068904254e-06, "loss": 0.04393090009689331, "step": 8565 }, { "epoch": 0.08065882352941177, "grad_norm": 0.8001173587981586, "learning_rate": 7.044198029039963e-06, "loss": 0.035400408506393435, "step": 8570 }, { "epoch": 0.08070588235294118, "grad_norm": 0.7248475527542835, "learning_rate": 7.042143787933309e-06, "loss": 0.040976536273956296, "step": 8575 }, { "epoch": 0.08075294117647058, "grad_norm": 0.8521334240985476, "learning_rate": 7.040091342963036e-06, "loss": 0.05677019357681275, "step": 8580 }, { "epoch": 0.0808, "grad_norm": 0.7062382380933739, "learning_rate": 7.038040691513235e-06, "loss": 0.04364278018474579, "step": 8585 }, { "epoch": 0.08084705882352941, "grad_norm": 0.5463372032038516, "learning_rate": 7.035991830973324e-06, "loss": 0.048620092868804934, "step": 8590 }, { "epoch": 0.08089411764705882, "grad_norm": 0.6494930900459739, "learning_rate": 7.0339447587380415e-06, "loss": 0.0378721833229065, "step": 8595 }, { "epoch": 0.08094117647058824, "grad_norm": 0.8653429923385891, "learning_rate": 7.031899472207427e-06, "loss": 0.03954721093177795, "step": 8600 }, { "epoch": 0.08098823529411765, "grad_norm": 0.6714091207952442, "learning_rate": 7.029855968786807e-06, "loss": 0.04080085158348083, "step": 8605 }, { "epoch": 0.08103529411764705, "grad_norm": 1.0517982759522084, "learning_rate": 7.027814245886787e-06, "loss": 0.03988112211227417, "step": 8610 }, { "epoch": 0.08108235294117647, "grad_norm": 0.8794689035353347, "learning_rate": 7.025774300923234e-06, "loss": 0.0438003659248352, "step": 8615 }, { "epoch": 0.08112941176470588, "grad_norm": 0.6454138164376968, "learning_rate": 7.023736131317257e-06, "loss": 0.041916072368621826, "step": 8620 }, { "epoch": 0.0811764705882353, "grad_norm": 0.7521479445969379, "learning_rate": 7.021699734495205e-06, "loss": 0.04221407175064087, "step": 8625 }, { "epoch": 0.08122352941176471, "grad_norm": 0.5836806224200355, "learning_rate": 7.019665107888642e-06, "loss": 0.04088566601276398, "step": 8630 }, { "epoch": 0.08127058823529412, "grad_norm": 0.8036842559256012, "learning_rate": 7.017632248934344e-06, "loss": 0.04359248876571655, "step": 8635 }, { "epoch": 0.08131764705882354, "grad_norm": 0.6162210705127572, "learning_rate": 7.0156011550742774e-06, "loss": 0.038985824584960936, "step": 8640 }, { "epoch": 0.08136470588235294, "grad_norm": 0.7701971649753925, "learning_rate": 7.013571823755587e-06, "loss": 0.0429975152015686, "step": 8645 }, { "epoch": 0.08141176470588235, "grad_norm": 0.7990906811205213, "learning_rate": 7.011544252430583e-06, "loss": 0.03477831482887268, "step": 8650 }, { "epoch": 0.08145882352941176, "grad_norm": 0.7990533973041295, "learning_rate": 7.009518438556734e-06, "loss": 0.04429331123828888, "step": 8655 }, { "epoch": 0.08150588235294118, "grad_norm": 0.680593894540942, "learning_rate": 7.007494379596642e-06, "loss": 0.0380113959312439, "step": 8660 }, { "epoch": 0.08155294117647059, "grad_norm": 0.8057942673936328, "learning_rate": 7.005472073018038e-06, "loss": 0.0461940586566925, "step": 8665 }, { "epoch": 0.0816, "grad_norm": 0.8246870890803455, "learning_rate": 7.0034515162937636e-06, "loss": 0.03921870589256286, "step": 8670 }, { "epoch": 0.0816470588235294, "grad_norm": 0.7938962288622478, "learning_rate": 7.001432706901759e-06, "loss": 0.04342767596244812, "step": 8675 }, { "epoch": 0.08169411764705882, "grad_norm": 0.9268819338589552, "learning_rate": 6.999415642325056e-06, "loss": 0.035295242071151735, "step": 8680 }, { "epoch": 0.08174117647058823, "grad_norm": 0.800499932914539, "learning_rate": 6.997400320051752e-06, "loss": 0.04518255293369293, "step": 8685 }, { "epoch": 0.08178823529411765, "grad_norm": 0.9274084495658053, "learning_rate": 6.995386737575007e-06, "loss": 0.04380730390548706, "step": 8690 }, { "epoch": 0.08183529411764706, "grad_norm": 0.5731860206135363, "learning_rate": 6.993374892393032e-06, "loss": 0.03504987359046936, "step": 8695 }, { "epoch": 0.08188235294117648, "grad_norm": 0.6874297143115095, "learning_rate": 6.991364782009067e-06, "loss": 0.04105767607688904, "step": 8700 }, { "epoch": 0.08192941176470588, "grad_norm": 0.7383231506614012, "learning_rate": 6.989356403931372e-06, "loss": 0.04148807823657989, "step": 8705 }, { "epoch": 0.08197647058823529, "grad_norm": 0.9680416960089052, "learning_rate": 6.987349755673217e-06, "loss": 0.03875802159309387, "step": 8710 }, { "epoch": 0.0820235294117647, "grad_norm": 0.6563832570590684, "learning_rate": 6.985344834752866e-06, "loss": 0.04260079562664032, "step": 8715 }, { "epoch": 0.08207058823529412, "grad_norm": 0.8371591559997982, "learning_rate": 6.983341638693565e-06, "loss": 0.0434187650680542, "step": 8720 }, { "epoch": 0.08211764705882353, "grad_norm": 0.7546138955657208, "learning_rate": 6.98134016502353e-06, "loss": 0.05078037977218628, "step": 8725 }, { "epoch": 0.08216470588235295, "grad_norm": 0.8463603878021199, "learning_rate": 6.979340411275928e-06, "loss": 0.04041345119476318, "step": 8730 }, { "epoch": 0.08221176470588236, "grad_norm": 0.9019476497215687, "learning_rate": 6.977342374988878e-06, "loss": 0.042774307727813723, "step": 8735 }, { "epoch": 0.08225882352941176, "grad_norm": 0.8799991573127176, "learning_rate": 6.975346053705423e-06, "loss": 0.04582347571849823, "step": 8740 }, { "epoch": 0.08230588235294117, "grad_norm": 0.7543555671852151, "learning_rate": 6.973351444973528e-06, "loss": 0.03899899423122406, "step": 8745 }, { "epoch": 0.08235294117647059, "grad_norm": 0.9535783746084491, "learning_rate": 6.971358546346056e-06, "loss": 0.04233327507972717, "step": 8750 }, { "epoch": 0.0824, "grad_norm": 0.7941575705088688, "learning_rate": 6.969367355380774e-06, "loss": 0.03877993226051331, "step": 8755 }, { "epoch": 0.08244705882352941, "grad_norm": 0.6719788316998071, "learning_rate": 6.967377869640322e-06, "loss": 0.04532822370529175, "step": 8760 }, { "epoch": 0.08249411764705883, "grad_norm": 0.956445484610979, "learning_rate": 6.965390086692206e-06, "loss": 0.04755415916442871, "step": 8765 }, { "epoch": 0.08254117647058823, "grad_norm": 0.8730289082201225, "learning_rate": 6.9634040041087936e-06, "loss": 0.03730195164680481, "step": 8770 }, { "epoch": 0.08258823529411764, "grad_norm": 0.8345751300522206, "learning_rate": 6.961419619467288e-06, "loss": 0.0390250027179718, "step": 8775 }, { "epoch": 0.08263529411764706, "grad_norm": 1.1844484455598654, "learning_rate": 6.959436930349729e-06, "loss": 0.044278484582901, "step": 8780 }, { "epoch": 0.08268235294117647, "grad_norm": 0.9247280320403533, "learning_rate": 6.957455934342972e-06, "loss": 0.0487098217010498, "step": 8785 }, { "epoch": 0.08272941176470588, "grad_norm": 0.6167428460936581, "learning_rate": 6.955476629038674e-06, "loss": 0.041763827204704285, "step": 8790 }, { "epoch": 0.0827764705882353, "grad_norm": 0.6858190777542694, "learning_rate": 6.9534990120332924e-06, "loss": 0.0473728597164154, "step": 8795 }, { "epoch": 0.08282352941176471, "grad_norm": 0.6462148300551736, "learning_rate": 6.9515230809280586e-06, "loss": 0.03990352153778076, "step": 8800 }, { "epoch": 0.08287058823529411, "grad_norm": 0.6985401540019239, "learning_rate": 6.9495488333289805e-06, "loss": 0.03582478165626526, "step": 8805 }, { "epoch": 0.08291764705882353, "grad_norm": 0.8111455284301394, "learning_rate": 6.947576266846813e-06, "loss": 0.0511339545249939, "step": 8810 }, { "epoch": 0.08296470588235294, "grad_norm": 0.6901349397514648, "learning_rate": 6.9456053790970625e-06, "loss": 0.036801666021347046, "step": 8815 }, { "epoch": 0.08301176470588235, "grad_norm": 0.6504436251190637, "learning_rate": 6.943636167699967e-06, "loss": 0.04209767580032349, "step": 8820 }, { "epoch": 0.08305882352941177, "grad_norm": 0.6723528844603707, "learning_rate": 6.941668630280484e-06, "loss": 0.0415660172700882, "step": 8825 }, { "epoch": 0.08310588235294118, "grad_norm": 0.8877068527143038, "learning_rate": 6.939702764468277e-06, "loss": 0.038405376672744754, "step": 8830 }, { "epoch": 0.08315294117647058, "grad_norm": 0.9784667156806506, "learning_rate": 6.9377385678977096e-06, "loss": 0.038795819878578185, "step": 8835 }, { "epoch": 0.0832, "grad_norm": 0.6977055317720066, "learning_rate": 6.9357760382078266e-06, "loss": 0.03826616704463959, "step": 8840 }, { "epoch": 0.08324705882352941, "grad_norm": 0.7076470307749524, "learning_rate": 6.933815173042346e-06, "loss": 0.03893873691558838, "step": 8845 }, { "epoch": 0.08329411764705882, "grad_norm": 0.6433876332892007, "learning_rate": 6.931855970049648e-06, "loss": 0.039233839511871337, "step": 8850 }, { "epoch": 0.08334117647058824, "grad_norm": 0.8116387994432099, "learning_rate": 6.929898426882759e-06, "loss": 0.05028331279754639, "step": 8855 }, { "epoch": 0.08338823529411765, "grad_norm": 0.7720563736031054, "learning_rate": 6.927942541199344e-06, "loss": 0.04175340235233307, "step": 8860 }, { "epoch": 0.08343529411764705, "grad_norm": 0.8808946754394558, "learning_rate": 6.925988310661691e-06, "loss": 0.05150514841079712, "step": 8865 }, { "epoch": 0.08348235294117647, "grad_norm": 1.0543923448078396, "learning_rate": 6.924035732936705e-06, "loss": 0.04539868235588074, "step": 8870 }, { "epoch": 0.08352941176470588, "grad_norm": 0.5969085747009146, "learning_rate": 6.922084805695889e-06, "loss": 0.031255117058753966, "step": 8875 }, { "epoch": 0.0835764705882353, "grad_norm": 0.7233765962502591, "learning_rate": 6.9201355266153385e-06, "loss": 0.03817217350006104, "step": 8880 }, { "epoch": 0.08362352941176471, "grad_norm": 1.1678251505570976, "learning_rate": 6.9181878933757245e-06, "loss": 0.04784274995326996, "step": 8885 }, { "epoch": 0.08367058823529412, "grad_norm": 0.7914002914863847, "learning_rate": 6.9162419036622875e-06, "loss": 0.043749493360519406, "step": 8890 }, { "epoch": 0.08371764705882354, "grad_norm": 0.7457172804199723, "learning_rate": 6.9142975551648205e-06, "loss": 0.036763334274291994, "step": 8895 }, { "epoch": 0.08376470588235294, "grad_norm": 0.6937531368277489, "learning_rate": 6.912354845577666e-06, "loss": 0.04134987592697144, "step": 8900 }, { "epoch": 0.08381176470588235, "grad_norm": 1.0565535230905385, "learning_rate": 6.910413772599691e-06, "loss": 0.04781084656715393, "step": 8905 }, { "epoch": 0.08385882352941176, "grad_norm": 0.8692182017844055, "learning_rate": 6.908474333934288e-06, "loss": 0.04826708436012268, "step": 8910 }, { "epoch": 0.08390588235294118, "grad_norm": 1.2587088946847926, "learning_rate": 6.906536527289358e-06, "loss": 0.03968512117862701, "step": 8915 }, { "epoch": 0.08395294117647059, "grad_norm": 0.6706589934162865, "learning_rate": 6.9046003503772995e-06, "loss": 0.03730141520500183, "step": 8920 }, { "epoch": 0.084, "grad_norm": 0.745559442426267, "learning_rate": 6.902665800914997e-06, "loss": 0.040929090976715085, "step": 8925 }, { "epoch": 0.0840470588235294, "grad_norm": 1.196556997785126, "learning_rate": 6.900732876623813e-06, "loss": 0.04294393658638, "step": 8930 }, { "epoch": 0.08409411764705882, "grad_norm": 0.6451214464776693, "learning_rate": 6.898801575229569e-06, "loss": 0.03463381230831146, "step": 8935 }, { "epoch": 0.08414117647058823, "grad_norm": 1.0007110305042684, "learning_rate": 6.896871894462544e-06, "loss": 0.040076452493667605, "step": 8940 }, { "epoch": 0.08418823529411765, "grad_norm": 0.6273289031317434, "learning_rate": 6.894943832057459e-06, "loss": 0.0323174923658371, "step": 8945 }, { "epoch": 0.08423529411764706, "grad_norm": 0.9239186141878838, "learning_rate": 6.893017385753461e-06, "loss": 0.039687579870224, "step": 8950 }, { "epoch": 0.08428235294117647, "grad_norm": 1.487676058200855, "learning_rate": 6.89109255329412e-06, "loss": 0.04220586121082306, "step": 8955 }, { "epoch": 0.08432941176470589, "grad_norm": 0.7212237040676107, "learning_rate": 6.889169332427412e-06, "loss": 0.0316163420677185, "step": 8960 }, { "epoch": 0.08437647058823529, "grad_norm": 0.5564408476925605, "learning_rate": 6.887247720905713e-06, "loss": 0.04280976057052612, "step": 8965 }, { "epoch": 0.0844235294117647, "grad_norm": 3.1110587534337593, "learning_rate": 6.885327716485781e-06, "loss": 0.04229828119277954, "step": 8970 }, { "epoch": 0.08447058823529412, "grad_norm": 0.7710626621930945, "learning_rate": 6.883409316928752e-06, "loss": 0.029865193367004394, "step": 8975 }, { "epoch": 0.08451764705882353, "grad_norm": 0.7598543231612741, "learning_rate": 6.881492520000126e-06, "loss": 0.044534051418304445, "step": 8980 }, { "epoch": 0.08456470588235294, "grad_norm": 0.5913560580929867, "learning_rate": 6.879577323469756e-06, "loss": 0.03841285109519958, "step": 8985 }, { "epoch": 0.08461176470588236, "grad_norm": 0.6105298296437166, "learning_rate": 6.877663725111836e-06, "loss": 0.04356237649917603, "step": 8990 }, { "epoch": 0.08465882352941176, "grad_norm": 0.949210951697479, "learning_rate": 6.8757517227048935e-06, "loss": 0.04047861099243164, "step": 8995 }, { "epoch": 0.08470588235294117, "grad_norm": 1.0068145910990574, "learning_rate": 6.873841314031774e-06, "loss": 0.04341179132461548, "step": 9000 }, { "epoch": 0.08475294117647059, "grad_norm": 0.7834289983042515, "learning_rate": 6.871932496879636e-06, "loss": 0.03342318534851074, "step": 9005 }, { "epoch": 0.0848, "grad_norm": 0.8935529746945838, "learning_rate": 6.870025269039934e-06, "loss": 0.03971190452575683, "step": 9010 }, { "epoch": 0.08484705882352941, "grad_norm": 0.5754522947588141, "learning_rate": 6.8681196283084115e-06, "loss": 0.035812371969223024, "step": 9015 }, { "epoch": 0.08489411764705883, "grad_norm": 0.91426313066161, "learning_rate": 6.866215572485092e-06, "loss": 0.045473727583885196, "step": 9020 }, { "epoch": 0.08494117647058824, "grad_norm": 0.5951997829396168, "learning_rate": 6.8643130993742664e-06, "loss": 0.03804156184196472, "step": 9025 }, { "epoch": 0.08498823529411764, "grad_norm": 0.9594494103951997, "learning_rate": 6.8624122067844755e-06, "loss": 0.04339553713798523, "step": 9030 }, { "epoch": 0.08503529411764706, "grad_norm": 0.8726067641043345, "learning_rate": 6.860512892528512e-06, "loss": 0.039347225427627565, "step": 9035 }, { "epoch": 0.08508235294117647, "grad_norm": 0.9962336579369149, "learning_rate": 6.858615154423402e-06, "loss": 0.04510728120803833, "step": 9040 }, { "epoch": 0.08512941176470588, "grad_norm": 0.8016445833668977, "learning_rate": 6.856718990290396e-06, "loss": 0.03971085548400879, "step": 9045 }, { "epoch": 0.0851764705882353, "grad_norm": 0.7234531929439926, "learning_rate": 6.854824397954958e-06, "loss": 0.03932545781135559, "step": 9050 }, { "epoch": 0.08522352941176471, "grad_norm": 0.5276228408414458, "learning_rate": 6.852931375246758e-06, "loss": 0.04113103747367859, "step": 9055 }, { "epoch": 0.08527058823529411, "grad_norm": 1.1436423332864363, "learning_rate": 6.851039919999654e-06, "loss": 0.04252873361110687, "step": 9060 }, { "epoch": 0.08531764705882353, "grad_norm": 1.2288928958069916, "learning_rate": 6.849150030051693e-06, "loss": 0.04328763782978058, "step": 9065 }, { "epoch": 0.08536470588235294, "grad_norm": 0.9439305617479815, "learning_rate": 6.847261703245092e-06, "loss": 0.0495604395866394, "step": 9070 }, { "epoch": 0.08541176470588235, "grad_norm": 0.8066694061074333, "learning_rate": 6.845374937426229e-06, "loss": 0.03748957514762878, "step": 9075 }, { "epoch": 0.08545882352941177, "grad_norm": 0.7758240417425821, "learning_rate": 6.843489730445634e-06, "loss": 0.0373817503452301, "step": 9080 }, { "epoch": 0.08550588235294118, "grad_norm": 0.940492712047335, "learning_rate": 6.8416060801579775e-06, "loss": 0.036978495121002194, "step": 9085 }, { "epoch": 0.08555294117647058, "grad_norm": 1.1891482847860082, "learning_rate": 6.839723984422066e-06, "loss": 0.03862848579883575, "step": 9090 }, { "epoch": 0.0856, "grad_norm": 0.7914693063749714, "learning_rate": 6.837843441100821e-06, "loss": 0.038118612766265866, "step": 9095 }, { "epoch": 0.08564705882352941, "grad_norm": 0.6781770263420813, "learning_rate": 6.835964448061276e-06, "loss": 0.037748807668685914, "step": 9100 }, { "epoch": 0.08569411764705882, "grad_norm": 0.7811651268810594, "learning_rate": 6.83408700317457e-06, "loss": 0.046468150615692136, "step": 9105 }, { "epoch": 0.08574117647058824, "grad_norm": 0.9008343386182786, "learning_rate": 6.832211104315928e-06, "loss": 0.03999968469142914, "step": 9110 }, { "epoch": 0.08578823529411765, "grad_norm": 0.8771915720741743, "learning_rate": 6.830336749364654e-06, "loss": 0.041839584708213806, "step": 9115 }, { "epoch": 0.08583529411764707, "grad_norm": 0.8372063380630844, "learning_rate": 6.828463936204127e-06, "loss": 0.03935074806213379, "step": 9120 }, { "epoch": 0.08588235294117647, "grad_norm": 0.7184508435783724, "learning_rate": 6.826592662721783e-06, "loss": 0.03767170310020447, "step": 9125 }, { "epoch": 0.08592941176470588, "grad_norm": 0.8810806864280998, "learning_rate": 6.824722926809108e-06, "loss": 0.037111124396324156, "step": 9130 }, { "epoch": 0.0859764705882353, "grad_norm": 0.8056686243194605, "learning_rate": 6.822854726361632e-06, "loss": 0.03849880695343018, "step": 9135 }, { "epoch": 0.08602352941176471, "grad_norm": 0.8544267825270231, "learning_rate": 6.8209880592789106e-06, "loss": 0.042886209487915036, "step": 9140 }, { "epoch": 0.08607058823529412, "grad_norm": 0.9670616361303072, "learning_rate": 6.819122923464523e-06, "loss": 0.04325478971004486, "step": 9145 }, { "epoch": 0.08611764705882353, "grad_norm": 0.8716884284289331, "learning_rate": 6.817259316826057e-06, "loss": 0.04418821930885315, "step": 9150 }, { "epoch": 0.08616470588235293, "grad_norm": 0.7347601793659232, "learning_rate": 6.815397237275103e-06, "loss": 0.0375998318195343, "step": 9155 }, { "epoch": 0.08621176470588235, "grad_norm": 0.7842577459690433, "learning_rate": 6.813536682727242e-06, "loss": 0.036226782202720645, "step": 9160 }, { "epoch": 0.08625882352941176, "grad_norm": 0.9707399314301633, "learning_rate": 6.811677651102034e-06, "loss": 0.03705177307128906, "step": 9165 }, { "epoch": 0.08630588235294118, "grad_norm": 0.7157540259281024, "learning_rate": 6.809820140323011e-06, "loss": 0.03913466334342956, "step": 9170 }, { "epoch": 0.08635294117647059, "grad_norm": 0.7780627256748731, "learning_rate": 6.807964148317669e-06, "loss": 0.041901445388793944, "step": 9175 }, { "epoch": 0.0864, "grad_norm": 0.8451439821849459, "learning_rate": 6.806109673017454e-06, "loss": 0.04129839539527893, "step": 9180 }, { "epoch": 0.08644705882352942, "grad_norm": 0.7795236238278316, "learning_rate": 6.804256712357752e-06, "loss": 0.03632502853870392, "step": 9185 }, { "epoch": 0.08649411764705882, "grad_norm": 0.770188541793854, "learning_rate": 6.8024052642778835e-06, "loss": 0.041157844662666324, "step": 9190 }, { "epoch": 0.08654117647058823, "grad_norm": 0.7550942601772903, "learning_rate": 6.800555326721096e-06, "loss": 0.037829655408859256, "step": 9195 }, { "epoch": 0.08658823529411765, "grad_norm": 0.7328450067798326, "learning_rate": 6.798706897634541e-06, "loss": 0.04656876921653748, "step": 9200 }, { "epoch": 0.08663529411764706, "grad_norm": 0.643616847430563, "learning_rate": 6.796859974969282e-06, "loss": 0.04293566346168518, "step": 9205 }, { "epoch": 0.08668235294117647, "grad_norm": 0.7519778697836252, "learning_rate": 6.795014556680274e-06, "loss": 0.04268043041229248, "step": 9210 }, { "epoch": 0.08672941176470589, "grad_norm": 0.8594389358729053, "learning_rate": 6.7931706407263535e-06, "loss": 0.0415995717048645, "step": 9215 }, { "epoch": 0.08677647058823529, "grad_norm": 0.5963155051741142, "learning_rate": 6.7913282250702376e-06, "loss": 0.03767790794372559, "step": 9220 }, { "epoch": 0.0868235294117647, "grad_norm": 0.6665320768540237, "learning_rate": 6.789487307678504e-06, "loss": 0.049594083428382875, "step": 9225 }, { "epoch": 0.08687058823529412, "grad_norm": 1.4301636613618458, "learning_rate": 6.787647886521591e-06, "loss": 0.04476507306098938, "step": 9230 }, { "epoch": 0.08691764705882353, "grad_norm": 0.8670689712278729, "learning_rate": 6.785809959573782e-06, "loss": 0.034924489259719846, "step": 9235 }, { "epoch": 0.08696470588235294, "grad_norm": 0.6917121148317571, "learning_rate": 6.783973524813196e-06, "loss": 0.034074667096138, "step": 9240 }, { "epoch": 0.08701176470588236, "grad_norm": 1.0261067914966886, "learning_rate": 6.782138580221784e-06, "loss": 0.036955487728118894, "step": 9245 }, { "epoch": 0.08705882352941176, "grad_norm": 1.0453809394376186, "learning_rate": 6.780305123785313e-06, "loss": 0.03661714196205139, "step": 9250 }, { "epoch": 0.08710588235294117, "grad_norm": 1.214989103055067, "learning_rate": 6.778473153493361e-06, "loss": 0.039147955179214475, "step": 9255 }, { "epoch": 0.08715294117647059, "grad_norm": 0.9787037809196497, "learning_rate": 6.776642667339304e-06, "loss": 0.037692949175834656, "step": 9260 }, { "epoch": 0.0872, "grad_norm": 0.656628415894459, "learning_rate": 6.774813663320311e-06, "loss": 0.04226832985877991, "step": 9265 }, { "epoch": 0.08724705882352941, "grad_norm": 0.8014560501732055, "learning_rate": 6.772986139437334e-06, "loss": 0.040265238285064696, "step": 9270 }, { "epoch": 0.08729411764705883, "grad_norm": 0.8683921237801142, "learning_rate": 6.7711600936950986e-06, "loss": 0.041419774293899536, "step": 9275 }, { "epoch": 0.08734117647058824, "grad_norm": 1.0211679232596884, "learning_rate": 6.769335524102086e-06, "loss": 0.038726586103439334, "step": 9280 }, { "epoch": 0.08738823529411764, "grad_norm": 0.9030797047225076, "learning_rate": 6.7675124286705415e-06, "loss": 0.045632708072662356, "step": 9285 }, { "epoch": 0.08743529411764706, "grad_norm": 0.6742581541162626, "learning_rate": 6.765690805416453e-06, "loss": 0.03904982209205628, "step": 9290 }, { "epoch": 0.08748235294117647, "grad_norm": 0.7353889944431803, "learning_rate": 6.76387065235954e-06, "loss": 0.04365836977958679, "step": 9295 }, { "epoch": 0.08752941176470588, "grad_norm": 0.8080333022571536, "learning_rate": 6.7620519675232546e-06, "loss": 0.04106839895248413, "step": 9300 }, { "epoch": 0.0875764705882353, "grad_norm": 0.7203149554850758, "learning_rate": 6.760234748934764e-06, "loss": 0.038960039615631104, "step": 9305 }, { "epoch": 0.08762352941176471, "grad_norm": 0.5831950302827464, "learning_rate": 6.758418994624948e-06, "loss": 0.031039172410964967, "step": 9310 }, { "epoch": 0.08767058823529411, "grad_norm": 0.6117821891873102, "learning_rate": 6.756604702628384e-06, "loss": 0.04715862274169922, "step": 9315 }, { "epoch": 0.08771764705882353, "grad_norm": 1.0541474528388717, "learning_rate": 6.7547918709833405e-06, "loss": 0.04189038276672363, "step": 9320 }, { "epoch": 0.08776470588235294, "grad_norm": 0.7392429750089582, "learning_rate": 6.752980497731769e-06, "loss": 0.04229905605316162, "step": 9325 }, { "epoch": 0.08781176470588235, "grad_norm": 1.00135977663856, "learning_rate": 6.751170580919295e-06, "loss": 0.03603723645210266, "step": 9330 }, { "epoch": 0.08785882352941177, "grad_norm": 0.9092583878799417, "learning_rate": 6.749362118595209e-06, "loss": 0.03816964328289032, "step": 9335 }, { "epoch": 0.08790588235294118, "grad_norm": 0.7110221453709432, "learning_rate": 6.747555108812457e-06, "loss": 0.04239494204521179, "step": 9340 }, { "epoch": 0.0879529411764706, "grad_norm": 0.7724010228183343, "learning_rate": 6.745749549627631e-06, "loss": 0.035585203766822816, "step": 9345 }, { "epoch": 0.088, "grad_norm": 2.6483396565314776, "learning_rate": 6.743945439100961e-06, "loss": 0.03307757973670959, "step": 9350 }, { "epoch": 0.08804705882352941, "grad_norm": 0.6551622819716745, "learning_rate": 6.74214277529631e-06, "loss": 0.040553143620491026, "step": 9355 }, { "epoch": 0.08809411764705882, "grad_norm": 0.6881792138140109, "learning_rate": 6.740341556281158e-06, "loss": 0.03503621220588684, "step": 9360 }, { "epoch": 0.08814117647058824, "grad_norm": 0.6655160151520894, "learning_rate": 6.7385417801265995e-06, "loss": 0.03575009703636169, "step": 9365 }, { "epoch": 0.08818823529411765, "grad_norm": 0.8673672713796987, "learning_rate": 6.73674344490733e-06, "loss": 0.03957500457763672, "step": 9370 }, { "epoch": 0.08823529411764706, "grad_norm": 1.2019231999381275, "learning_rate": 6.734946548701642e-06, "loss": 0.0401077926158905, "step": 9375 }, { "epoch": 0.08828235294117646, "grad_norm": 0.8207679106201684, "learning_rate": 6.733151089591413e-06, "loss": 0.041169068217277525, "step": 9380 }, { "epoch": 0.08832941176470588, "grad_norm": 0.9461931464410559, "learning_rate": 6.731357065662099e-06, "loss": 0.04218246936798096, "step": 9385 }, { "epoch": 0.08837647058823529, "grad_norm": 0.8103405528015768, "learning_rate": 6.729564475002722e-06, "loss": 0.030695223808288576, "step": 9390 }, { "epoch": 0.0884235294117647, "grad_norm": 0.6540992102658656, "learning_rate": 6.72777331570587e-06, "loss": 0.04630417227745056, "step": 9395 }, { "epoch": 0.08847058823529412, "grad_norm": 0.7015808064747311, "learning_rate": 6.725983585867678e-06, "loss": 0.03872253298759461, "step": 9400 }, { "epoch": 0.08851764705882353, "grad_norm": 0.9440000258213711, "learning_rate": 6.7241952835878255e-06, "loss": 0.03627673387527466, "step": 9405 }, { "epoch": 0.08856470588235293, "grad_norm": 0.7044302005371196, "learning_rate": 6.722408406969529e-06, "loss": 0.04296727776527405, "step": 9410 }, { "epoch": 0.08861176470588235, "grad_norm": 0.6788248573887264, "learning_rate": 6.720622954119529e-06, "loss": 0.04338708817958832, "step": 9415 }, { "epoch": 0.08865882352941176, "grad_norm": 0.660098942096965, "learning_rate": 6.718838923148083e-06, "loss": 0.03608944416046143, "step": 9420 }, { "epoch": 0.08870588235294118, "grad_norm": 1.0325842434132078, "learning_rate": 6.717056312168962e-06, "loss": 0.04244666695594788, "step": 9425 }, { "epoch": 0.08875294117647059, "grad_norm": 0.6051356587819599, "learning_rate": 6.715275119299437e-06, "loss": 0.0336349219083786, "step": 9430 }, { "epoch": 0.0888, "grad_norm": 0.9363040683675714, "learning_rate": 6.713495342660271e-06, "loss": 0.04039005637168884, "step": 9435 }, { "epoch": 0.08884705882352942, "grad_norm": 0.7795707091552875, "learning_rate": 6.71171698037571e-06, "loss": 0.03990412652492523, "step": 9440 }, { "epoch": 0.08889411764705882, "grad_norm": 0.8149528813927702, "learning_rate": 6.70994003057348e-06, "loss": 0.03538329899311066, "step": 9445 }, { "epoch": 0.08894117647058823, "grad_norm": 0.6816471787854886, "learning_rate": 6.708164491384772e-06, "loss": 0.0451792061328888, "step": 9450 }, { "epoch": 0.08898823529411765, "grad_norm": 0.6516167239006437, "learning_rate": 6.706390360944237e-06, "loss": 0.036090797185897826, "step": 9455 }, { "epoch": 0.08903529411764706, "grad_norm": 0.7423317487347956, "learning_rate": 6.704617637389982e-06, "loss": 0.035808682441711426, "step": 9460 }, { "epoch": 0.08908235294117647, "grad_norm": 0.8418724715883599, "learning_rate": 6.702846318863549e-06, "loss": 0.037008872628211974, "step": 9465 }, { "epoch": 0.08912941176470589, "grad_norm": 0.9606824651314974, "learning_rate": 6.701076403509923e-06, "loss": 0.0394602507352829, "step": 9470 }, { "epoch": 0.08917647058823529, "grad_norm": 0.8130964381199765, "learning_rate": 6.699307889477512e-06, "loss": 0.036573418974876405, "step": 9475 }, { "epoch": 0.0892235294117647, "grad_norm": 1.0432987683240904, "learning_rate": 6.697540774918142e-06, "loss": 0.03978976011276245, "step": 9480 }, { "epoch": 0.08927058823529412, "grad_norm": 0.8364513564915254, "learning_rate": 6.695775057987055e-06, "loss": 0.04560332298278809, "step": 9485 }, { "epoch": 0.08931764705882353, "grad_norm": 0.91487009930179, "learning_rate": 6.694010736842887e-06, "loss": 0.04740684330463409, "step": 9490 }, { "epoch": 0.08936470588235294, "grad_norm": 0.924355408938712, "learning_rate": 6.692247809647678e-06, "loss": 0.047283226251602174, "step": 9495 }, { "epoch": 0.08941176470588236, "grad_norm": 0.6983054966964765, "learning_rate": 6.690486274566849e-06, "loss": 0.04057443141937256, "step": 9500 }, { "epoch": 0.08945882352941177, "grad_norm": 0.5893208618145479, "learning_rate": 6.688726129769199e-06, "loss": 0.0429552972316742, "step": 9505 }, { "epoch": 0.08950588235294117, "grad_norm": 0.7809870177919965, "learning_rate": 6.6869673734269e-06, "loss": 0.032868558168411256, "step": 9510 }, { "epoch": 0.08955294117647059, "grad_norm": 0.6668670860057763, "learning_rate": 6.6852100037154844e-06, "loss": 0.030065417289733887, "step": 9515 }, { "epoch": 0.0896, "grad_norm": 0.7455213964344242, "learning_rate": 6.683454018813843e-06, "loss": 0.046268928050994876, "step": 9520 }, { "epoch": 0.08964705882352941, "grad_norm": 0.7614396488883503, "learning_rate": 6.68169941690421e-06, "loss": 0.04044801890850067, "step": 9525 }, { "epoch": 0.08969411764705883, "grad_norm": 0.8721087502966631, "learning_rate": 6.6799461961721565e-06, "loss": 0.046908581256866456, "step": 9530 }, { "epoch": 0.08974117647058824, "grad_norm": 0.6952627915179447, "learning_rate": 6.67819435480659e-06, "loss": 0.03620227575302124, "step": 9535 }, { "epoch": 0.08978823529411764, "grad_norm": 0.7523295041933836, "learning_rate": 6.676443890999735e-06, "loss": 0.03728237450122833, "step": 9540 }, { "epoch": 0.08983529411764705, "grad_norm": 0.8083752817875259, "learning_rate": 6.674694802947138e-06, "loss": 0.044376975297927855, "step": 9545 }, { "epoch": 0.08988235294117647, "grad_norm": 0.653373507459743, "learning_rate": 6.672947088847645e-06, "loss": 0.040241241455078125, "step": 9550 }, { "epoch": 0.08992941176470588, "grad_norm": 0.8194746236110315, "learning_rate": 6.67120074690341e-06, "loss": 0.04050260782241821, "step": 9555 }, { "epoch": 0.0899764705882353, "grad_norm": 0.840467738638315, "learning_rate": 6.669455775319872e-06, "loss": 0.03920261859893799, "step": 9560 }, { "epoch": 0.09002352941176471, "grad_norm": 0.8572937306592616, "learning_rate": 6.667712172305757e-06, "loss": 0.04304627776145935, "step": 9565 }, { "epoch": 0.09007058823529412, "grad_norm": 0.7420381797284193, "learning_rate": 6.6659699360730655e-06, "loss": 0.04326624870300293, "step": 9570 }, { "epoch": 0.09011764705882352, "grad_norm": 0.8304777393033528, "learning_rate": 6.664229064837072e-06, "loss": 0.038446205854415896, "step": 9575 }, { "epoch": 0.09016470588235294, "grad_norm": 0.8296038180910142, "learning_rate": 6.662489556816304e-06, "loss": 0.03881843090057373, "step": 9580 }, { "epoch": 0.09021176470588235, "grad_norm": 0.9273316309288644, "learning_rate": 6.660751410232547e-06, "loss": 0.037400317192077634, "step": 9585 }, { "epoch": 0.09025882352941177, "grad_norm": 1.14041496572175, "learning_rate": 6.6590146233108335e-06, "loss": 0.042406225204467775, "step": 9590 }, { "epoch": 0.09030588235294118, "grad_norm": 0.8347000276203506, "learning_rate": 6.6572791942794306e-06, "loss": 0.04489903151988983, "step": 9595 }, { "epoch": 0.0903529411764706, "grad_norm": 1.1264693226451805, "learning_rate": 6.655545121369839e-06, "loss": 0.049310344457626346, "step": 9600 }, { "epoch": 0.0904, "grad_norm": 0.8640325989655542, "learning_rate": 6.653812402816778e-06, "loss": 0.046983802318573, "step": 9605 }, { "epoch": 0.09044705882352941, "grad_norm": 0.7900584117392231, "learning_rate": 6.652081036858188e-06, "loss": 0.03696361780166626, "step": 9610 }, { "epoch": 0.09049411764705882, "grad_norm": 0.8065036581013896, "learning_rate": 6.650351021735212e-06, "loss": 0.04286531507968903, "step": 9615 }, { "epoch": 0.09054117647058824, "grad_norm": 0.9673724501280726, "learning_rate": 6.648622355692196e-06, "loss": 0.04127687215805054, "step": 9620 }, { "epoch": 0.09058823529411765, "grad_norm": 0.7596450861620069, "learning_rate": 6.646895036976679e-06, "loss": 0.0377388596534729, "step": 9625 }, { "epoch": 0.09063529411764706, "grad_norm": 0.6912122290035811, "learning_rate": 6.645169063839385e-06, "loss": 0.04926061034202576, "step": 9630 }, { "epoch": 0.09068235294117646, "grad_norm": 0.757959731198128, "learning_rate": 6.643444434534213e-06, "loss": 0.03673778772354126, "step": 9635 }, { "epoch": 0.09072941176470588, "grad_norm": 0.5508228168851266, "learning_rate": 6.64172114731824e-06, "loss": 0.03141194880008698, "step": 9640 }, { "epoch": 0.09077647058823529, "grad_norm": 0.7674977710848241, "learning_rate": 6.639999200451697e-06, "loss": 0.041526615619659424, "step": 9645 }, { "epoch": 0.0908235294117647, "grad_norm": 0.7359001132697942, "learning_rate": 6.638278592197978e-06, "loss": 0.03450727760791779, "step": 9650 }, { "epoch": 0.09087058823529412, "grad_norm": 0.8436487956121846, "learning_rate": 6.63655932082362e-06, "loss": 0.03860948085784912, "step": 9655 }, { "epoch": 0.09091764705882353, "grad_norm": 1.0680839404471418, "learning_rate": 6.634841384598305e-06, "loss": 0.04705808162689209, "step": 9660 }, { "epoch": 0.09096470588235295, "grad_norm": 0.834367298148507, "learning_rate": 6.6331247817948465e-06, "loss": 0.03872596025466919, "step": 9665 }, { "epoch": 0.09101176470588235, "grad_norm": 0.9633787063410975, "learning_rate": 6.631409510689187e-06, "loss": 0.04015357196331024, "step": 9670 }, { "epoch": 0.09105882352941176, "grad_norm": 0.9308756136520938, "learning_rate": 6.629695569560385e-06, "loss": 0.030828303098678587, "step": 9675 }, { "epoch": 0.09110588235294118, "grad_norm": 0.7316604579713968, "learning_rate": 6.627982956690612e-06, "loss": 0.03875037133693695, "step": 9680 }, { "epoch": 0.09115294117647059, "grad_norm": 0.9660134924254963, "learning_rate": 6.626271670365145e-06, "loss": 0.040949130058288576, "step": 9685 }, { "epoch": 0.0912, "grad_norm": 0.6838451717907975, "learning_rate": 6.624561708872359e-06, "loss": 0.040693032741546634, "step": 9690 }, { "epoch": 0.09124705882352942, "grad_norm": 0.8653999624197631, "learning_rate": 6.622853070503716e-06, "loss": 0.043216747045516965, "step": 9695 }, { "epoch": 0.09129411764705882, "grad_norm": 0.7738705866478979, "learning_rate": 6.621145753553767e-06, "loss": 0.036798495054245, "step": 9700 }, { "epoch": 0.09134117647058823, "grad_norm": 0.6770931776088488, "learning_rate": 6.619439756320133e-06, "loss": 0.03702298402786255, "step": 9705 }, { "epoch": 0.09138823529411765, "grad_norm": 0.6842435861323953, "learning_rate": 6.617735077103505e-06, "loss": 0.036354219913482665, "step": 9710 }, { "epoch": 0.09143529411764706, "grad_norm": 0.70967006134268, "learning_rate": 6.61603171420764e-06, "loss": 0.03615371584892273, "step": 9715 }, { "epoch": 0.09148235294117647, "grad_norm": 0.5958810823788815, "learning_rate": 6.614329665939345e-06, "loss": 0.03471494913101196, "step": 9720 }, { "epoch": 0.09152941176470589, "grad_norm": 0.6546235829191364, "learning_rate": 6.612628930608477e-06, "loss": 0.033778414130210876, "step": 9725 }, { "epoch": 0.0915764705882353, "grad_norm": 0.7965734634455587, "learning_rate": 6.610929506527931e-06, "loss": 0.03814879059791565, "step": 9730 }, { "epoch": 0.0916235294117647, "grad_norm": 0.5935720306109409, "learning_rate": 6.6092313920136395e-06, "loss": 0.03412275314331055, "step": 9735 }, { "epoch": 0.09167058823529411, "grad_norm": 0.6880158044449775, "learning_rate": 6.60753458538456e-06, "loss": 0.04120055139064789, "step": 9740 }, { "epoch": 0.09171764705882353, "grad_norm": 0.647255585244944, "learning_rate": 6.605839084962664e-06, "loss": 0.03783694505691528, "step": 9745 }, { "epoch": 0.09176470588235294, "grad_norm": 0.6270446563268705, "learning_rate": 6.604144889072945e-06, "loss": 0.039106494188308714, "step": 9750 }, { "epoch": 0.09181176470588236, "grad_norm": 0.6876764046457606, "learning_rate": 6.602451996043395e-06, "loss": 0.03383408188819885, "step": 9755 }, { "epoch": 0.09185882352941177, "grad_norm": 0.6827671132310983, "learning_rate": 6.6007604042050086e-06, "loss": 0.039003366231918336, "step": 9760 }, { "epoch": 0.09190588235294117, "grad_norm": 0.8408801582127263, "learning_rate": 6.59907011189177e-06, "loss": 0.041913282871246335, "step": 9765 }, { "epoch": 0.09195294117647058, "grad_norm": 0.9839958372872314, "learning_rate": 6.597381117440649e-06, "loss": 0.04031192660331726, "step": 9770 }, { "epoch": 0.092, "grad_norm": 0.7966581605718825, "learning_rate": 6.595693419191591e-06, "loss": 0.038621377944946286, "step": 9775 }, { "epoch": 0.09204705882352941, "grad_norm": 0.6737431401736014, "learning_rate": 6.594007015487518e-06, "loss": 0.04094279408454895, "step": 9780 }, { "epoch": 0.09209411764705883, "grad_norm": 0.696300089492697, "learning_rate": 6.592321904674311e-06, "loss": 0.0348619669675827, "step": 9785 }, { "epoch": 0.09214117647058824, "grad_norm": 0.629563302426325, "learning_rate": 6.590638085100811e-06, "loss": 0.03800536990165711, "step": 9790 }, { "epoch": 0.09218823529411764, "grad_norm": 1.2835019272336945, "learning_rate": 6.58895555511881e-06, "loss": 0.037875515222549436, "step": 9795 }, { "epoch": 0.09223529411764705, "grad_norm": 0.8264164676521876, "learning_rate": 6.587274313083042e-06, "loss": 0.03466296195983887, "step": 9800 }, { "epoch": 0.09228235294117647, "grad_norm": 0.7147572609571409, "learning_rate": 6.585594357351178e-06, "loss": 0.03656233549118042, "step": 9805 }, { "epoch": 0.09232941176470588, "grad_norm": 0.6003037367268821, "learning_rate": 6.583915686283825e-06, "loss": 0.03778379559516907, "step": 9810 }, { "epoch": 0.0923764705882353, "grad_norm": 0.7013638197257069, "learning_rate": 6.582238298244505e-06, "loss": 0.03959997892379761, "step": 9815 }, { "epoch": 0.09242352941176471, "grad_norm": 0.7554259446954383, "learning_rate": 6.580562191599662e-06, "loss": 0.04655928909778595, "step": 9820 }, { "epoch": 0.09247058823529412, "grad_norm": 0.6562340968169538, "learning_rate": 6.5788873647186525e-06, "loss": 0.03258412480354309, "step": 9825 }, { "epoch": 0.09251764705882352, "grad_norm": 0.8176979946743304, "learning_rate": 6.577213815973731e-06, "loss": 0.03646551668643951, "step": 9830 }, { "epoch": 0.09256470588235294, "grad_norm": 0.7446346195021, "learning_rate": 6.5755415437400514e-06, "loss": 0.03596840500831604, "step": 9835 }, { "epoch": 0.09261176470588235, "grad_norm": 0.9910370981988199, "learning_rate": 6.573870546395661e-06, "loss": 0.04423884451389313, "step": 9840 }, { "epoch": 0.09265882352941177, "grad_norm": 0.9104955904401604, "learning_rate": 6.572200822321485e-06, "loss": 0.038975882530212405, "step": 9845 }, { "epoch": 0.09270588235294118, "grad_norm": 0.7878995754202056, "learning_rate": 6.570532369901332e-06, "loss": 0.041908839344978334, "step": 9850 }, { "epoch": 0.0927529411764706, "grad_norm": 0.6470021123681403, "learning_rate": 6.568865187521876e-06, "loss": 0.032102805376052854, "step": 9855 }, { "epoch": 0.0928, "grad_norm": 0.7964550369679703, "learning_rate": 6.56719927357266e-06, "loss": 0.04226946234703064, "step": 9860 }, { "epoch": 0.09284705882352941, "grad_norm": 0.8030323344508152, "learning_rate": 6.56553462644608e-06, "loss": 0.04654750525951386, "step": 9865 }, { "epoch": 0.09289411764705882, "grad_norm": 0.8057976172659096, "learning_rate": 6.563871244537383e-06, "loss": 0.04449990391731262, "step": 9870 }, { "epoch": 0.09294117647058824, "grad_norm": 0.9016875471076806, "learning_rate": 6.562209126244665e-06, "loss": 0.040350842475891116, "step": 9875 }, { "epoch": 0.09298823529411765, "grad_norm": 0.7575312107146415, "learning_rate": 6.560548269968857e-06, "loss": 0.03920877277851105, "step": 9880 }, { "epoch": 0.09303529411764706, "grad_norm": 0.8624840410340789, "learning_rate": 6.558888674113721e-06, "loss": 0.04508686661720276, "step": 9885 }, { "epoch": 0.09308235294117648, "grad_norm": 0.7032049963074335, "learning_rate": 6.557230337085846e-06, "loss": 0.04248293936252594, "step": 9890 }, { "epoch": 0.09312941176470588, "grad_norm": 1.3129460708508154, "learning_rate": 6.555573257294637e-06, "loss": 0.04990506172180176, "step": 9895 }, { "epoch": 0.09317647058823529, "grad_norm": 0.8384858158772225, "learning_rate": 6.553917433152316e-06, "loss": 0.03619279265403748, "step": 9900 }, { "epoch": 0.0932235294117647, "grad_norm": 0.7242641947602766, "learning_rate": 6.552262863073906e-06, "loss": 0.03520618081092834, "step": 9905 }, { "epoch": 0.09327058823529412, "grad_norm": 0.5979470603928305, "learning_rate": 6.550609545477231e-06, "loss": 0.034505003690719606, "step": 9910 }, { "epoch": 0.09331764705882353, "grad_norm": 1.1389579059037112, "learning_rate": 6.5489574787829105e-06, "loss": 0.0361982136964798, "step": 9915 }, { "epoch": 0.09336470588235295, "grad_norm": 0.8763375439188409, "learning_rate": 6.547306661414347e-06, "loss": 0.04692710041999817, "step": 9920 }, { "epoch": 0.09341176470588235, "grad_norm": 0.8405153356756765, "learning_rate": 6.545657091797725e-06, "loss": 0.03389594554901123, "step": 9925 }, { "epoch": 0.09345882352941176, "grad_norm": 0.5570429737048709, "learning_rate": 6.544008768362007e-06, "loss": 0.04015337526798248, "step": 9930 }, { "epoch": 0.09350588235294117, "grad_norm": 0.7752475842558235, "learning_rate": 6.5423616895389184e-06, "loss": 0.04884015321731568, "step": 9935 }, { "epoch": 0.09355294117647059, "grad_norm": 0.6357586724860669, "learning_rate": 6.540715853762948e-06, "loss": 0.035580316185951234, "step": 9940 }, { "epoch": 0.0936, "grad_norm": 0.6145209495931102, "learning_rate": 6.53907125947134e-06, "loss": 0.04368492364883423, "step": 9945 }, { "epoch": 0.09364705882352942, "grad_norm": 0.7049686955528346, "learning_rate": 6.537427905104088e-06, "loss": 0.038032883405685426, "step": 9950 }, { "epoch": 0.09369411764705882, "grad_norm": 0.7923646141407397, "learning_rate": 6.535785789103928e-06, "loss": 0.03964385092258453, "step": 9955 }, { "epoch": 0.09374117647058823, "grad_norm": 0.9570333538720407, "learning_rate": 6.534144909916334e-06, "loss": 0.035998481512069705, "step": 9960 }, { "epoch": 0.09378823529411764, "grad_norm": 0.5445401546220026, "learning_rate": 6.532505265989509e-06, "loss": 0.030014508962631227, "step": 9965 }, { "epoch": 0.09383529411764706, "grad_norm": 0.6885186252366956, "learning_rate": 6.5308668557743806e-06, "loss": 0.04385541677474976, "step": 9970 }, { "epoch": 0.09388235294117647, "grad_norm": 0.6056364453259546, "learning_rate": 6.529229677724598e-06, "loss": 0.036984652280807495, "step": 9975 }, { "epoch": 0.09392941176470589, "grad_norm": 0.732383185199857, "learning_rate": 6.527593730296517e-06, "loss": 0.038866478204727176, "step": 9980 }, { "epoch": 0.0939764705882353, "grad_norm": 0.7420359862127975, "learning_rate": 6.525959011949204e-06, "loss": 0.035238015651702884, "step": 9985 }, { "epoch": 0.0940235294117647, "grad_norm": 0.5973404337141923, "learning_rate": 6.5243255211444234e-06, "loss": 0.040208077430725096, "step": 9990 }, { "epoch": 0.09407058823529411, "grad_norm": 0.9922625725216969, "learning_rate": 6.522693256346634e-06, "loss": 0.04753125309944153, "step": 9995 }, { "epoch": 0.09411764705882353, "grad_norm": 0.6448837997858629, "learning_rate": 6.5210622160229794e-06, "loss": 0.03799945116043091, "step": 10000 }, { "epoch": 0.09416470588235294, "grad_norm": 0.9467024237812431, "learning_rate": 6.519432398643291e-06, "loss": 0.039738011360168454, "step": 10005 }, { "epoch": 0.09421176470588236, "grad_norm": 0.8013266333368716, "learning_rate": 6.5178038026800714e-06, "loss": 0.03284009695053101, "step": 10010 }, { "epoch": 0.09425882352941177, "grad_norm": 0.8773729212474816, "learning_rate": 6.516176426608495e-06, "loss": 0.03825916051864624, "step": 10015 }, { "epoch": 0.09430588235294117, "grad_norm": 0.6013590366994167, "learning_rate": 6.514550268906396e-06, "loss": 0.038253337144851685, "step": 10020 }, { "epoch": 0.09435294117647058, "grad_norm": 0.9038244532019379, "learning_rate": 6.512925328054272e-06, "loss": 0.038355854153633115, "step": 10025 }, { "epoch": 0.0944, "grad_norm": 0.7668709197917912, "learning_rate": 6.511301602535272e-06, "loss": 0.03691934943199158, "step": 10030 }, { "epoch": 0.09444705882352941, "grad_norm": 0.5555278859452568, "learning_rate": 6.509679090835184e-06, "loss": 0.03683922290802002, "step": 10035 }, { "epoch": 0.09449411764705883, "grad_norm": 0.6914390781904872, "learning_rate": 6.50805779144244e-06, "loss": 0.03719070553779602, "step": 10040 }, { "epoch": 0.09454117647058824, "grad_norm": 0.6882758524600346, "learning_rate": 6.506437702848112e-06, "loss": 0.040488255023956296, "step": 10045 }, { "epoch": 0.09458823529411765, "grad_norm": 0.8835044702708554, "learning_rate": 6.50481882354589e-06, "loss": 0.04159972667694092, "step": 10050 }, { "epoch": 0.09463529411764705, "grad_norm": 1.9504139736194805, "learning_rate": 6.503201152032092e-06, "loss": 0.03181360363960266, "step": 10055 }, { "epoch": 0.09468235294117647, "grad_norm": 0.8122325825251133, "learning_rate": 6.501584686805652e-06, "loss": 0.0350442111492157, "step": 10060 }, { "epoch": 0.09472941176470588, "grad_norm": 0.47735173740261355, "learning_rate": 6.4999694263681115e-06, "loss": 0.03140139579772949, "step": 10065 }, { "epoch": 0.0947764705882353, "grad_norm": 0.5349098616568319, "learning_rate": 6.4983553692236215e-06, "loss": 0.03871364891529083, "step": 10070 }, { "epoch": 0.09482352941176471, "grad_norm": 0.7348281363199893, "learning_rate": 6.496742513878925e-06, "loss": 0.03827386200428009, "step": 10075 }, { "epoch": 0.09487058823529412, "grad_norm": 0.5747056915481955, "learning_rate": 6.495130858843366e-06, "loss": 0.03803354203701019, "step": 10080 }, { "epoch": 0.09491764705882352, "grad_norm": 0.6940702376665916, "learning_rate": 6.49352040262887e-06, "loss": 0.03302556276321411, "step": 10085 }, { "epoch": 0.09496470588235294, "grad_norm": 0.9358561672934651, "learning_rate": 6.491911143749946e-06, "loss": 0.04185345470905304, "step": 10090 }, { "epoch": 0.09501176470588235, "grad_norm": 0.8273740708585048, "learning_rate": 6.49030308072368e-06, "loss": 0.041125887632369997, "step": 10095 }, { "epoch": 0.09505882352941177, "grad_norm": 0.6537745248092972, "learning_rate": 6.488696212069724e-06, "loss": 0.0351625919342041, "step": 10100 }, { "epoch": 0.09510588235294118, "grad_norm": 0.6209954371449661, "learning_rate": 6.4870905363102975e-06, "loss": 0.03756606578826904, "step": 10105 }, { "epoch": 0.09515294117647059, "grad_norm": 0.9050748600277939, "learning_rate": 6.485486051970179e-06, "loss": 0.03499358892440796, "step": 10110 }, { "epoch": 0.0952, "grad_norm": 0.92401747107433, "learning_rate": 6.483882757576697e-06, "loss": 0.042552840709686277, "step": 10115 }, { "epoch": 0.0952470588235294, "grad_norm": 0.6816349045594028, "learning_rate": 6.482280651659731e-06, "loss": 0.04391119182109833, "step": 10120 }, { "epoch": 0.09529411764705882, "grad_norm": 0.6160319219931961, "learning_rate": 6.480679732751698e-06, "loss": 0.035510790348052976, "step": 10125 }, { "epoch": 0.09534117647058823, "grad_norm": 0.7719402758301102, "learning_rate": 6.479079999387554e-06, "loss": 0.03348591923713684, "step": 10130 }, { "epoch": 0.09538823529411765, "grad_norm": 0.9369925377088476, "learning_rate": 6.477481450104782e-06, "loss": 0.04139852523803711, "step": 10135 }, { "epoch": 0.09543529411764706, "grad_norm": 0.9280417751131653, "learning_rate": 6.475884083443393e-06, "loss": 0.0362396776676178, "step": 10140 }, { "epoch": 0.09548235294117648, "grad_norm": 0.935511489553632, "learning_rate": 6.474287897945918e-06, "loss": 0.03193454742431641, "step": 10145 }, { "epoch": 0.09552941176470588, "grad_norm": 1.0732812841162818, "learning_rate": 6.472692892157394e-06, "loss": 0.0393180251121521, "step": 10150 }, { "epoch": 0.09557647058823529, "grad_norm": 0.7948894299503827, "learning_rate": 6.471099064625374e-06, "loss": 0.03802662491798401, "step": 10155 }, { "epoch": 0.0956235294117647, "grad_norm": 0.8069509136559195, "learning_rate": 6.469506413899911e-06, "loss": 0.032450780272483826, "step": 10160 }, { "epoch": 0.09567058823529412, "grad_norm": 1.0647969368035621, "learning_rate": 6.467914938533551e-06, "loss": 0.047362920641899106, "step": 10165 }, { "epoch": 0.09571764705882353, "grad_norm": 0.8999016571928088, "learning_rate": 6.466324637081337e-06, "loss": 0.031836897134780884, "step": 10170 }, { "epoch": 0.09576470588235295, "grad_norm": 0.7394006512414802, "learning_rate": 6.464735508100794e-06, "loss": 0.04354014992713928, "step": 10175 }, { "epoch": 0.09581176470588235, "grad_norm": 0.9837262572991698, "learning_rate": 6.463147550151929e-06, "loss": 0.04250353872776032, "step": 10180 }, { "epoch": 0.09585882352941176, "grad_norm": 0.7852530219299164, "learning_rate": 6.461560761797222e-06, "loss": 0.04403408765792847, "step": 10185 }, { "epoch": 0.09590588235294117, "grad_norm": 0.7067471890782836, "learning_rate": 6.459975141601623e-06, "loss": 0.03358942866325378, "step": 10190 }, { "epoch": 0.09595294117647059, "grad_norm": 1.0965779872576085, "learning_rate": 6.458390688132548e-06, "loss": 0.039904934167861936, "step": 10195 }, { "epoch": 0.096, "grad_norm": 0.7043612436664036, "learning_rate": 6.456807399959867e-06, "loss": 0.037634140253067015, "step": 10200 }, { "epoch": 0.09604705882352942, "grad_norm": 0.632495154646955, "learning_rate": 6.4552252756559075e-06, "loss": 0.043210798501968385, "step": 10205 }, { "epoch": 0.09609411764705883, "grad_norm": 0.6809372653494937, "learning_rate": 6.453644313795441e-06, "loss": 0.041280022263526915, "step": 10210 }, { "epoch": 0.09614117647058823, "grad_norm": 1.1254343362075456, "learning_rate": 6.452064512955686e-06, "loss": 0.03330950736999512, "step": 10215 }, { "epoch": 0.09618823529411764, "grad_norm": 0.8889789999431352, "learning_rate": 6.450485871716291e-06, "loss": 0.037585845589637755, "step": 10220 }, { "epoch": 0.09623529411764706, "grad_norm": 0.752127122541522, "learning_rate": 6.448908388659342e-06, "loss": 0.04264208376407623, "step": 10225 }, { "epoch": 0.09628235294117647, "grad_norm": 0.7109427948853875, "learning_rate": 6.447332062369347e-06, "loss": 0.03656031787395477, "step": 10230 }, { "epoch": 0.09632941176470589, "grad_norm": 0.7982060429709569, "learning_rate": 6.445756891433237e-06, "loss": 0.03473211526870727, "step": 10235 }, { "epoch": 0.0963764705882353, "grad_norm": 0.5676504475815077, "learning_rate": 6.444182874440359e-06, "loss": 0.0433110386133194, "step": 10240 }, { "epoch": 0.0964235294117647, "grad_norm": 0.7299692214644872, "learning_rate": 6.442610009982468e-06, "loss": 0.0346408486366272, "step": 10245 }, { "epoch": 0.09647058823529411, "grad_norm": 0.7555704530183165, "learning_rate": 6.441038296653723e-06, "loss": 0.0402583122253418, "step": 10250 }, { "epoch": 0.09651764705882353, "grad_norm": 0.6909283476920396, "learning_rate": 6.4394677330506895e-06, "loss": 0.03628941178321839, "step": 10255 }, { "epoch": 0.09656470588235294, "grad_norm": 1.076477185070381, "learning_rate": 6.437898317772317e-06, "loss": 0.0438043475151062, "step": 10260 }, { "epoch": 0.09661176470588236, "grad_norm": 0.6430539555255792, "learning_rate": 6.436330049419954e-06, "loss": 0.04135508835315704, "step": 10265 }, { "epoch": 0.09665882352941177, "grad_norm": 0.7093143557237266, "learning_rate": 6.434762926597324e-06, "loss": 0.04437861442565918, "step": 10270 }, { "epoch": 0.09670588235294118, "grad_norm": 0.9321364800026826, "learning_rate": 6.433196947910535e-06, "loss": 0.040146276354789734, "step": 10275 }, { "epoch": 0.09675294117647058, "grad_norm": 0.582818465600026, "learning_rate": 6.431632111968067e-06, "loss": 0.03368658721446991, "step": 10280 }, { "epoch": 0.0968, "grad_norm": 0.6835350137579658, "learning_rate": 6.430068417380766e-06, "loss": 0.037766766548156736, "step": 10285 }, { "epoch": 0.09684705882352941, "grad_norm": 0.5811283020663243, "learning_rate": 6.428505862761846e-06, "loss": 0.03463187515735626, "step": 10290 }, { "epoch": 0.09689411764705883, "grad_norm": 0.8030061435616713, "learning_rate": 6.426944446726874e-06, "loss": 0.03803182542324066, "step": 10295 }, { "epoch": 0.09694117647058824, "grad_norm": 0.6959093928288129, "learning_rate": 6.42538416789377e-06, "loss": 0.031772667169570924, "step": 10300 }, { "epoch": 0.09698823529411765, "grad_norm": 0.7492421542157465, "learning_rate": 6.423825024882805e-06, "loss": 0.03492077589035034, "step": 10305 }, { "epoch": 0.09703529411764705, "grad_norm": 0.8330542163402688, "learning_rate": 6.42226701631659e-06, "loss": 0.036852025985717775, "step": 10310 }, { "epoch": 0.09708235294117647, "grad_norm": 0.8759929544797955, "learning_rate": 6.420710140820074e-06, "loss": 0.03547675013542175, "step": 10315 }, { "epoch": 0.09712941176470588, "grad_norm": 0.7196564691194717, "learning_rate": 6.419154397020535e-06, "loss": 0.03976379036903381, "step": 10320 }, { "epoch": 0.0971764705882353, "grad_norm": 1.3883419457659882, "learning_rate": 6.417599783547585e-06, "loss": 0.04409765601158142, "step": 10325 }, { "epoch": 0.09722352941176471, "grad_norm": 1.0222324543578671, "learning_rate": 6.416046299033152e-06, "loss": 0.039807742834091185, "step": 10330 }, { "epoch": 0.09727058823529412, "grad_norm": 0.7046953069488483, "learning_rate": 6.414493942111485e-06, "loss": 0.044257867336273196, "step": 10335 }, { "epoch": 0.09731764705882352, "grad_norm": 0.6741408849765008, "learning_rate": 6.412942711419144e-06, "loss": 0.03657228350639343, "step": 10340 }, { "epoch": 0.09736470588235294, "grad_norm": 0.8616555522442505, "learning_rate": 6.41139260559499e-06, "loss": 0.041537144780159, "step": 10345 }, { "epoch": 0.09741176470588235, "grad_norm": 0.7030090371708677, "learning_rate": 6.409843623280196e-06, "loss": 0.03740726411342621, "step": 10350 }, { "epoch": 0.09745882352941176, "grad_norm": 0.5966713154433765, "learning_rate": 6.408295763118226e-06, "loss": 0.03635180294513703, "step": 10355 }, { "epoch": 0.09750588235294118, "grad_norm": 0.5192261212903931, "learning_rate": 6.406749023754836e-06, "loss": 0.03324366211891174, "step": 10360 }, { "epoch": 0.09755294117647059, "grad_norm": 0.7650572332328827, "learning_rate": 6.4052034038380695e-06, "loss": 0.03208202123641968, "step": 10365 }, { "epoch": 0.0976, "grad_norm": 0.628394002351013, "learning_rate": 6.403658902018254e-06, "loss": 0.032799118757247926, "step": 10370 }, { "epoch": 0.0976470588235294, "grad_norm": 0.8888403604041535, "learning_rate": 6.402115516947992e-06, "loss": 0.041652911901474, "step": 10375 }, { "epoch": 0.09769411764705882, "grad_norm": 0.6409307833317974, "learning_rate": 6.400573247282158e-06, "loss": 0.037189054489135745, "step": 10380 }, { "epoch": 0.09774117647058823, "grad_norm": 0.6483785439184079, "learning_rate": 6.399032091677896e-06, "loss": 0.03512178063392639, "step": 10385 }, { "epoch": 0.09778823529411765, "grad_norm": 0.8385157719671262, "learning_rate": 6.397492048794608e-06, "loss": 0.04030711352825165, "step": 10390 }, { "epoch": 0.09783529411764706, "grad_norm": 0.8752791353462961, "learning_rate": 6.395953117293957e-06, "loss": 0.039134344458580016, "step": 10395 }, { "epoch": 0.09788235294117648, "grad_norm": 0.7694421275764268, "learning_rate": 6.394415295839859e-06, "loss": 0.04004046320915222, "step": 10400 }, { "epoch": 0.09792941176470588, "grad_norm": 0.6370334111433695, "learning_rate": 6.3928785830984745e-06, "loss": 0.04051439762115479, "step": 10405 }, { "epoch": 0.09797647058823529, "grad_norm": 0.8544142384708698, "learning_rate": 6.391342977738207e-06, "loss": 0.0349460244178772, "step": 10410 }, { "epoch": 0.0980235294117647, "grad_norm": 0.7535515459202413, "learning_rate": 6.389808478429702e-06, "loss": 0.04008095860481262, "step": 10415 }, { "epoch": 0.09807058823529412, "grad_norm": 0.6416866599511393, "learning_rate": 6.388275083845834e-06, "loss": 0.043359375, "step": 10420 }, { "epoch": 0.09811764705882353, "grad_norm": 0.9136049286605671, "learning_rate": 6.3867427926617085e-06, "loss": 0.038709187507629396, "step": 10425 }, { "epoch": 0.09816470588235295, "grad_norm": 0.7255924986412299, "learning_rate": 6.385211603554652e-06, "loss": 0.035410791635513306, "step": 10430 }, { "epoch": 0.09821176470588236, "grad_norm": 0.5847040138300197, "learning_rate": 6.383681515204209e-06, "loss": 0.03595660924911499, "step": 10435 }, { "epoch": 0.09825882352941176, "grad_norm": 0.8985591833992879, "learning_rate": 6.382152526292142e-06, "loss": 0.041501641273498535, "step": 10440 }, { "epoch": 0.09830588235294117, "grad_norm": 0.6086071335455718, "learning_rate": 6.38062463550242e-06, "loss": 0.03668221235275269, "step": 10445 }, { "epoch": 0.09835294117647059, "grad_norm": 0.674035891339714, "learning_rate": 6.3790978415212155e-06, "loss": 0.04182220101356506, "step": 10450 }, { "epoch": 0.0984, "grad_norm": 1.1828152221958022, "learning_rate": 6.377572143036904e-06, "loss": 0.036453771591186526, "step": 10455 }, { "epoch": 0.09844705882352942, "grad_norm": 0.8298778039413407, "learning_rate": 6.3760475387400526e-06, "loss": 0.0382098376750946, "step": 10460 }, { "epoch": 0.09849411764705883, "grad_norm": 0.6809101299145798, "learning_rate": 6.374524027323416e-06, "loss": 0.03331720232963562, "step": 10465 }, { "epoch": 0.09854117647058823, "grad_norm": 0.7333147211220572, "learning_rate": 6.373001607481943e-06, "loss": 0.035138997435569766, "step": 10470 }, { "epoch": 0.09858823529411764, "grad_norm": 0.6494485054369605, "learning_rate": 6.371480277912756e-06, "loss": 0.03355609178543091, "step": 10475 }, { "epoch": 0.09863529411764706, "grad_norm": 0.7005424693329433, "learning_rate": 6.3699600373151545e-06, "loss": 0.036088031530380246, "step": 10480 }, { "epoch": 0.09868235294117647, "grad_norm": 0.7060208441327105, "learning_rate": 6.368440884390612e-06, "loss": 0.03385041356086731, "step": 10485 }, { "epoch": 0.09872941176470588, "grad_norm": 0.6266139297388515, "learning_rate": 6.366922817842766e-06, "loss": 0.035991355776786804, "step": 10490 }, { "epoch": 0.0987764705882353, "grad_norm": 0.4157434641802742, "learning_rate": 6.365405836377417e-06, "loss": 0.03198578059673309, "step": 10495 }, { "epoch": 0.0988235294117647, "grad_norm": 0.7287279798102141, "learning_rate": 6.363889938702526e-06, "loss": 0.03526226282119751, "step": 10500 }, { "epoch": 0.09887058823529411, "grad_norm": 0.466904953570618, "learning_rate": 6.3623751235282e-06, "loss": 0.030703026056289672, "step": 10505 }, { "epoch": 0.09891764705882353, "grad_norm": 0.7820366707763681, "learning_rate": 6.3608613895667014e-06, "loss": 0.03540288507938385, "step": 10510 }, { "epoch": 0.09896470588235294, "grad_norm": 0.5707462367220588, "learning_rate": 6.359348735532433e-06, "loss": 0.037659245729446414, "step": 10515 }, { "epoch": 0.09901176470588235, "grad_norm": 1.1125443612416963, "learning_rate": 6.357837160141934e-06, "loss": 0.04414454698562622, "step": 10520 }, { "epoch": 0.09905882352941177, "grad_norm": 0.5528835806509299, "learning_rate": 6.356326662113883e-06, "loss": 0.03499376177787781, "step": 10525 }, { "epoch": 0.09910588235294118, "grad_norm": 0.7235785187152445, "learning_rate": 6.354817240169085e-06, "loss": 0.037611651420593264, "step": 10530 }, { "epoch": 0.09915294117647058, "grad_norm": 0.8512916798312373, "learning_rate": 6.353308893030475e-06, "loss": 0.040208691358566286, "step": 10535 }, { "epoch": 0.0992, "grad_norm": 0.5043161485597134, "learning_rate": 6.351801619423101e-06, "loss": 0.03515550494194031, "step": 10540 }, { "epoch": 0.09924705882352941, "grad_norm": 0.6551428537326612, "learning_rate": 6.350295418074136e-06, "loss": 0.03412358164787292, "step": 10545 }, { "epoch": 0.09929411764705882, "grad_norm": 0.6122574440218894, "learning_rate": 6.348790287712857e-06, "loss": 0.032509329915046695, "step": 10550 }, { "epoch": 0.09934117647058824, "grad_norm": 1.24164233691571, "learning_rate": 6.347286227070655e-06, "loss": 0.04274335503578186, "step": 10555 }, { "epoch": 0.09938823529411765, "grad_norm": 0.6716989956491286, "learning_rate": 6.345783234881017e-06, "loss": 0.038126197457313535, "step": 10560 }, { "epoch": 0.09943529411764705, "grad_norm": 0.8663400802221247, "learning_rate": 6.3442813098795355e-06, "loss": 0.0465313196182251, "step": 10565 }, { "epoch": 0.09948235294117647, "grad_norm": 0.48071056988295136, "learning_rate": 6.3427804508038915e-06, "loss": 0.0295600950717926, "step": 10570 }, { "epoch": 0.09952941176470588, "grad_norm": 0.5917260311742054, "learning_rate": 6.3412806563938575e-06, "loss": 0.03346183300018311, "step": 10575 }, { "epoch": 0.0995764705882353, "grad_norm": 0.7647161296463678, "learning_rate": 6.3397819253912905e-06, "loss": 0.033443087339401247, "step": 10580 }, { "epoch": 0.09962352941176471, "grad_norm": 0.6108859074715306, "learning_rate": 6.33828425654013e-06, "loss": 0.03519887328147888, "step": 10585 }, { "epoch": 0.09967058823529412, "grad_norm": 0.7269545426777175, "learning_rate": 6.3367876485863875e-06, "loss": 0.03603391051292419, "step": 10590 }, { "epoch": 0.09971764705882354, "grad_norm": 0.6748098857814933, "learning_rate": 6.3352921002781485e-06, "loss": 0.03898308575153351, "step": 10595 }, { "epoch": 0.09976470588235294, "grad_norm": 1.2712659867267226, "learning_rate": 6.333797610365567e-06, "loss": 0.04520304203033447, "step": 10600 }, { "epoch": 0.09981176470588235, "grad_norm": 0.7640040706543276, "learning_rate": 6.332304177600858e-06, "loss": 0.03629133701324463, "step": 10605 }, { "epoch": 0.09985882352941176, "grad_norm": 0.7393474071763324, "learning_rate": 6.330811800738294e-06, "loss": 0.03470860123634338, "step": 10610 }, { "epoch": 0.09990588235294118, "grad_norm": 0.5534383297379575, "learning_rate": 6.3293204785342055e-06, "loss": 0.02658933401107788, "step": 10615 }, { "epoch": 0.09995294117647059, "grad_norm": 0.6644103517613223, "learning_rate": 6.32783020974697e-06, "loss": 0.03653046488761902, "step": 10620 }, { "epoch": 0.1, "grad_norm": 0.6072949807769613, "learning_rate": 6.326340993137011e-06, "loss": 0.03381769061088562, "step": 10625 }, { "epoch": 0.1000470588235294, "grad_norm": 1.0005255490860767, "learning_rate": 6.324852827466793e-06, "loss": 0.03747303485870361, "step": 10630 }, { "epoch": 0.10009411764705882, "grad_norm": 0.660872560807158, "learning_rate": 6.323365711500818e-06, "loss": 0.03454044759273529, "step": 10635 }, { "epoch": 0.10014117647058823, "grad_norm": 0.5659906254708214, "learning_rate": 6.32187964400562e-06, "loss": 0.0339982271194458, "step": 10640 }, { "epoch": 0.10018823529411765, "grad_norm": 0.7456669265766525, "learning_rate": 6.320394623749758e-06, "loss": 0.042403262853622434, "step": 10645 }, { "epoch": 0.10023529411764706, "grad_norm": 0.6280415448098117, "learning_rate": 6.3189106495038226e-06, "loss": 0.036436057090759276, "step": 10650 }, { "epoch": 0.10028235294117648, "grad_norm": 0.5663346768705375, "learning_rate": 6.317427720040417e-06, "loss": 0.026539376378059386, "step": 10655 }, { "epoch": 0.10032941176470589, "grad_norm": 0.7308681807597233, "learning_rate": 6.3159458341341615e-06, "loss": 0.03025054931640625, "step": 10660 }, { "epoch": 0.10037647058823529, "grad_norm": 0.9774704801811381, "learning_rate": 6.314464990561692e-06, "loss": 0.03193170428276062, "step": 10665 }, { "epoch": 0.1004235294117647, "grad_norm": 0.7933457612103418, "learning_rate": 6.3129851881016435e-06, "loss": 0.029906964302062987, "step": 10670 }, { "epoch": 0.10047058823529412, "grad_norm": 0.9681500419771792, "learning_rate": 6.31150642553466e-06, "loss": 0.040733087062835696, "step": 10675 }, { "epoch": 0.10051764705882353, "grad_norm": 0.7572546334756445, "learning_rate": 6.310028701643381e-06, "loss": 0.0374875009059906, "step": 10680 }, { "epoch": 0.10056470588235294, "grad_norm": 0.6951832560798312, "learning_rate": 6.308552015212443e-06, "loss": 0.03801475763320923, "step": 10685 }, { "epoch": 0.10061176470588236, "grad_norm": 0.5621422262796778, "learning_rate": 6.307076365028466e-06, "loss": 0.035943815112113954, "step": 10690 }, { "epoch": 0.10065882352941176, "grad_norm": 0.707438329427044, "learning_rate": 6.3056017498800635e-06, "loss": 0.031980079412460324, "step": 10695 }, { "epoch": 0.10070588235294117, "grad_norm": 0.6654700604430532, "learning_rate": 6.304128168557827e-06, "loss": 0.03704928755760193, "step": 10700 }, { "epoch": 0.10075294117647059, "grad_norm": 0.9035662716541117, "learning_rate": 6.302655619854326e-06, "loss": 0.04571771025657654, "step": 10705 }, { "epoch": 0.1008, "grad_norm": 1.2423203680134012, "learning_rate": 6.301184102564103e-06, "loss": 0.03999128341674805, "step": 10710 }, { "epoch": 0.10084705882352941, "grad_norm": 0.7537582434090441, "learning_rate": 6.29971361548367e-06, "loss": 0.03789693713188171, "step": 10715 }, { "epoch": 0.10089411764705883, "grad_norm": 1.260290029368349, "learning_rate": 6.298244157411503e-06, "loss": 0.037118589878082274, "step": 10720 }, { "epoch": 0.10094117647058823, "grad_norm": 0.8258432381919668, "learning_rate": 6.296775727148042e-06, "loss": 0.045222118496894836, "step": 10725 }, { "epoch": 0.10098823529411764, "grad_norm": 0.7401150160835067, "learning_rate": 6.295308323495681e-06, "loss": 0.03599094152450562, "step": 10730 }, { "epoch": 0.10103529411764706, "grad_norm": 0.810017654459474, "learning_rate": 6.293841945258767e-06, "loss": 0.04112953245639801, "step": 10735 }, { "epoch": 0.10108235294117647, "grad_norm": 0.7551625110085424, "learning_rate": 6.292376591243595e-06, "loss": 0.03822368383407593, "step": 10740 }, { "epoch": 0.10112941176470588, "grad_norm": 1.0579030307712765, "learning_rate": 6.290912260258408e-06, "loss": 0.03550118505954743, "step": 10745 }, { "epoch": 0.1011764705882353, "grad_norm": 0.6915509133165838, "learning_rate": 6.289448951113387e-06, "loss": 0.03133663237094879, "step": 10750 }, { "epoch": 0.10122352941176471, "grad_norm": 1.0426020563527059, "learning_rate": 6.287986662620648e-06, "loss": 0.04368422031402588, "step": 10755 }, { "epoch": 0.10127058823529411, "grad_norm": 0.827908835785823, "learning_rate": 6.286525393594242e-06, "loss": 0.03659976422786713, "step": 10760 }, { "epoch": 0.10131764705882353, "grad_norm": 0.921297896990277, "learning_rate": 6.285065142850148e-06, "loss": 0.04410411119461059, "step": 10765 }, { "epoch": 0.10136470588235294, "grad_norm": 0.7002284185562628, "learning_rate": 6.2836059092062665e-06, "loss": 0.0319637656211853, "step": 10770 }, { "epoch": 0.10141176470588235, "grad_norm": 0.7505206398627123, "learning_rate": 6.282147691482422e-06, "loss": 0.03667289614677429, "step": 10775 }, { "epoch": 0.10145882352941177, "grad_norm": 0.9635830775668153, "learning_rate": 6.280690488500354e-06, "loss": 0.04243766665458679, "step": 10780 }, { "epoch": 0.10150588235294118, "grad_norm": 0.6753231933080742, "learning_rate": 6.279234299083713e-06, "loss": 0.036227256059646606, "step": 10785 }, { "epoch": 0.10155294117647058, "grad_norm": 0.44149509927925534, "learning_rate": 6.277779122058059e-06, "loss": 0.03357902765274048, "step": 10790 }, { "epoch": 0.1016, "grad_norm": 0.6554102030600876, "learning_rate": 6.2763249562508554e-06, "loss": 0.03286367654800415, "step": 10795 }, { "epoch": 0.10164705882352941, "grad_norm": 0.7399454957338402, "learning_rate": 6.274871800491468e-06, "loss": 0.03367577195167541, "step": 10800 }, { "epoch": 0.10169411764705882, "grad_norm": 0.7526919006243339, "learning_rate": 6.2734196536111546e-06, "loss": 0.034760981798172, "step": 10805 }, { "epoch": 0.10174117647058824, "grad_norm": 0.6672772509707735, "learning_rate": 6.271968514443072e-06, "loss": 0.03911662995815277, "step": 10810 }, { "epoch": 0.10178823529411765, "grad_norm": 0.7589283083566324, "learning_rate": 6.270518381822261e-06, "loss": 0.039448323845863345, "step": 10815 }, { "epoch": 0.10183529411764707, "grad_norm": 0.787551575416672, "learning_rate": 6.269069254585645e-06, "loss": 0.033416342735290525, "step": 10820 }, { "epoch": 0.10188235294117647, "grad_norm": 0.7876730992820977, "learning_rate": 6.267621131572035e-06, "loss": 0.030544370412826538, "step": 10825 }, { "epoch": 0.10192941176470588, "grad_norm": 0.5837143297119675, "learning_rate": 6.266174011622111e-06, "loss": 0.0365772545337677, "step": 10830 }, { "epoch": 0.1019764705882353, "grad_norm": 0.7310539206193852, "learning_rate": 6.264727893578433e-06, "loss": 0.04197899103164673, "step": 10835 }, { "epoch": 0.10202352941176471, "grad_norm": 0.7676145017818102, "learning_rate": 6.263282776285423e-06, "loss": 0.035654985904693605, "step": 10840 }, { "epoch": 0.10207058823529412, "grad_norm": 0.744832874506368, "learning_rate": 6.261838658589373e-06, "loss": 0.03183887004852295, "step": 10845 }, { "epoch": 0.10211764705882354, "grad_norm": 0.7298972364966507, "learning_rate": 6.260395539338436e-06, "loss": 0.04319084286689758, "step": 10850 }, { "epoch": 0.10216470588235294, "grad_norm": 0.7997836771635517, "learning_rate": 6.258953417382618e-06, "loss": 0.03318794369697571, "step": 10855 }, { "epoch": 0.10221176470588235, "grad_norm": 0.6307121094977625, "learning_rate": 6.257512291573783e-06, "loss": 0.03476710319519043, "step": 10860 }, { "epoch": 0.10225882352941176, "grad_norm": 0.5628699400727206, "learning_rate": 6.256072160765645e-06, "loss": 0.03584723472595215, "step": 10865 }, { "epoch": 0.10230588235294118, "grad_norm": 0.6945796406285735, "learning_rate": 6.25463302381376e-06, "loss": 0.03255473375320435, "step": 10870 }, { "epoch": 0.10235294117647059, "grad_norm": 0.9692316621127159, "learning_rate": 6.253194879575528e-06, "loss": 0.038294929265975955, "step": 10875 }, { "epoch": 0.1024, "grad_norm": 1.0221052007667024, "learning_rate": 6.25175772691019e-06, "loss": 0.036245429515838624, "step": 10880 }, { "epoch": 0.1024470588235294, "grad_norm": 0.6248312884316979, "learning_rate": 6.250321564678814e-06, "loss": 0.03232043981552124, "step": 10885 }, { "epoch": 0.10249411764705882, "grad_norm": 0.9924699155760822, "learning_rate": 6.248886391744306e-06, "loss": 0.037812459468841556, "step": 10890 }, { "epoch": 0.10254117647058823, "grad_norm": 0.66179354022944, "learning_rate": 6.247452206971398e-06, "loss": 0.040283769369125366, "step": 10895 }, { "epoch": 0.10258823529411765, "grad_norm": 0.7791251303110479, "learning_rate": 6.246019009226638e-06, "loss": 0.04164524972438812, "step": 10900 }, { "epoch": 0.10263529411764706, "grad_norm": 0.6838515209012737, "learning_rate": 6.244586797378403e-06, "loss": 0.031555598974227904, "step": 10905 }, { "epoch": 0.10268235294117647, "grad_norm": 0.636958353014561, "learning_rate": 6.24315557029688e-06, "loss": 0.030279505252838134, "step": 10910 }, { "epoch": 0.10272941176470589, "grad_norm": 0.7655809256962136, "learning_rate": 6.241725326854068e-06, "loss": 0.03125147223472595, "step": 10915 }, { "epoch": 0.10277647058823529, "grad_norm": 0.9485246005201174, "learning_rate": 6.240296065923776e-06, "loss": 0.03368169665336609, "step": 10920 }, { "epoch": 0.1028235294117647, "grad_norm": 0.7374696048460979, "learning_rate": 6.238867786381612e-06, "loss": 0.03272923827171326, "step": 10925 }, { "epoch": 0.10287058823529412, "grad_norm": 0.83160517784694, "learning_rate": 6.2374404871049955e-06, "loss": 0.041505861282348636, "step": 10930 }, { "epoch": 0.10291764705882353, "grad_norm": 0.5647764325191749, "learning_rate": 6.2360141669731325e-06, "loss": 0.034598708152770996, "step": 10935 }, { "epoch": 0.10296470588235294, "grad_norm": 0.7582987017773213, "learning_rate": 6.234588824867025e-06, "loss": 0.04478946030139923, "step": 10940 }, { "epoch": 0.10301176470588236, "grad_norm": 0.8520742228726153, "learning_rate": 6.233164459669466e-06, "loss": 0.04371407330036163, "step": 10945 }, { "epoch": 0.10305882352941176, "grad_norm": 1.0081986312420468, "learning_rate": 6.2317410702650376e-06, "loss": 0.03675359487533569, "step": 10950 }, { "epoch": 0.10310588235294117, "grad_norm": 0.6098604641792393, "learning_rate": 6.230318655540095e-06, "loss": 0.035893422365188596, "step": 10955 }, { "epoch": 0.10315294117647059, "grad_norm": 0.8144612887045316, "learning_rate": 6.228897214382781e-06, "loss": 0.031778478622436525, "step": 10960 }, { "epoch": 0.1032, "grad_norm": 1.08802231564564, "learning_rate": 6.2274767456830075e-06, "loss": 0.04590315818786621, "step": 10965 }, { "epoch": 0.10324705882352941, "grad_norm": 0.9456443979575783, "learning_rate": 6.226057248332461e-06, "loss": 0.04178647100925446, "step": 10970 }, { "epoch": 0.10329411764705883, "grad_norm": 0.5230545655863827, "learning_rate": 6.224638721224595e-06, "loss": 0.03459461331367493, "step": 10975 }, { "epoch": 0.10334117647058824, "grad_norm": 0.7492058940031051, "learning_rate": 6.223221163254622e-06, "loss": 0.04214600324630737, "step": 10980 }, { "epoch": 0.10338823529411764, "grad_norm": 0.672487082262591, "learning_rate": 6.221804573319524e-06, "loss": 0.0364873468875885, "step": 10985 }, { "epoch": 0.10343529411764706, "grad_norm": 0.7947989199602158, "learning_rate": 6.220388950318035e-06, "loss": 0.03951293230056763, "step": 10990 }, { "epoch": 0.10348235294117647, "grad_norm": 0.6744159308978019, "learning_rate": 6.218974293150638e-06, "loss": 0.04437777996063232, "step": 10995 }, { "epoch": 0.10352941176470588, "grad_norm": 0.659595910759858, "learning_rate": 6.217560600719573e-06, "loss": 0.034883683919906615, "step": 11000 }, { "epoch": 0.1035764705882353, "grad_norm": 0.8537245642209973, "learning_rate": 6.216147871928822e-06, "loss": 0.040554594993591306, "step": 11005 }, { "epoch": 0.10362352941176471, "grad_norm": 0.6173370016319149, "learning_rate": 6.214736105684108e-06, "loss": 0.0386712372303009, "step": 11010 }, { "epoch": 0.10367058823529411, "grad_norm": 0.7410957300740635, "learning_rate": 6.213325300892898e-06, "loss": 0.03937709927558899, "step": 11015 }, { "epoch": 0.10371764705882353, "grad_norm": 0.8852747153422758, "learning_rate": 6.21191545646439e-06, "loss": 0.03724826574325561, "step": 11020 }, { "epoch": 0.10376470588235294, "grad_norm": 0.755677030350616, "learning_rate": 6.210506571309515e-06, "loss": 0.03752951920032501, "step": 11025 }, { "epoch": 0.10381176470588235, "grad_norm": 0.6669306449606179, "learning_rate": 6.209098644340933e-06, "loss": 0.04159627854824066, "step": 11030 }, { "epoch": 0.10385882352941177, "grad_norm": 0.8299465950862269, "learning_rate": 6.207691674473027e-06, "loss": 0.03383664190769196, "step": 11035 }, { "epoch": 0.10390588235294118, "grad_norm": 0.6500962452754306, "learning_rate": 6.206285660621904e-06, "loss": 0.03672888278961182, "step": 11040 }, { "epoch": 0.10395294117647058, "grad_norm": 1.227826953710494, "learning_rate": 6.204880601705385e-06, "loss": 0.035595950484275815, "step": 11045 }, { "epoch": 0.104, "grad_norm": 0.6415619313951718, "learning_rate": 6.203476496643008e-06, "loss": 0.03331712484359741, "step": 11050 }, { "epoch": 0.10404705882352941, "grad_norm": 0.7086521839653803, "learning_rate": 6.2020733443560225e-06, "loss": 0.034480059146881105, "step": 11055 }, { "epoch": 0.10409411764705882, "grad_norm": 0.7930434532228171, "learning_rate": 6.20067114376738e-06, "loss": 0.03756978511810303, "step": 11060 }, { "epoch": 0.10414117647058824, "grad_norm": 0.5404323128732794, "learning_rate": 6.199269893801744e-06, "loss": 0.03371647596359253, "step": 11065 }, { "epoch": 0.10418823529411765, "grad_norm": 0.7343417996498567, "learning_rate": 6.197869593385469e-06, "loss": 0.04139329195022583, "step": 11070 }, { "epoch": 0.10423529411764706, "grad_norm": 0.758982205852326, "learning_rate": 6.196470241446615e-06, "loss": 0.036234050989151, "step": 11075 }, { "epoch": 0.10428235294117646, "grad_norm": 0.6340525194767543, "learning_rate": 6.1950718369149295e-06, "loss": 0.035271698236465455, "step": 11080 }, { "epoch": 0.10432941176470588, "grad_norm": 0.840017375790729, "learning_rate": 6.193674378721852e-06, "loss": 0.0352529913187027, "step": 11085 }, { "epoch": 0.10437647058823529, "grad_norm": 0.6304933651637832, "learning_rate": 6.192277865800508e-06, "loss": 0.03716529905796051, "step": 11090 }, { "epoch": 0.1044235294117647, "grad_norm": 0.884100311973619, "learning_rate": 6.190882297085709e-06, "loss": 0.04081965684890747, "step": 11095 }, { "epoch": 0.10447058823529412, "grad_norm": 0.9611712108669553, "learning_rate": 6.189487671513941e-06, "loss": 0.036471742391586306, "step": 11100 }, { "epoch": 0.10451764705882353, "grad_norm": 1.256325244136663, "learning_rate": 6.188093988023368e-06, "loss": 0.03214051723480225, "step": 11105 }, { "epoch": 0.10456470588235293, "grad_norm": 0.7523559492085999, "learning_rate": 6.186701245553831e-06, "loss": 0.03836609721183777, "step": 11110 }, { "epoch": 0.10461176470588235, "grad_norm": 0.758733137094644, "learning_rate": 6.185309443046839e-06, "loss": 0.04026780128479004, "step": 11115 }, { "epoch": 0.10465882352941176, "grad_norm": 0.8083923823816294, "learning_rate": 6.183918579445559e-06, "loss": 0.03295803666114807, "step": 11120 }, { "epoch": 0.10470588235294118, "grad_norm": 0.569504606262402, "learning_rate": 6.182528653694834e-06, "loss": 0.03938201665878296, "step": 11125 }, { "epoch": 0.10475294117647059, "grad_norm": 0.7088204982990522, "learning_rate": 6.1811396647411546e-06, "loss": 0.04634780585765839, "step": 11130 }, { "epoch": 0.1048, "grad_norm": 0.5526036743230456, "learning_rate": 6.1797516115326774e-06, "loss": 0.03398619294166565, "step": 11135 }, { "epoch": 0.10484705882352942, "grad_norm": 0.54966721995504, "learning_rate": 6.178364493019202e-06, "loss": 0.03639876246452332, "step": 11140 }, { "epoch": 0.10489411764705882, "grad_norm": 0.7427976693672417, "learning_rate": 6.176978308152185e-06, "loss": 0.03065875768661499, "step": 11145 }, { "epoch": 0.10494117647058823, "grad_norm": 0.8208531405783288, "learning_rate": 6.1755930558847265e-06, "loss": 0.03909181356430054, "step": 11150 }, { "epoch": 0.10498823529411765, "grad_norm": 0.7067802781986766, "learning_rate": 6.174208735171568e-06, "loss": 0.04000340402126312, "step": 11155 }, { "epoch": 0.10503529411764706, "grad_norm": 0.928643120566042, "learning_rate": 6.172825344969092e-06, "loss": 0.03866868615150452, "step": 11160 }, { "epoch": 0.10508235294117647, "grad_norm": 0.7142065701761133, "learning_rate": 6.171442884235317e-06, "loss": 0.03313310444355011, "step": 11165 }, { "epoch": 0.10512941176470589, "grad_norm": 0.8115760463071735, "learning_rate": 6.170061351929893e-06, "loss": 0.03814491033554077, "step": 11170 }, { "epoch": 0.10517647058823529, "grad_norm": 0.8106789184697943, "learning_rate": 6.168680747014099e-06, "loss": 0.03902616500854492, "step": 11175 }, { "epoch": 0.1052235294117647, "grad_norm": 0.777171581468864, "learning_rate": 6.167301068450845e-06, "loss": 0.04583904147148132, "step": 11180 }, { "epoch": 0.10527058823529412, "grad_norm": 1.0136176376624213, "learning_rate": 6.165922315204658e-06, "loss": 0.043354785442352294, "step": 11185 }, { "epoch": 0.10531764705882353, "grad_norm": 0.8704534249216455, "learning_rate": 6.164544486241689e-06, "loss": 0.04424891471862793, "step": 11190 }, { "epoch": 0.10536470588235294, "grad_norm": 0.6998024739952272, "learning_rate": 6.163167580529702e-06, "loss": 0.03643447160720825, "step": 11195 }, { "epoch": 0.10541176470588236, "grad_norm": 0.776698148167811, "learning_rate": 6.1617915970380775e-06, "loss": 0.029407685995101927, "step": 11200 }, { "epoch": 0.10545882352941177, "grad_norm": 0.6639225506279471, "learning_rate": 6.160416534737802e-06, "loss": 0.030787861347198485, "step": 11205 }, { "epoch": 0.10550588235294117, "grad_norm": 0.6617780555926479, "learning_rate": 6.159042392601473e-06, "loss": 0.033149552345275876, "step": 11210 }, { "epoch": 0.10555294117647059, "grad_norm": 0.6271766729948649, "learning_rate": 6.15766916960329e-06, "loss": 0.035883986949920656, "step": 11215 }, { "epoch": 0.1056, "grad_norm": 0.6834510706640896, "learning_rate": 6.15629686471905e-06, "loss": 0.03531291484832764, "step": 11220 }, { "epoch": 0.10564705882352941, "grad_norm": 0.8093413925705786, "learning_rate": 6.154925476926152e-06, "loss": 0.03937536776065827, "step": 11225 }, { "epoch": 0.10569411764705883, "grad_norm": 0.8447878203135657, "learning_rate": 6.153555005203584e-06, "loss": 0.03756617307662964, "step": 11230 }, { "epoch": 0.10574117647058824, "grad_norm": 0.5980141484831709, "learning_rate": 6.152185448531929e-06, "loss": 0.03625381886959076, "step": 11235 }, { "epoch": 0.10578823529411764, "grad_norm": 0.7486360093905489, "learning_rate": 6.150816805893354e-06, "loss": 0.033026814460754395, "step": 11240 }, { "epoch": 0.10583529411764706, "grad_norm": 0.5740097130486942, "learning_rate": 6.149449076271612e-06, "loss": 0.03269757628440857, "step": 11245 }, { "epoch": 0.10588235294117647, "grad_norm": 0.6048981666389694, "learning_rate": 6.148082258652037e-06, "loss": 0.03405157923698425, "step": 11250 }, { "epoch": 0.10592941176470588, "grad_norm": 0.6154743537979275, "learning_rate": 6.146716352021541e-06, "loss": 0.0340039074420929, "step": 11255 }, { "epoch": 0.1059764705882353, "grad_norm": 0.636967650053412, "learning_rate": 6.14535135536861e-06, "loss": 0.03411675691604614, "step": 11260 }, { "epoch": 0.10602352941176471, "grad_norm": 0.6482203486193645, "learning_rate": 6.143987267683304e-06, "loss": 0.035171645879745486, "step": 11265 }, { "epoch": 0.10607058823529411, "grad_norm": 0.7241743462877995, "learning_rate": 6.142624087957248e-06, "loss": 0.03935444355010986, "step": 11270 }, { "epoch": 0.10611764705882352, "grad_norm": 0.6333853965601728, "learning_rate": 6.141261815183636e-06, "loss": 0.03586332798004151, "step": 11275 }, { "epoch": 0.10616470588235294, "grad_norm": 0.6897971630163136, "learning_rate": 6.139900448357222e-06, "loss": 0.03486221432685852, "step": 11280 }, { "epoch": 0.10621176470588235, "grad_norm": 0.8058992640962743, "learning_rate": 6.138539986474319e-06, "loss": 0.039734429121017455, "step": 11285 }, { "epoch": 0.10625882352941177, "grad_norm": 0.7657042587989074, "learning_rate": 6.137180428532798e-06, "loss": 0.03362127542495728, "step": 11290 }, { "epoch": 0.10630588235294118, "grad_norm": 0.5981831056226552, "learning_rate": 6.135821773532082e-06, "loss": 0.033229750394821164, "step": 11295 }, { "epoch": 0.1063529411764706, "grad_norm": 0.5733109047027068, "learning_rate": 6.134464020473143e-06, "loss": 0.03421051502227783, "step": 11300 }, { "epoch": 0.1064, "grad_norm": 0.6222079381104714, "learning_rate": 6.133107168358501e-06, "loss": 0.03325878977775574, "step": 11305 }, { "epoch": 0.10644705882352941, "grad_norm": 0.8394219545398718, "learning_rate": 6.1317512161922176e-06, "loss": 0.03661830425262451, "step": 11310 }, { "epoch": 0.10649411764705882, "grad_norm": 0.7846584464067607, "learning_rate": 6.1303961629798994e-06, "loss": 0.03741724193096161, "step": 11315 }, { "epoch": 0.10654117647058824, "grad_norm": 0.7687868681542859, "learning_rate": 6.129042007728685e-06, "loss": 0.03645428717136383, "step": 11320 }, { "epoch": 0.10658823529411765, "grad_norm": 0.6367775309737044, "learning_rate": 6.127688749447253e-06, "loss": 0.028523042798042297, "step": 11325 }, { "epoch": 0.10663529411764706, "grad_norm": 0.7063452374359112, "learning_rate": 6.126336387145809e-06, "loss": 0.029324263334274292, "step": 11330 }, { "epoch": 0.10668235294117646, "grad_norm": 1.1439127831741476, "learning_rate": 6.124984919836091e-06, "loss": 0.03683438003063202, "step": 11335 }, { "epoch": 0.10672941176470588, "grad_norm": 0.711310986495162, "learning_rate": 6.123634346531358e-06, "loss": 0.035146871209144594, "step": 11340 }, { "epoch": 0.10677647058823529, "grad_norm": 0.7096742912374713, "learning_rate": 6.122284666246394e-06, "loss": 0.03301103413105011, "step": 11345 }, { "epoch": 0.1068235294117647, "grad_norm": 0.6303926703841481, "learning_rate": 6.120935877997502e-06, "loss": 0.03704249560832977, "step": 11350 }, { "epoch": 0.10687058823529412, "grad_norm": 0.6920838964560915, "learning_rate": 6.1195879808025026e-06, "loss": 0.03653289675712586, "step": 11355 }, { "epoch": 0.10691764705882353, "grad_norm": 0.694700872919977, "learning_rate": 6.118240973680728e-06, "loss": 0.04084494113922119, "step": 11360 }, { "epoch": 0.10696470588235295, "grad_norm": 0.6298044163992884, "learning_rate": 6.116894855653022e-06, "loss": 0.035307079553604126, "step": 11365 }, { "epoch": 0.10701176470588235, "grad_norm": 0.7977467753228791, "learning_rate": 6.115549625741732e-06, "loss": 0.04020196199417114, "step": 11370 }, { "epoch": 0.10705882352941176, "grad_norm": 0.7694604967684661, "learning_rate": 6.114205282970717e-06, "loss": 0.03476878702640533, "step": 11375 }, { "epoch": 0.10710588235294118, "grad_norm": 0.6243971355678084, "learning_rate": 6.112861826365329e-06, "loss": 0.03961245119571686, "step": 11380 }, { "epoch": 0.10715294117647059, "grad_norm": 0.4789570650538159, "learning_rate": 6.111519254952426e-06, "loss": 0.03278928399085999, "step": 11385 }, { "epoch": 0.1072, "grad_norm": 0.5971443403838529, "learning_rate": 6.110177567760356e-06, "loss": 0.033611106872558597, "step": 11390 }, { "epoch": 0.10724705882352942, "grad_norm": 0.8169224596950514, "learning_rate": 6.108836763818963e-06, "loss": 0.03628435730934143, "step": 11395 }, { "epoch": 0.10729411764705882, "grad_norm": 1.4812355500745304, "learning_rate": 6.107496842159578e-06, "loss": 0.03816576600074768, "step": 11400 }, { "epoch": 0.10734117647058823, "grad_norm": 0.9524534643216759, "learning_rate": 6.10615780181502e-06, "loss": 0.040394681692123416, "step": 11405 }, { "epoch": 0.10738823529411765, "grad_norm": 0.781469166313667, "learning_rate": 6.104819641819592e-06, "loss": 0.03471187353134155, "step": 11410 }, { "epoch": 0.10743529411764706, "grad_norm": 0.8389463896234475, "learning_rate": 6.103482361209077e-06, "loss": 0.034952801465988156, "step": 11415 }, { "epoch": 0.10748235294117647, "grad_norm": 0.5632202120703096, "learning_rate": 6.102145959020738e-06, "loss": 0.024661916494369506, "step": 11420 }, { "epoch": 0.10752941176470589, "grad_norm": 0.921038624694977, "learning_rate": 6.100810434293306e-06, "loss": 0.04140390157699585, "step": 11425 }, { "epoch": 0.10757647058823529, "grad_norm": 0.702777401762077, "learning_rate": 6.099475786066994e-06, "loss": 0.03401981592178345, "step": 11430 }, { "epoch": 0.1076235294117647, "grad_norm": 0.8005546540781723, "learning_rate": 6.098142013383477e-06, "loss": 0.04078429937362671, "step": 11435 }, { "epoch": 0.10767058823529412, "grad_norm": 0.6973105135223082, "learning_rate": 6.096809115285901e-06, "loss": 0.032088494300842284, "step": 11440 }, { "epoch": 0.10771764705882353, "grad_norm": 0.630265034884536, "learning_rate": 6.095477090818869e-06, "loss": 0.03262895047664642, "step": 11445 }, { "epoch": 0.10776470588235294, "grad_norm": 0.9618748473116996, "learning_rate": 6.094145939028451e-06, "loss": 0.03271580934524536, "step": 11450 }, { "epoch": 0.10781176470588236, "grad_norm": 1.0007427348489744, "learning_rate": 6.092815658962172e-06, "loss": 0.03618106245994568, "step": 11455 }, { "epoch": 0.10785882352941177, "grad_norm": 0.6163525118696784, "learning_rate": 6.091486249669011e-06, "loss": 0.031594157218933105, "step": 11460 }, { "epoch": 0.10790588235294117, "grad_norm": 0.6520014106198262, "learning_rate": 6.090157710199399e-06, "loss": 0.032428643107414244, "step": 11465 }, { "epoch": 0.10795294117647058, "grad_norm": 0.6137676012676772, "learning_rate": 6.088830039605218e-06, "loss": 0.03388186097145081, "step": 11470 }, { "epoch": 0.108, "grad_norm": 0.6898443919712032, "learning_rate": 6.087503236939796e-06, "loss": 0.038869670033454894, "step": 11475 }, { "epoch": 0.10804705882352941, "grad_norm": 0.7601578360127236, "learning_rate": 6.0861773012579e-06, "loss": 0.033452081680297854, "step": 11480 }, { "epoch": 0.10809411764705883, "grad_norm": 0.5479867655709811, "learning_rate": 6.084852231615743e-06, "loss": 0.03445451855659485, "step": 11485 }, { "epoch": 0.10814117647058824, "grad_norm": 0.5469359627336803, "learning_rate": 6.083528027070975e-06, "loss": 0.04221142828464508, "step": 11490 }, { "epoch": 0.10818823529411764, "grad_norm": 0.6721705735550878, "learning_rate": 6.082204686682677e-06, "loss": 0.03676363825798035, "step": 11495 }, { "epoch": 0.10823529411764705, "grad_norm": 1.1615496660531117, "learning_rate": 6.0808822095113655e-06, "loss": 0.036501955986022946, "step": 11500 }, { "epoch": 0.10828235294117647, "grad_norm": 0.6049151658648727, "learning_rate": 6.0795605946189865e-06, "loss": 0.0333378404378891, "step": 11505 }, { "epoch": 0.10832941176470588, "grad_norm": 0.6555003113617787, "learning_rate": 6.078239841068909e-06, "loss": 0.030457431077957155, "step": 11510 }, { "epoch": 0.1083764705882353, "grad_norm": 0.8160153804798699, "learning_rate": 6.07691994792593e-06, "loss": 0.03377025127410889, "step": 11515 }, { "epoch": 0.10842352941176471, "grad_norm": 0.6853013786056632, "learning_rate": 6.075600914256267e-06, "loss": 0.03359506130218506, "step": 11520 }, { "epoch": 0.10847058823529412, "grad_norm": 0.5481070763924236, "learning_rate": 6.07428273912755e-06, "loss": 0.0358665257692337, "step": 11525 }, { "epoch": 0.10851764705882352, "grad_norm": 0.5053078462054136, "learning_rate": 6.072965421608833e-06, "loss": 0.02734050154685974, "step": 11530 }, { "epoch": 0.10856470588235294, "grad_norm": 0.6942124784503956, "learning_rate": 6.071648960770577e-06, "loss": 0.03457595705986023, "step": 11535 }, { "epoch": 0.10861176470588235, "grad_norm": 0.629187949270944, "learning_rate": 6.070333355684654e-06, "loss": 0.03028825521469116, "step": 11540 }, { "epoch": 0.10865882352941177, "grad_norm": 0.8290065599112845, "learning_rate": 6.069018605424343e-06, "loss": 0.04451470077037811, "step": 11545 }, { "epoch": 0.10870588235294118, "grad_norm": 0.6637448466054786, "learning_rate": 6.067704709064329e-06, "loss": 0.03823853731155395, "step": 11550 }, { "epoch": 0.1087529411764706, "grad_norm": 1.0715722131952994, "learning_rate": 6.066391665680696e-06, "loss": 0.037737250328063965, "step": 11555 }, { "epoch": 0.1088, "grad_norm": 0.7170425276698532, "learning_rate": 6.065079474350928e-06, "loss": 0.035987406969070435, "step": 11560 }, { "epoch": 0.10884705882352941, "grad_norm": 0.5289186134554898, "learning_rate": 6.063768134153907e-06, "loss": 0.034706002473831175, "step": 11565 }, { "epoch": 0.10889411764705882, "grad_norm": 0.9832368638093308, "learning_rate": 6.062457644169906e-06, "loss": 0.04036756157875061, "step": 11570 }, { "epoch": 0.10894117647058824, "grad_norm": 0.8008627789327006, "learning_rate": 6.061148003480592e-06, "loss": 0.032676714658737185, "step": 11575 }, { "epoch": 0.10898823529411765, "grad_norm": 0.6904880280809487, "learning_rate": 6.059839211169016e-06, "loss": 0.04011877477169037, "step": 11580 }, { "epoch": 0.10903529411764706, "grad_norm": 0.8315995709338757, "learning_rate": 6.058531266319617e-06, "loss": 0.03426017165184021, "step": 11585 }, { "epoch": 0.10908235294117648, "grad_norm": 1.0432260889733704, "learning_rate": 6.057224168018215e-06, "loss": 0.03629041314125061, "step": 11590 }, { "epoch": 0.10912941176470588, "grad_norm": 0.991549692569036, "learning_rate": 6.055917915352012e-06, "loss": 0.03793170154094696, "step": 11595 }, { "epoch": 0.10917647058823529, "grad_norm": 0.5901044305657207, "learning_rate": 6.054612507409586e-06, "loss": 0.03688921332359314, "step": 11600 }, { "epoch": 0.1092235294117647, "grad_norm": 1.021976722892347, "learning_rate": 6.05330794328089e-06, "loss": 0.040502458810806274, "step": 11605 }, { "epoch": 0.10927058823529412, "grad_norm": 0.6264579858477936, "learning_rate": 6.05200422205725e-06, "loss": 0.03454729616641998, "step": 11610 }, { "epoch": 0.10931764705882353, "grad_norm": 0.6642948080815707, "learning_rate": 6.050701342831359e-06, "loss": 0.03631860017776489, "step": 11615 }, { "epoch": 0.10936470588235295, "grad_norm": 0.5525126048878366, "learning_rate": 6.0493993046972784e-06, "loss": 0.035602998733520505, "step": 11620 }, { "epoch": 0.10941176470588235, "grad_norm": 0.8311151525643317, "learning_rate": 6.048098106750434e-06, "loss": 0.032363080978393556, "step": 11625 }, { "epoch": 0.10945882352941176, "grad_norm": 0.7263286719173584, "learning_rate": 6.046797748087611e-06, "loss": 0.0387813001871109, "step": 11630 }, { "epoch": 0.10950588235294118, "grad_norm": 0.5913364426174365, "learning_rate": 6.045498227806956e-06, "loss": 0.03890208601951599, "step": 11635 }, { "epoch": 0.10955294117647059, "grad_norm": 0.6942391352465297, "learning_rate": 6.044199545007971e-06, "loss": 0.03306882679462433, "step": 11640 }, { "epoch": 0.1096, "grad_norm": 0.98964867112832, "learning_rate": 6.042901698791509e-06, "loss": 0.03361735343933105, "step": 11645 }, { "epoch": 0.10964705882352942, "grad_norm": 0.7208906892133387, "learning_rate": 6.041604688259776e-06, "loss": 0.033284148573875426, "step": 11650 }, { "epoch": 0.10969411764705882, "grad_norm": 0.8277998829512061, "learning_rate": 6.0403085125163275e-06, "loss": 0.04076094031333923, "step": 11655 }, { "epoch": 0.10974117647058823, "grad_norm": 1.2891378789288601, "learning_rate": 6.039013170666064e-06, "loss": 0.03271182179450989, "step": 11660 }, { "epoch": 0.10978823529411764, "grad_norm": 0.8999300884530613, "learning_rate": 6.037718661815225e-06, "loss": 0.035480308532714847, "step": 11665 }, { "epoch": 0.10983529411764706, "grad_norm": 0.5525782740250168, "learning_rate": 6.036424985071397e-06, "loss": 0.029695039987564086, "step": 11670 }, { "epoch": 0.10988235294117647, "grad_norm": 0.7729220382981773, "learning_rate": 6.0351321395435e-06, "loss": 0.03558616638183594, "step": 11675 }, { "epoch": 0.10992941176470589, "grad_norm": 0.9883331053504715, "learning_rate": 6.033840124341792e-06, "loss": 0.03352401256561279, "step": 11680 }, { "epoch": 0.1099764705882353, "grad_norm": 1.000619379544432, "learning_rate": 6.032548938577862e-06, "loss": 0.03576747179031372, "step": 11685 }, { "epoch": 0.1100235294117647, "grad_norm": 0.7537474456219972, "learning_rate": 6.031258581364627e-06, "loss": 0.03797864317893982, "step": 11690 }, { "epoch": 0.11007058823529411, "grad_norm": 0.6425597184912156, "learning_rate": 6.029969051816339e-06, "loss": 0.03959016799926758, "step": 11695 }, { "epoch": 0.11011764705882353, "grad_norm": 0.7717284538358103, "learning_rate": 6.028680349048568e-06, "loss": 0.03230854570865631, "step": 11700 }, { "epoch": 0.11016470588235294, "grad_norm": 0.8805723668174957, "learning_rate": 6.02739247217821e-06, "loss": 0.044037675857543944, "step": 11705 }, { "epoch": 0.11021176470588236, "grad_norm": 0.6976872944353525, "learning_rate": 6.026105420323478e-06, "loss": 0.03579652607440949, "step": 11710 }, { "epoch": 0.11025882352941177, "grad_norm": 0.5921866400199489, "learning_rate": 6.024819192603905e-06, "loss": 0.03420213460922241, "step": 11715 }, { "epoch": 0.11030588235294117, "grad_norm": 0.8951106680790686, "learning_rate": 6.0235337881403425e-06, "loss": 0.03089815378189087, "step": 11720 }, { "epoch": 0.11035294117647058, "grad_norm": 0.7252067250050648, "learning_rate": 6.022249206054944e-06, "loss": 0.03560430407524109, "step": 11725 }, { "epoch": 0.1104, "grad_norm": 1.6103894112389692, "learning_rate": 6.020965445471185e-06, "loss": 0.035046765208244325, "step": 11730 }, { "epoch": 0.11044705882352941, "grad_norm": 0.8307571496936657, "learning_rate": 6.019682505513839e-06, "loss": 0.03184196949005127, "step": 11735 }, { "epoch": 0.11049411764705883, "grad_norm": 0.49224802147126756, "learning_rate": 6.018400385308989e-06, "loss": 0.03967433273792267, "step": 11740 }, { "epoch": 0.11054117647058824, "grad_norm": 0.8652986423703598, "learning_rate": 6.01711908398402e-06, "loss": 0.03415412902832031, "step": 11745 }, { "epoch": 0.11058823529411765, "grad_norm": 1.0751874007794775, "learning_rate": 6.0158386006676165e-06, "loss": 0.041477978229522705, "step": 11750 }, { "epoch": 0.11063529411764705, "grad_norm": 0.7447776154268835, "learning_rate": 6.014558934489758e-06, "loss": 0.02969057857990265, "step": 11755 }, { "epoch": 0.11068235294117647, "grad_norm": 0.8076400485923493, "learning_rate": 6.013280084581724e-06, "loss": 0.035027575492858884, "step": 11760 }, { "epoch": 0.11072941176470588, "grad_norm": 0.8281320832915976, "learning_rate": 6.012002050076082e-06, "loss": 0.042192375659942626, "step": 11765 }, { "epoch": 0.1107764705882353, "grad_norm": 0.6765946924621956, "learning_rate": 6.010724830106691e-06, "loss": 0.03401427268981934, "step": 11770 }, { "epoch": 0.11082352941176471, "grad_norm": 0.8372560532114641, "learning_rate": 6.009448423808699e-06, "loss": 0.03545602560043335, "step": 11775 }, { "epoch": 0.11087058823529412, "grad_norm": 0.917892821926906, "learning_rate": 6.008172830318536e-06, "loss": 0.03866420984268189, "step": 11780 }, { "epoch": 0.11091764705882352, "grad_norm": 0.6945527943261627, "learning_rate": 6.006898048773917e-06, "loss": 0.03209893107414245, "step": 11785 }, { "epoch": 0.11096470588235294, "grad_norm": 0.631616416571269, "learning_rate": 6.005624078313837e-06, "loss": 0.03104323148727417, "step": 11790 }, { "epoch": 0.11101176470588235, "grad_norm": 1.8987277615870606, "learning_rate": 6.004350918078565e-06, "loss": 0.039294517040252684, "step": 11795 }, { "epoch": 0.11105882352941177, "grad_norm": 1.840139625061605, "learning_rate": 6.00307856720965e-06, "loss": 0.03621892929077149, "step": 11800 }, { "epoch": 0.11110588235294118, "grad_norm": 0.774933011420684, "learning_rate": 6.001807024849915e-06, "loss": 0.03455252647399902, "step": 11805 }, { "epoch": 0.1111529411764706, "grad_norm": 0.6205595825177096, "learning_rate": 6.000536290143447e-06, "loss": 0.0336814284324646, "step": 11810 }, { "epoch": 0.1112, "grad_norm": 0.7339421934783955, "learning_rate": 5.999266362235605e-06, "loss": 0.03487446904182434, "step": 11815 }, { "epoch": 0.11124705882352941, "grad_norm": 0.6200278211104835, "learning_rate": 5.997997240273015e-06, "loss": 0.03461880087852478, "step": 11820 }, { "epoch": 0.11129411764705882, "grad_norm": 0.7371099457646253, "learning_rate": 5.9967289234035634e-06, "loss": 0.03609545230865478, "step": 11825 }, { "epoch": 0.11134117647058824, "grad_norm": 1.0007450061880336, "learning_rate": 5.995461410776396e-06, "loss": 0.03829156756401062, "step": 11830 }, { "epoch": 0.11138823529411765, "grad_norm": 1.0679276128380373, "learning_rate": 5.994194701541922e-06, "loss": 0.04149955809116364, "step": 11835 }, { "epoch": 0.11143529411764706, "grad_norm": 0.5235041651531214, "learning_rate": 5.9929287948518025e-06, "loss": 0.026252448558807373, "step": 11840 }, { "epoch": 0.11148235294117648, "grad_norm": 0.7831159722060386, "learning_rate": 5.991663689858953e-06, "loss": 0.035150659084320066, "step": 11845 }, { "epoch": 0.11152941176470588, "grad_norm": 0.6833285818304189, "learning_rate": 5.990399385717541e-06, "loss": 0.027288126945495605, "step": 11850 }, { "epoch": 0.11157647058823529, "grad_norm": 0.8233926052488665, "learning_rate": 5.989135881582985e-06, "loss": 0.0323345273733139, "step": 11855 }, { "epoch": 0.1116235294117647, "grad_norm": 0.7228906845092467, "learning_rate": 5.987873176611943e-06, "loss": 0.035716935992240906, "step": 11860 }, { "epoch": 0.11167058823529412, "grad_norm": 0.7818766826554177, "learning_rate": 5.986611269962326e-06, "loss": 0.037814277410507205, "step": 11865 }, { "epoch": 0.11171764705882353, "grad_norm": 0.8048297072186555, "learning_rate": 5.985350160793278e-06, "loss": 0.03576885759830475, "step": 11870 }, { "epoch": 0.11176470588235295, "grad_norm": 0.5212137713789314, "learning_rate": 5.9840898482651895e-06, "loss": 0.03621273338794708, "step": 11875 }, { "epoch": 0.11181176470588235, "grad_norm": 0.5678791796421242, "learning_rate": 5.982830331539684e-06, "loss": 0.035459858179092404, "step": 11880 }, { "epoch": 0.11185882352941176, "grad_norm": 0.7688971898066016, "learning_rate": 5.981571609779622e-06, "loss": 0.037465882301330564, "step": 11885 }, { "epoch": 0.11190588235294117, "grad_norm": 0.9249054510273057, "learning_rate": 5.980313682149095e-06, "loss": 0.03872917592525482, "step": 11890 }, { "epoch": 0.11195294117647059, "grad_norm": 0.5148346070549588, "learning_rate": 5.979056547813424e-06, "loss": 0.031278467178344725, "step": 11895 }, { "epoch": 0.112, "grad_norm": 0.6256387956759454, "learning_rate": 5.9778002059391575e-06, "loss": 0.036602118611335756, "step": 11900 }, { "epoch": 0.11204705882352942, "grad_norm": 0.5100238118205739, "learning_rate": 5.976544655694073e-06, "loss": 0.033399653434753415, "step": 11905 }, { "epoch": 0.11209411764705883, "grad_norm": 0.8010323754152497, "learning_rate": 5.975289896247166e-06, "loss": 0.0361072838306427, "step": 11910 }, { "epoch": 0.11214117647058823, "grad_norm": 0.7918118335727875, "learning_rate": 5.974035926768658e-06, "loss": 0.03158895373344421, "step": 11915 }, { "epoch": 0.11218823529411764, "grad_norm": 1.0820574374442753, "learning_rate": 5.972782746429982e-06, "loss": 0.03906518220901489, "step": 11920 }, { "epoch": 0.11223529411764706, "grad_norm": 0.5775212714940628, "learning_rate": 5.971530354403795e-06, "loss": 0.03162063360214233, "step": 11925 }, { "epoch": 0.11228235294117647, "grad_norm": 0.7112276159404268, "learning_rate": 5.970278749863964e-06, "loss": 0.028926777839660644, "step": 11930 }, { "epoch": 0.11232941176470589, "grad_norm": 0.5478418648164203, "learning_rate": 5.969027931985565e-06, "loss": 0.03609996736049652, "step": 11935 }, { "epoch": 0.1123764705882353, "grad_norm": 0.711410652079576, "learning_rate": 5.9677778999448875e-06, "loss": 0.036841994524002074, "step": 11940 }, { "epoch": 0.1124235294117647, "grad_norm": 0.6110447411989939, "learning_rate": 5.966528652919428e-06, "loss": 0.03235906958580017, "step": 11945 }, { "epoch": 0.11247058823529411, "grad_norm": 0.7044507662258745, "learning_rate": 5.965280190087882e-06, "loss": 0.036591562628746035, "step": 11950 }, { "epoch": 0.11251764705882353, "grad_norm": 1.028420611313198, "learning_rate": 5.9640325106301565e-06, "loss": 0.03762036263942718, "step": 11955 }, { "epoch": 0.11256470588235294, "grad_norm": 0.7989645432169947, "learning_rate": 5.96278561372735e-06, "loss": 0.0409502238035202, "step": 11960 }, { "epoch": 0.11261176470588236, "grad_norm": 0.6876718291454311, "learning_rate": 5.961539498561766e-06, "loss": 0.04521210193634033, "step": 11965 }, { "epoch": 0.11265882352941177, "grad_norm": 0.8506706615031167, "learning_rate": 5.960294164316899e-06, "loss": 0.0294727623462677, "step": 11970 }, { "epoch": 0.11270588235294117, "grad_norm": 0.8034517121196496, "learning_rate": 5.959049610177438e-06, "loss": 0.03494953215122223, "step": 11975 }, { "epoch": 0.11275294117647058, "grad_norm": 0.7193800998915807, "learning_rate": 5.957805835329265e-06, "loss": 0.03425236642360687, "step": 11980 }, { "epoch": 0.1128, "grad_norm": 0.627148740979526, "learning_rate": 5.956562838959448e-06, "loss": 0.029822003841400147, "step": 11985 }, { "epoch": 0.11284705882352941, "grad_norm": 0.6574880473698268, "learning_rate": 5.955320620256244e-06, "loss": 0.03253326416015625, "step": 11990 }, { "epoch": 0.11289411764705883, "grad_norm": 0.5392379035135123, "learning_rate": 5.954079178409095e-06, "loss": 0.029502004384994507, "step": 11995 }, { "epoch": 0.11294117647058824, "grad_norm": 0.7013019229199241, "learning_rate": 5.952838512608623e-06, "loss": 0.03937113881111145, "step": 12000 }, { "epoch": 0.11298823529411765, "grad_norm": 0.9332173857648239, "learning_rate": 5.95159862204663e-06, "loss": 0.04263421893119812, "step": 12005 }, { "epoch": 0.11303529411764705, "grad_norm": 0.7749705365877868, "learning_rate": 5.9503595059161015e-06, "loss": 0.03959270417690277, "step": 12010 }, { "epoch": 0.11308235294117647, "grad_norm": 0.6820406601677249, "learning_rate": 5.949121163411189e-06, "loss": 0.03512839078903198, "step": 12015 }, { "epoch": 0.11312941176470588, "grad_norm": 0.618790644276781, "learning_rate": 5.947883593727226e-06, "loss": 0.027713239192962646, "step": 12020 }, { "epoch": 0.1131764705882353, "grad_norm": 0.62650308249068, "learning_rate": 5.9466467960607135e-06, "loss": 0.03426690697669983, "step": 12025 }, { "epoch": 0.11322352941176471, "grad_norm": 1.0802314095846584, "learning_rate": 5.94541076960932e-06, "loss": 0.03722424507141113, "step": 12030 }, { "epoch": 0.11327058823529412, "grad_norm": 0.7269412844963253, "learning_rate": 5.944175513571886e-06, "loss": 0.03009571135044098, "step": 12035 }, { "epoch": 0.11331764705882352, "grad_norm": 0.6974917202707175, "learning_rate": 5.942941027148411e-06, "loss": 0.02934901714324951, "step": 12040 }, { "epoch": 0.11336470588235294, "grad_norm": 0.596127070514127, "learning_rate": 5.94170730954006e-06, "loss": 0.032557687163352965, "step": 12045 }, { "epoch": 0.11341176470588235, "grad_norm": 0.761376409115292, "learning_rate": 5.94047435994916e-06, "loss": 0.033385342359542845, "step": 12050 }, { "epoch": 0.11345882352941176, "grad_norm": 0.6714779623405911, "learning_rate": 5.939242177579192e-06, "loss": 0.03157155513763428, "step": 12055 }, { "epoch": 0.11350588235294118, "grad_norm": 0.7929451720653314, "learning_rate": 5.938010761634797e-06, "loss": 0.03852024376392364, "step": 12060 }, { "epoch": 0.11355294117647059, "grad_norm": 0.7536897927606451, "learning_rate": 5.936780111321766e-06, "loss": 0.03617796301841736, "step": 12065 }, { "epoch": 0.1136, "grad_norm": 0.6805931214631427, "learning_rate": 5.935550225847045e-06, "loss": 0.03339607715606689, "step": 12070 }, { "epoch": 0.1136470588235294, "grad_norm": 1.2106240794327, "learning_rate": 5.934321104418729e-06, "loss": 0.03386745452880859, "step": 12075 }, { "epoch": 0.11369411764705882, "grad_norm": 0.7952348737904237, "learning_rate": 5.933092746246058e-06, "loss": 0.04922422766685486, "step": 12080 }, { "epoch": 0.11374117647058823, "grad_norm": 0.882135554248762, "learning_rate": 5.93186515053942e-06, "loss": 0.03839605450630188, "step": 12085 }, { "epoch": 0.11378823529411765, "grad_norm": 0.8258560104945801, "learning_rate": 5.930638316510344e-06, "loss": 0.03515403568744659, "step": 12090 }, { "epoch": 0.11383529411764706, "grad_norm": 0.9184910552581244, "learning_rate": 5.929412243371504e-06, "loss": 0.0353317379951477, "step": 12095 }, { "epoch": 0.11388235294117648, "grad_norm": 0.8941405210690674, "learning_rate": 5.9281869303367075e-06, "loss": 0.03466467261314392, "step": 12100 }, { "epoch": 0.11392941176470588, "grad_norm": 0.7327265190115364, "learning_rate": 5.926962376620904e-06, "loss": 0.039468133449554445, "step": 12105 }, { "epoch": 0.11397647058823529, "grad_norm": 0.672381324929528, "learning_rate": 5.9257385814401725e-06, "loss": 0.03701100945472717, "step": 12110 }, { "epoch": 0.1140235294117647, "grad_norm": 1.0638266773892793, "learning_rate": 5.924515544011728e-06, "loss": 0.03032148480415344, "step": 12115 }, { "epoch": 0.11407058823529412, "grad_norm": 0.7007264337786455, "learning_rate": 5.9232932635539175e-06, "loss": 0.033776527643203734, "step": 12120 }, { "epoch": 0.11411764705882353, "grad_norm": 0.6611214719267444, "learning_rate": 5.92207173928621e-06, "loss": 0.032246518135070804, "step": 12125 }, { "epoch": 0.11416470588235295, "grad_norm": 0.704889728163093, "learning_rate": 5.920850970429207e-06, "loss": 0.036041858792304995, "step": 12130 }, { "epoch": 0.11421176470588236, "grad_norm": 0.8651468719828841, "learning_rate": 5.919630956204633e-06, "loss": 0.03655657470226288, "step": 12135 }, { "epoch": 0.11425882352941176, "grad_norm": 0.6984640679089437, "learning_rate": 5.918411695835332e-06, "loss": 0.03278613090515137, "step": 12140 }, { "epoch": 0.11430588235294117, "grad_norm": 0.839646553184126, "learning_rate": 5.917193188545271e-06, "loss": 0.03486073613166809, "step": 12145 }, { "epoch": 0.11435294117647059, "grad_norm": 0.5144034575524151, "learning_rate": 5.915975433559531e-06, "loss": 0.03202007412910461, "step": 12150 }, { "epoch": 0.1144, "grad_norm": 0.565258030317353, "learning_rate": 5.9147584301043145e-06, "loss": 0.029577884078025817, "step": 12155 }, { "epoch": 0.11444705882352942, "grad_norm": 0.7548019547838364, "learning_rate": 5.913542177406933e-06, "loss": 0.031104779243469237, "step": 12160 }, { "epoch": 0.11449411764705883, "grad_norm": 0.7603069972052772, "learning_rate": 5.912326674695812e-06, "loss": 0.03904790878295898, "step": 12165 }, { "epoch": 0.11454117647058823, "grad_norm": 0.6334752567740339, "learning_rate": 5.911111921200486e-06, "loss": 0.03215308785438538, "step": 12170 }, { "epoch": 0.11458823529411764, "grad_norm": 0.7857132165217104, "learning_rate": 5.909897916151599e-06, "loss": 0.03146962523460388, "step": 12175 }, { "epoch": 0.11463529411764706, "grad_norm": 0.599205744951473, "learning_rate": 5.9086846587808965e-06, "loss": 0.034112179279327394, "step": 12180 }, { "epoch": 0.11468235294117647, "grad_norm": 0.5127853425380128, "learning_rate": 5.907472148321233e-06, "loss": 0.02644512951374054, "step": 12185 }, { "epoch": 0.11472941176470589, "grad_norm": 0.8293076153187062, "learning_rate": 5.906260384006558e-06, "loss": 0.03757176399230957, "step": 12190 }, { "epoch": 0.1147764705882353, "grad_norm": 0.7450925354738024, "learning_rate": 5.905049365071926e-06, "loss": 0.035126683115959165, "step": 12195 }, { "epoch": 0.1148235294117647, "grad_norm": 0.7060283424773566, "learning_rate": 5.9038390907534894e-06, "loss": 0.04058432579040527, "step": 12200 }, { "epoch": 0.11487058823529411, "grad_norm": 0.8253977686762547, "learning_rate": 5.90262956028849e-06, "loss": 0.032826858758926394, "step": 12205 }, { "epoch": 0.11491764705882353, "grad_norm": 1.400872368772285, "learning_rate": 5.901420772915267e-06, "loss": 0.03563632369041443, "step": 12210 }, { "epoch": 0.11496470588235294, "grad_norm": 0.7040966398976155, "learning_rate": 5.900212727873252e-06, "loss": 0.03401053547859192, "step": 12215 }, { "epoch": 0.11501176470588236, "grad_norm": 1.4555559418111694, "learning_rate": 5.899005424402966e-06, "loss": 0.03329780697822571, "step": 12220 }, { "epoch": 0.11505882352941177, "grad_norm": 0.892143222175019, "learning_rate": 5.897798861746014e-06, "loss": 0.04045618176460266, "step": 12225 }, { "epoch": 0.11510588235294118, "grad_norm": 0.6361046054028275, "learning_rate": 5.896593039145088e-06, "loss": 0.036760589480400084, "step": 12230 }, { "epoch": 0.11515294117647058, "grad_norm": 0.7484300624400229, "learning_rate": 5.895387955843965e-06, "loss": 0.035663658380508424, "step": 12235 }, { "epoch": 0.1152, "grad_norm": 0.5744505784881303, "learning_rate": 5.894183611087503e-06, "loss": 0.03285527527332306, "step": 12240 }, { "epoch": 0.11524705882352941, "grad_norm": 0.6397031364402062, "learning_rate": 5.892980004121638e-06, "loss": 0.030492126941680908, "step": 12245 }, { "epoch": 0.11529411764705882, "grad_norm": 0.7503432654630902, "learning_rate": 5.891777134193384e-06, "loss": 0.033899688720703126, "step": 12250 }, { "epoch": 0.11534117647058824, "grad_norm": 0.5842252281056038, "learning_rate": 5.89057500055083e-06, "loss": 0.03178236782550812, "step": 12255 }, { "epoch": 0.11538823529411765, "grad_norm": 0.6425913800351297, "learning_rate": 5.88937360244314e-06, "loss": 0.030292680859565733, "step": 12260 }, { "epoch": 0.11543529411764705, "grad_norm": 0.8039208589468932, "learning_rate": 5.888172939120549e-06, "loss": 0.0376015305519104, "step": 12265 }, { "epoch": 0.11548235294117647, "grad_norm": 0.711107295085272, "learning_rate": 5.88697300983436e-06, "loss": 0.029732489585876466, "step": 12270 }, { "epoch": 0.11552941176470588, "grad_norm": 0.7087914764975016, "learning_rate": 5.885773813836945e-06, "loss": 0.0369412899017334, "step": 12275 }, { "epoch": 0.1155764705882353, "grad_norm": 0.7645514645870903, "learning_rate": 5.88457535038174e-06, "loss": 0.030230993032455446, "step": 12280 }, { "epoch": 0.11562352941176471, "grad_norm": 0.9336471566919283, "learning_rate": 5.8833776187232494e-06, "loss": 0.037973514199256896, "step": 12285 }, { "epoch": 0.11567058823529412, "grad_norm": 0.672424270422542, "learning_rate": 5.882180618117034e-06, "loss": 0.0367697536945343, "step": 12290 }, { "epoch": 0.11571764705882354, "grad_norm": 0.7645701759532517, "learning_rate": 5.880984347819715e-06, "loss": 0.031694060564041136, "step": 12295 }, { "epoch": 0.11576470588235294, "grad_norm": 0.9035580717903977, "learning_rate": 5.879788807088974e-06, "loss": 0.03829440772533417, "step": 12300 }, { "epoch": 0.11581176470588235, "grad_norm": 1.7730225101080268, "learning_rate": 5.878593995183548e-06, "loss": 0.03084554970264435, "step": 12305 }, { "epoch": 0.11585882352941176, "grad_norm": 0.47209168619585756, "learning_rate": 5.877399911363226e-06, "loss": 0.025741803646087646, "step": 12310 }, { "epoch": 0.11590588235294118, "grad_norm": 0.6300543496393366, "learning_rate": 5.876206554888851e-06, "loss": 0.02776663601398468, "step": 12315 }, { "epoch": 0.11595294117647059, "grad_norm": 0.6833913820029968, "learning_rate": 5.875013925022316e-06, "loss": 0.04339834451675415, "step": 12320 }, { "epoch": 0.116, "grad_norm": 0.661074889882876, "learning_rate": 5.873822021026559e-06, "loss": 0.02959118187427521, "step": 12325 }, { "epoch": 0.1160470588235294, "grad_norm": 0.7164932687449074, "learning_rate": 5.87263084216557e-06, "loss": 0.033687430620193484, "step": 12330 }, { "epoch": 0.11609411764705882, "grad_norm": 0.711287066791096, "learning_rate": 5.871440387704379e-06, "loss": 0.030295443534851075, "step": 12335 }, { "epoch": 0.11614117647058823, "grad_norm": 0.600670055479619, "learning_rate": 5.87025065690906e-06, "loss": 0.03289272785186768, "step": 12340 }, { "epoch": 0.11618823529411765, "grad_norm": 0.9849707037696226, "learning_rate": 5.869061649046728e-06, "loss": 0.03766148686408997, "step": 12345 }, { "epoch": 0.11623529411764706, "grad_norm": 0.6336387176036548, "learning_rate": 5.867873363385535e-06, "loss": 0.037563157081604, "step": 12350 }, { "epoch": 0.11628235294117648, "grad_norm": 0.9141283659202453, "learning_rate": 5.86668579919467e-06, "loss": 0.03526826202869415, "step": 12355 }, { "epoch": 0.11632941176470588, "grad_norm": 0.6716833932629682, "learning_rate": 5.865498955744361e-06, "loss": 0.04020472764968872, "step": 12360 }, { "epoch": 0.11637647058823529, "grad_norm": 0.5844592369998424, "learning_rate": 5.864312832305863e-06, "loss": 0.030415141582489015, "step": 12365 }, { "epoch": 0.1164235294117647, "grad_norm": 0.7437752737779538, "learning_rate": 5.863127428151466e-06, "loss": 0.03915645182132721, "step": 12370 }, { "epoch": 0.11647058823529412, "grad_norm": 0.8810991798931334, "learning_rate": 5.861942742554488e-06, "loss": 0.03533821702003479, "step": 12375 }, { "epoch": 0.11651764705882353, "grad_norm": 0.6869280322715531, "learning_rate": 5.860758774789276e-06, "loss": 0.03500109314918518, "step": 12380 }, { "epoch": 0.11656470588235295, "grad_norm": 0.7403547545221105, "learning_rate": 5.859575524131199e-06, "loss": 0.03590534925460816, "step": 12385 }, { "epoch": 0.11661176470588236, "grad_norm": 0.4936267614138984, "learning_rate": 5.858392989856655e-06, "loss": 0.033336830139160153, "step": 12390 }, { "epoch": 0.11665882352941176, "grad_norm": 0.6551271059302671, "learning_rate": 5.857211171243056e-06, "loss": 0.04049029350280762, "step": 12395 }, { "epoch": 0.11670588235294117, "grad_norm": 1.2813394228791806, "learning_rate": 5.856030067568843e-06, "loss": 0.03385442495346069, "step": 12400 }, { "epoch": 0.11675294117647059, "grad_norm": 0.6872380094288653, "learning_rate": 5.854849678113469e-06, "loss": 0.036106011271476744, "step": 12405 }, { "epoch": 0.1168, "grad_norm": 0.8278911371872116, "learning_rate": 5.853670002157405e-06, "loss": 0.034232226014137265, "step": 12410 }, { "epoch": 0.11684705882352941, "grad_norm": 0.9595691439021592, "learning_rate": 5.852491038982137e-06, "loss": 0.035353314876556394, "step": 12415 }, { "epoch": 0.11689411764705883, "grad_norm": 0.6938424152944738, "learning_rate": 5.851312787870161e-06, "loss": 0.03370201289653778, "step": 12420 }, { "epoch": 0.11694117647058823, "grad_norm": 0.8376566168631958, "learning_rate": 5.850135248104989e-06, "loss": 0.03082851767539978, "step": 12425 }, { "epoch": 0.11698823529411764, "grad_norm": 0.6602357968731605, "learning_rate": 5.848958418971137e-06, "loss": 0.02806839346885681, "step": 12430 }, { "epoch": 0.11703529411764706, "grad_norm": 0.7601529619843247, "learning_rate": 5.847782299754127e-06, "loss": 0.03493169546127319, "step": 12435 }, { "epoch": 0.11708235294117647, "grad_norm": 0.8880248007022715, "learning_rate": 5.846606889740496e-06, "loss": 0.036434540152549745, "step": 12440 }, { "epoch": 0.11712941176470588, "grad_norm": 0.6217489929919502, "learning_rate": 5.845432188217771e-06, "loss": 0.031114521622657775, "step": 12445 }, { "epoch": 0.1171764705882353, "grad_norm": 0.7829943117284951, "learning_rate": 5.84425819447449e-06, "loss": 0.03734353184700012, "step": 12450 }, { "epoch": 0.11722352941176471, "grad_norm": 0.6287010919322872, "learning_rate": 5.843084907800187e-06, "loss": 0.03247601091861725, "step": 12455 }, { "epoch": 0.11727058823529411, "grad_norm": 1.173432623745647, "learning_rate": 5.841912327485398e-06, "loss": 0.04190978407859802, "step": 12460 }, { "epoch": 0.11731764705882353, "grad_norm": 1.0759715853856588, "learning_rate": 5.840740452821647e-06, "loss": 0.03982222378253937, "step": 12465 }, { "epoch": 0.11736470588235294, "grad_norm": 0.9321255302452651, "learning_rate": 5.8395692831014614e-06, "loss": 0.03784998655319214, "step": 12470 }, { "epoch": 0.11741176470588235, "grad_norm": 0.516474610325838, "learning_rate": 5.838398817618357e-06, "loss": 0.03321818113327026, "step": 12475 }, { "epoch": 0.11745882352941177, "grad_norm": 0.7291786196873236, "learning_rate": 5.83722905566684e-06, "loss": 0.035307615995407104, "step": 12480 }, { "epoch": 0.11750588235294118, "grad_norm": 0.8972817401135519, "learning_rate": 5.836059996542407e-06, "loss": 0.03837005198001862, "step": 12485 }, { "epoch": 0.11755294117647058, "grad_norm": 0.7719570757469985, "learning_rate": 5.8348916395415426e-06, "loss": 0.03134806454181671, "step": 12490 }, { "epoch": 0.1176, "grad_norm": 0.7912258618939367, "learning_rate": 5.833723983961712e-06, "loss": 0.03252391219139099, "step": 12495 }, { "epoch": 0.11764705882352941, "grad_norm": 0.4041929718876156, "learning_rate": 5.832557029101373e-06, "loss": 0.03395684063434601, "step": 12500 }, { "epoch": 0.11769411764705882, "grad_norm": 0.7281865736709751, "learning_rate": 5.831390774259955e-06, "loss": 0.03384950459003448, "step": 12505 }, { "epoch": 0.11774117647058824, "grad_norm": 0.5415865444339248, "learning_rate": 5.830225218737879e-06, "loss": 0.03724015951156616, "step": 12510 }, { "epoch": 0.11778823529411765, "grad_norm": 0.6968550018574556, "learning_rate": 5.829060361836532e-06, "loss": 0.03853947520256042, "step": 12515 }, { "epoch": 0.11783529411764705, "grad_norm": 0.9319012705090853, "learning_rate": 5.827896202858288e-06, "loss": 0.04011477828025818, "step": 12520 }, { "epoch": 0.11788235294117647, "grad_norm": 0.9582147670768365, "learning_rate": 5.82673274110649e-06, "loss": 0.03474810123443604, "step": 12525 }, { "epoch": 0.11792941176470588, "grad_norm": 0.7321719878901232, "learning_rate": 5.825569975885456e-06, "loss": 0.03181545734405518, "step": 12530 }, { "epoch": 0.1179764705882353, "grad_norm": 0.8154419144867905, "learning_rate": 5.8244079065004755e-06, "loss": 0.029438737034797668, "step": 12535 }, { "epoch": 0.11802352941176471, "grad_norm": 0.729953093159127, "learning_rate": 5.823246532257808e-06, "loss": 0.03635562658309936, "step": 12540 }, { "epoch": 0.11807058823529412, "grad_norm": 0.6749337439375717, "learning_rate": 5.82208585246468e-06, "loss": 0.040562385320663454, "step": 12545 }, { "epoch": 0.11811764705882354, "grad_norm": 0.8285552848473057, "learning_rate": 5.8209258664292865e-06, "loss": 0.032199323177337646, "step": 12550 }, { "epoch": 0.11816470588235294, "grad_norm": 0.844407503455689, "learning_rate": 5.819766573460782e-06, "loss": 0.03590390682220459, "step": 12555 }, { "epoch": 0.11821176470588235, "grad_norm": 0.8162193964695528, "learning_rate": 5.818607972869289e-06, "loss": 0.03734274208545685, "step": 12560 }, { "epoch": 0.11825882352941176, "grad_norm": 0.7380603804120631, "learning_rate": 5.817450063965888e-06, "loss": 0.03701640069484711, "step": 12565 }, { "epoch": 0.11830588235294118, "grad_norm": 0.6416939694738374, "learning_rate": 5.81629284606262e-06, "loss": 0.0380897045135498, "step": 12570 }, { "epoch": 0.11835294117647059, "grad_norm": 0.535328318347948, "learning_rate": 5.815136318472482e-06, "loss": 0.03590988218784332, "step": 12575 }, { "epoch": 0.1184, "grad_norm": 0.8416103714437907, "learning_rate": 5.813980480509429e-06, "loss": 0.03816655576229096, "step": 12580 }, { "epoch": 0.1184470588235294, "grad_norm": 0.7347529985890033, "learning_rate": 5.8128253314883706e-06, "loss": 0.03594166934490204, "step": 12585 }, { "epoch": 0.11849411764705882, "grad_norm": 0.6988115298502451, "learning_rate": 5.8116708707251654e-06, "loss": 0.03439871370792389, "step": 12590 }, { "epoch": 0.11854117647058823, "grad_norm": 0.8190541866388088, "learning_rate": 5.810517097536625e-06, "loss": 0.03259240686893463, "step": 12595 }, { "epoch": 0.11858823529411765, "grad_norm": 0.6952813977529139, "learning_rate": 5.8093640112405114e-06, "loss": 0.03768856525421142, "step": 12600 }, { "epoch": 0.11863529411764706, "grad_norm": 0.7714155001175284, "learning_rate": 5.80821161115553e-06, "loss": 0.03267548680305481, "step": 12605 }, { "epoch": 0.11868235294117647, "grad_norm": 0.6919878604271773, "learning_rate": 5.807059896601337e-06, "loss": 0.03790009021759033, "step": 12610 }, { "epoch": 0.11872941176470589, "grad_norm": 0.793317017578349, "learning_rate": 5.805908866898528e-06, "loss": 0.03437758684158325, "step": 12615 }, { "epoch": 0.11877647058823529, "grad_norm": 0.7139164385095043, "learning_rate": 5.804758521368642e-06, "loss": 0.03411885797977447, "step": 12620 }, { "epoch": 0.1188235294117647, "grad_norm": 0.42833944995894496, "learning_rate": 5.803608859334161e-06, "loss": 0.03253351151943207, "step": 12625 }, { "epoch": 0.11887058823529412, "grad_norm": 1.1623452155142446, "learning_rate": 5.8024598801185025e-06, "loss": 0.03026222586631775, "step": 12630 }, { "epoch": 0.11891764705882353, "grad_norm": 0.7683633639440306, "learning_rate": 5.801311583046026e-06, "loss": 0.03373821973800659, "step": 12635 }, { "epoch": 0.11896470588235294, "grad_norm": 0.6096210204321466, "learning_rate": 5.800163967442021e-06, "loss": 0.03991883099079132, "step": 12640 }, { "epoch": 0.11901176470588236, "grad_norm": 0.6835992074725498, "learning_rate": 5.799017032632713e-06, "loss": 0.03275016844272614, "step": 12645 }, { "epoch": 0.11905882352941176, "grad_norm": 0.7551796931609351, "learning_rate": 5.797870777945263e-06, "loss": 0.029094335436820985, "step": 12650 }, { "epoch": 0.11910588235294117, "grad_norm": 0.5542107224569087, "learning_rate": 5.796725202707757e-06, "loss": 0.035112392902374265, "step": 12655 }, { "epoch": 0.11915294117647059, "grad_norm": 0.6499936226918966, "learning_rate": 5.795580306249215e-06, "loss": 0.03041028678417206, "step": 12660 }, { "epoch": 0.1192, "grad_norm": 0.5907608729662384, "learning_rate": 5.794436087899581e-06, "loss": 0.03387390971183777, "step": 12665 }, { "epoch": 0.11924705882352941, "grad_norm": 0.6174668036794343, "learning_rate": 5.7932925469897274e-06, "loss": 0.030128473043441774, "step": 12670 }, { "epoch": 0.11929411764705883, "grad_norm": 0.7241281566161295, "learning_rate": 5.7921496828514456e-06, "loss": 0.031124645471572877, "step": 12675 }, { "epoch": 0.11934117647058824, "grad_norm": 0.7535556183344297, "learning_rate": 5.791007494817456e-06, "loss": 0.03821996450424194, "step": 12680 }, { "epoch": 0.11938823529411764, "grad_norm": 0.728526989877177, "learning_rate": 5.789865982221396e-06, "loss": 0.03128325343132019, "step": 12685 }, { "epoch": 0.11943529411764706, "grad_norm": 0.6595561005349131, "learning_rate": 5.78872514439782e-06, "loss": 0.030334192514419555, "step": 12690 }, { "epoch": 0.11948235294117647, "grad_norm": 0.7886778184931826, "learning_rate": 5.787584980682206e-06, "loss": 0.03436822891235351, "step": 12695 }, { "epoch": 0.11952941176470588, "grad_norm": 0.5612097621183881, "learning_rate": 5.786445490410942e-06, "loss": 0.029335999488830568, "step": 12700 }, { "epoch": 0.1195764705882353, "grad_norm": 0.7208362597054813, "learning_rate": 5.785306672921334e-06, "loss": 0.040702319145202635, "step": 12705 }, { "epoch": 0.11962352941176471, "grad_norm": 0.6709502321343039, "learning_rate": 5.784168527551598e-06, "loss": 0.03630302548408508, "step": 12710 }, { "epoch": 0.11967058823529411, "grad_norm": 0.49795371155065093, "learning_rate": 5.783031053640863e-06, "loss": 0.025247961282730103, "step": 12715 }, { "epoch": 0.11971764705882353, "grad_norm": 0.5214464552097571, "learning_rate": 5.781894250529167e-06, "loss": 0.030308160185813903, "step": 12720 }, { "epoch": 0.11976470588235294, "grad_norm": 0.4896960060279535, "learning_rate": 5.780758117557454e-06, "loss": 0.03239006996154785, "step": 12725 }, { "epoch": 0.11981176470588235, "grad_norm": 0.8736354627678425, "learning_rate": 5.7796226540675745e-06, "loss": 0.034387826919555664, "step": 12730 }, { "epoch": 0.11985882352941177, "grad_norm": 0.5904005500989182, "learning_rate": 5.778487859402287e-06, "loss": 0.03203432559967041, "step": 12735 }, { "epoch": 0.11990588235294118, "grad_norm": 0.7720827155012586, "learning_rate": 5.77735373290525e-06, "loss": 0.04280346632003784, "step": 12740 }, { "epoch": 0.11995294117647058, "grad_norm": 0.8829065408648139, "learning_rate": 5.776220273921021e-06, "loss": 0.03439708948135376, "step": 12745 }, { "epoch": 0.12, "grad_norm": 0.6232316783046206, "learning_rate": 5.7750874817950656e-06, "loss": 0.03515142202377319, "step": 12750 }, { "epoch": 0.12004705882352941, "grad_norm": 1.232503470824926, "learning_rate": 5.773955355873738e-06, "loss": 0.04181605279445648, "step": 12755 }, { "epoch": 0.12009411764705882, "grad_norm": 0.7874838816775195, "learning_rate": 5.772823895504294e-06, "loss": 0.038935065269470215, "step": 12760 }, { "epoch": 0.12014117647058824, "grad_norm": 0.5960101765901461, "learning_rate": 5.771693100034884e-06, "loss": 0.028431978821754456, "step": 12765 }, { "epoch": 0.12018823529411765, "grad_norm": 0.7584336225738233, "learning_rate": 5.7705629688145516e-06, "loss": 0.03096526563167572, "step": 12770 }, { "epoch": 0.12023529411764707, "grad_norm": 0.8894021947241512, "learning_rate": 5.7694335011932316e-06, "loss": 0.04418740570545197, "step": 12775 }, { "epoch": 0.12028235294117647, "grad_norm": 0.6871546289789703, "learning_rate": 5.768304696521748e-06, "loss": 0.03941014707088471, "step": 12780 }, { "epoch": 0.12032941176470588, "grad_norm": 0.6873587596247918, "learning_rate": 5.767176554151819e-06, "loss": 0.029733937978744508, "step": 12785 }, { "epoch": 0.1203764705882353, "grad_norm": 0.6096732194382549, "learning_rate": 5.766049073436044e-06, "loss": 0.035414040088653564, "step": 12790 }, { "epoch": 0.12042352941176471, "grad_norm": 0.3784844895875096, "learning_rate": 5.76492225372791e-06, "loss": 0.03579130172729492, "step": 12795 }, { "epoch": 0.12047058823529412, "grad_norm": 0.582603870452984, "learning_rate": 5.7637960943817885e-06, "loss": 0.041622644662857054, "step": 12800 }, { "epoch": 0.12051764705882353, "grad_norm": 0.6520577648211439, "learning_rate": 5.762670594752932e-06, "loss": 0.02394741475582123, "step": 12805 }, { "epoch": 0.12056470588235293, "grad_norm": 1.2542687461222333, "learning_rate": 5.7615457541974785e-06, "loss": 0.035013610124588014, "step": 12810 }, { "epoch": 0.12061176470588235, "grad_norm": 0.7965184639857794, "learning_rate": 5.760421572072439e-06, "loss": 0.04052800238132477, "step": 12815 }, { "epoch": 0.12065882352941176, "grad_norm": 0.6209518245094274, "learning_rate": 5.759298047735706e-06, "loss": 0.029480087757110595, "step": 12820 }, { "epoch": 0.12070588235294118, "grad_norm": 0.7651661652433678, "learning_rate": 5.758175180546049e-06, "loss": 0.03110593557357788, "step": 12825 }, { "epoch": 0.12075294117647059, "grad_norm": 0.5554052198892551, "learning_rate": 5.757052969863111e-06, "loss": 0.033265942335128786, "step": 12830 }, { "epoch": 0.1208, "grad_norm": 0.852195022687813, "learning_rate": 5.755931415047405e-06, "loss": 0.03285674452781677, "step": 12835 }, { "epoch": 0.12084705882352942, "grad_norm": 0.6785465391127617, "learning_rate": 5.754810515460324e-06, "loss": 0.032951349020004274, "step": 12840 }, { "epoch": 0.12089411764705882, "grad_norm": 0.7885805895151513, "learning_rate": 5.7536902704641225e-06, "loss": 0.037095415592193606, "step": 12845 }, { "epoch": 0.12094117647058823, "grad_norm": 0.8654846026405868, "learning_rate": 5.752570679421931e-06, "loss": 0.03513379395008087, "step": 12850 }, { "epoch": 0.12098823529411765, "grad_norm": 0.7155601797020272, "learning_rate": 5.75145174169774e-06, "loss": 0.03361808657646179, "step": 12855 }, { "epoch": 0.12103529411764706, "grad_norm": 0.6565751358081361, "learning_rate": 5.75033345665641e-06, "loss": 0.027468621730804443, "step": 12860 }, { "epoch": 0.12108235294117647, "grad_norm": 0.8739424698856381, "learning_rate": 5.749215823663665e-06, "loss": 0.027422991394996644, "step": 12865 }, { "epoch": 0.12112941176470589, "grad_norm": 0.5997900007819672, "learning_rate": 5.748098842086094e-06, "loss": 0.03442652821540833, "step": 12870 }, { "epoch": 0.12117647058823529, "grad_norm": 0.5275275454046098, "learning_rate": 5.7469825112911404e-06, "loss": 0.028399848937988283, "step": 12875 }, { "epoch": 0.1212235294117647, "grad_norm": 0.6405735143333188, "learning_rate": 5.745866830647114e-06, "loss": 0.03673148155212402, "step": 12880 }, { "epoch": 0.12127058823529412, "grad_norm": 0.7582455057893496, "learning_rate": 5.744751799523181e-06, "loss": 0.03725391626358032, "step": 12885 }, { "epoch": 0.12131764705882353, "grad_norm": 0.7522481973370697, "learning_rate": 5.743637417289358e-06, "loss": 0.03946205675601959, "step": 12890 }, { "epoch": 0.12136470588235294, "grad_norm": 0.7103737939072172, "learning_rate": 5.742523683316528e-06, "loss": 0.032750204205513, "step": 12895 }, { "epoch": 0.12141176470588236, "grad_norm": 0.6138126120182189, "learning_rate": 5.741410596976418e-06, "loss": 0.032406154274940493, "step": 12900 }, { "epoch": 0.12145882352941176, "grad_norm": 1.0596445221638402, "learning_rate": 5.740298157641613e-06, "loss": 0.0336535781621933, "step": 12905 }, { "epoch": 0.12150588235294117, "grad_norm": 0.8589428864735155, "learning_rate": 5.739186364685543e-06, "loss": 0.042114758491516115, "step": 12910 }, { "epoch": 0.12155294117647059, "grad_norm": 0.7145451622879102, "learning_rate": 5.738075217482492e-06, "loss": 0.03579763770103454, "step": 12915 }, { "epoch": 0.1216, "grad_norm": 0.6538748168551568, "learning_rate": 5.736964715407593e-06, "loss": 0.027126175165176392, "step": 12920 }, { "epoch": 0.12164705882352941, "grad_norm": 0.5539712045315802, "learning_rate": 5.735854857836819e-06, "loss": 0.03159646987915039, "step": 12925 }, { "epoch": 0.12169411764705883, "grad_norm": 0.8846621433304331, "learning_rate": 5.734745644146992e-06, "loss": 0.035677185654640196, "step": 12930 }, { "epoch": 0.12174117647058824, "grad_norm": 0.6074757710152701, "learning_rate": 5.733637073715774e-06, "loss": 0.03283994793891907, "step": 12935 }, { "epoch": 0.12178823529411764, "grad_norm": 0.6461188550833857, "learning_rate": 5.732529145921676e-06, "loss": 0.03388366103172302, "step": 12940 }, { "epoch": 0.12183529411764706, "grad_norm": 0.6132380782060636, "learning_rate": 5.73142186014404e-06, "loss": 0.038252097368240354, "step": 12945 }, { "epoch": 0.12188235294117647, "grad_norm": 0.4933420437113025, "learning_rate": 5.730315215763053e-06, "loss": 0.03394418954849243, "step": 12950 }, { "epoch": 0.12192941176470588, "grad_norm": 0.7618710646157603, "learning_rate": 5.729209212159739e-06, "loss": 0.03600043058395386, "step": 12955 }, { "epoch": 0.1219764705882353, "grad_norm": 0.7162763156983823, "learning_rate": 5.7281038487159536e-06, "loss": 0.03151877522468567, "step": 12960 }, { "epoch": 0.12202352941176471, "grad_norm": 0.5586515672765563, "learning_rate": 5.726999124814392e-06, "loss": 0.03620442748069763, "step": 12965 }, { "epoch": 0.12207058823529411, "grad_norm": 0.7317432610053695, "learning_rate": 5.72589503983858e-06, "loss": 0.03368120789527893, "step": 12970 }, { "epoch": 0.12211764705882353, "grad_norm": 0.9300513888273068, "learning_rate": 5.724791593172877e-06, "loss": 0.03846096992492676, "step": 12975 }, { "epoch": 0.12216470588235294, "grad_norm": 0.8824465179698008, "learning_rate": 5.723688784202468e-06, "loss": 0.037382423877716064, "step": 12980 }, { "epoch": 0.12221176470588235, "grad_norm": 0.8018117176272235, "learning_rate": 5.722586612313371e-06, "loss": 0.03648455142974853, "step": 12985 }, { "epoch": 0.12225882352941177, "grad_norm": 0.7199235821597864, "learning_rate": 5.721485076892431e-06, "loss": 0.03547198176383972, "step": 12990 }, { "epoch": 0.12230588235294118, "grad_norm": 0.6379578739371632, "learning_rate": 5.720384177327316e-06, "loss": 0.03457921147346497, "step": 12995 }, { "epoch": 0.1223529411764706, "grad_norm": 0.49946611527284773, "learning_rate": 5.719283913006522e-06, "loss": 0.03917381763458252, "step": 13000 }, { "epoch": 0.1224, "grad_norm": 0.7715107504758243, "learning_rate": 5.7181842833193666e-06, "loss": 0.03604763150215149, "step": 13005 }, { "epoch": 0.12244705882352941, "grad_norm": 0.5437461099954878, "learning_rate": 5.717085287655988e-06, "loss": 0.03357391357421875, "step": 13010 }, { "epoch": 0.12249411764705882, "grad_norm": 0.6114785009772419, "learning_rate": 5.715986925407344e-06, "loss": 0.028597605228424073, "step": 13015 }, { "epoch": 0.12254117647058824, "grad_norm": 1.4602554429085388, "learning_rate": 5.714889195965214e-06, "loss": 0.031127029657363893, "step": 13020 }, { "epoch": 0.12258823529411765, "grad_norm": 0.7402333717367365, "learning_rate": 5.7137920987221915e-06, "loss": 0.03792281746864319, "step": 13025 }, { "epoch": 0.12263529411764706, "grad_norm": 0.6896688595746857, "learning_rate": 5.71269563307169e-06, "loss": 0.02910271883010864, "step": 13030 }, { "epoch": 0.12268235294117646, "grad_norm": 0.5747447350300718, "learning_rate": 5.7115997984079344e-06, "loss": 0.045345208048820494, "step": 13035 }, { "epoch": 0.12272941176470588, "grad_norm": 0.9225521437980833, "learning_rate": 5.710504594125961e-06, "loss": 0.04817652404308319, "step": 13040 }, { "epoch": 0.12277647058823529, "grad_norm": 1.0990733579388918, "learning_rate": 5.709410019621624e-06, "loss": 0.04358199834823608, "step": 13045 }, { "epoch": 0.1228235294117647, "grad_norm": 0.6803664942344948, "learning_rate": 5.70831607429158e-06, "loss": 0.037959825992584226, "step": 13050 }, { "epoch": 0.12287058823529412, "grad_norm": 0.6553330340038933, "learning_rate": 5.707222757533302e-06, "loss": 0.029367506504058838, "step": 13055 }, { "epoch": 0.12291764705882353, "grad_norm": 0.8474278186160354, "learning_rate": 5.706130068745066e-06, "loss": 0.032883429527282716, "step": 13060 }, { "epoch": 0.12296470588235293, "grad_norm": 0.6874736878126064, "learning_rate": 5.7050380073259534e-06, "loss": 0.04057698249816895, "step": 13065 }, { "epoch": 0.12301176470588235, "grad_norm": 0.7032178523217065, "learning_rate": 5.703946572675855e-06, "loss": 0.0306135892868042, "step": 13070 }, { "epoch": 0.12305882352941176, "grad_norm": 0.6583812832255745, "learning_rate": 5.70285576419546e-06, "loss": 0.03879573941230774, "step": 13075 }, { "epoch": 0.12310588235294118, "grad_norm": 0.7521864620604413, "learning_rate": 5.701765581286263e-06, "loss": 0.03537952899932861, "step": 13080 }, { "epoch": 0.12315294117647059, "grad_norm": 1.17561566261524, "learning_rate": 5.700676023350557e-06, "loss": 0.031422197818756104, "step": 13085 }, { "epoch": 0.1232, "grad_norm": 0.66650599580267, "learning_rate": 5.699587089791435e-06, "loss": 0.032375913858413694, "step": 13090 }, { "epoch": 0.12324705882352942, "grad_norm": 0.9060215208695381, "learning_rate": 5.6984987800127885e-06, "loss": 0.034367865324020384, "step": 13095 }, { "epoch": 0.12329411764705882, "grad_norm": 0.5852934282288025, "learning_rate": 5.697411093419306e-06, "loss": 0.03236795663833618, "step": 13100 }, { "epoch": 0.12334117647058823, "grad_norm": 0.7713376541055118, "learning_rate": 5.6963240294164664e-06, "loss": 0.031018161773681642, "step": 13105 }, { "epoch": 0.12338823529411765, "grad_norm": 1.0341153349295682, "learning_rate": 5.6952375874105494e-06, "loss": 0.03808937072753906, "step": 13110 }, { "epoch": 0.12343529411764706, "grad_norm": 0.633020895006699, "learning_rate": 5.694151766808622e-06, "loss": 0.030274984240531922, "step": 13115 }, { "epoch": 0.12348235294117647, "grad_norm": 0.56596150609436, "learning_rate": 5.693066567018545e-06, "loss": 0.035029202699661255, "step": 13120 }, { "epoch": 0.12352941176470589, "grad_norm": 0.7449768706620234, "learning_rate": 5.691981987448965e-06, "loss": 0.03574273586273193, "step": 13125 }, { "epoch": 0.12357647058823529, "grad_norm": 0.5237133916883978, "learning_rate": 5.6908980275093215e-06, "loss": 0.029413771629333497, "step": 13130 }, { "epoch": 0.1236235294117647, "grad_norm": 0.9294395699890212, "learning_rate": 5.689814686609839e-06, "loss": 0.0354516327381134, "step": 13135 }, { "epoch": 0.12367058823529412, "grad_norm": 0.8899874952229352, "learning_rate": 5.688731964161528e-06, "loss": 0.04331667721271515, "step": 13140 }, { "epoch": 0.12371764705882353, "grad_norm": 1.0969012017634645, "learning_rate": 5.687649859576181e-06, "loss": 0.03346914052963257, "step": 13145 }, { "epoch": 0.12376470588235294, "grad_norm": 0.6400745201183821, "learning_rate": 5.6865683722663766e-06, "loss": 0.03645181357860565, "step": 13150 }, { "epoch": 0.12381176470588236, "grad_norm": 0.6565951286171361, "learning_rate": 5.685487501645473e-06, "loss": 0.03408132195472717, "step": 13155 }, { "epoch": 0.12385882352941177, "grad_norm": 0.5177574538134714, "learning_rate": 5.6844072471276105e-06, "loss": 0.03326141834259033, "step": 13160 }, { "epoch": 0.12390588235294117, "grad_norm": 1.5477419030925323, "learning_rate": 5.683327608127707e-06, "loss": 0.03174854516983032, "step": 13165 }, { "epoch": 0.12395294117647059, "grad_norm": 0.7141818790560192, "learning_rate": 5.682248584061456e-06, "loss": 0.0326251745223999, "step": 13170 }, { "epoch": 0.124, "grad_norm": 0.6425271420533614, "learning_rate": 5.681170174345331e-06, "loss": 0.028887641429901124, "step": 13175 }, { "epoch": 0.12404705882352941, "grad_norm": 0.5726206291371279, "learning_rate": 5.680092378396579e-06, "loss": 0.03171008825302124, "step": 13180 }, { "epoch": 0.12409411764705883, "grad_norm": 0.6694951644865555, "learning_rate": 5.679015195633219e-06, "loss": 0.03737906813621521, "step": 13185 }, { "epoch": 0.12414117647058824, "grad_norm": 0.7164912947377324, "learning_rate": 5.6779386254740455e-06, "loss": 0.03350966870784759, "step": 13190 }, { "epoch": 0.12418823529411764, "grad_norm": 0.6171055795359723, "learning_rate": 5.676862667338619e-06, "loss": 0.031228840351104736, "step": 13195 }, { "epoch": 0.12423529411764705, "grad_norm": 1.000031116654835, "learning_rate": 5.675787320647278e-06, "loss": 0.032387572526931765, "step": 13200 }, { "epoch": 0.12428235294117647, "grad_norm": 0.609039541519242, "learning_rate": 5.674712584821118e-06, "loss": 0.0322556734085083, "step": 13205 }, { "epoch": 0.12432941176470588, "grad_norm": 1.1073317129419042, "learning_rate": 5.673638459282011e-06, "loss": 0.036830395460128784, "step": 13210 }, { "epoch": 0.1243764705882353, "grad_norm": 0.677773029793337, "learning_rate": 5.672564943452592e-06, "loss": 0.03219467997550964, "step": 13215 }, { "epoch": 0.12442352941176471, "grad_norm": 0.7577677133869576, "learning_rate": 5.6714920367562574e-06, "loss": 0.029713428020477294, "step": 13220 }, { "epoch": 0.12447058823529412, "grad_norm": 0.6382739805708786, "learning_rate": 5.6704197386171696e-06, "loss": 0.03401485681533813, "step": 13225 }, { "epoch": 0.12451764705882352, "grad_norm": 0.733213372081691, "learning_rate": 5.669348048460254e-06, "loss": 0.04142638444900513, "step": 13230 }, { "epoch": 0.12456470588235294, "grad_norm": 0.7983327871725018, "learning_rate": 5.668276965711193e-06, "loss": 0.03325539231300354, "step": 13235 }, { "epoch": 0.12461176470588235, "grad_norm": 0.5445148257780207, "learning_rate": 5.667206489796432e-06, "loss": 0.032594209909439086, "step": 13240 }, { "epoch": 0.12465882352941177, "grad_norm": 0.803582849552956, "learning_rate": 5.666136620143169e-06, "loss": 0.03478328883647919, "step": 13245 }, { "epoch": 0.12470588235294118, "grad_norm": 0.7713492630753673, "learning_rate": 5.665067356179366e-06, "loss": 0.030611220002174377, "step": 13250 }, { "epoch": 0.1247529411764706, "grad_norm": 0.7895335604097292, "learning_rate": 5.663998697333734e-06, "loss": 0.04251894056797027, "step": 13255 }, { "epoch": 0.1248, "grad_norm": 0.5473537241149472, "learning_rate": 5.66293064303574e-06, "loss": 0.031167709827423097, "step": 13260 }, { "epoch": 0.12484705882352941, "grad_norm": 0.7722851619158685, "learning_rate": 5.661863192715605e-06, "loss": 0.031914302706718446, "step": 13265 }, { "epoch": 0.12489411764705882, "grad_norm": 0.5805617405624052, "learning_rate": 5.6607963458043025e-06, "loss": 0.027595818042755127, "step": 13270 }, { "epoch": 0.12494117647058824, "grad_norm": 0.60163512680814, "learning_rate": 5.659730101733553e-06, "loss": 0.030192404985427856, "step": 13275 }, { "epoch": 0.12498823529411765, "grad_norm": 0.5816469232775293, "learning_rate": 5.658664459935828e-06, "loss": 0.03543359041213989, "step": 13280 }, { "epoch": 0.12503529411764705, "grad_norm": 0.8041001327045519, "learning_rate": 5.657599419844345e-06, "loss": 0.03822294771671295, "step": 13285 }, { "epoch": 0.12508235294117648, "grad_norm": 0.5846729442454985, "learning_rate": 5.656534980893071e-06, "loss": 0.03728623986244202, "step": 13290 }, { "epoch": 0.12512941176470588, "grad_norm": 0.8344184706989042, "learning_rate": 5.655471142516716e-06, "loss": 0.032949844002723695, "step": 13295 }, { "epoch": 0.1251764705882353, "grad_norm": 0.7289104464554274, "learning_rate": 5.654407904150735e-06, "loss": 0.03700070977210999, "step": 13300 }, { "epoch": 0.1252235294117647, "grad_norm": 0.5163769692501117, "learning_rate": 5.653345265231323e-06, "loss": 0.03652751445770264, "step": 13305 }, { "epoch": 0.1252705882352941, "grad_norm": 0.516670075797285, "learning_rate": 5.6522832251954195e-06, "loss": 0.031993597745895386, "step": 13310 }, { "epoch": 0.12531764705882353, "grad_norm": 0.8786157395531362, "learning_rate": 5.651221783480704e-06, "loss": 0.032583987712860106, "step": 13315 }, { "epoch": 0.12536470588235293, "grad_norm": 1.3751179643810392, "learning_rate": 5.650160939525594e-06, "loss": 0.033310437202453615, "step": 13320 }, { "epoch": 0.12541176470588236, "grad_norm": 0.5305222562758537, "learning_rate": 5.6491006927692425e-06, "loss": 0.03323370814323425, "step": 13325 }, { "epoch": 0.12545882352941176, "grad_norm": 0.7937796543727594, "learning_rate": 5.648041042651543e-06, "loss": 0.03970901966094971, "step": 13330 }, { "epoch": 0.1255058823529412, "grad_norm": 0.6003875893919987, "learning_rate": 5.646981988613123e-06, "loss": 0.039159750938415526, "step": 13335 }, { "epoch": 0.1255529411764706, "grad_norm": 0.905650687790831, "learning_rate": 5.645923530095342e-06, "loss": 0.038928598165512085, "step": 13340 }, { "epoch": 0.1256, "grad_norm": 0.5887069270387596, "learning_rate": 5.644865666540292e-06, "loss": 0.03147424459457397, "step": 13345 }, { "epoch": 0.12564705882352942, "grad_norm": 0.6480618290988296, "learning_rate": 5.643808397390801e-06, "loss": 0.03779705464839935, "step": 13350 }, { "epoch": 0.12569411764705882, "grad_norm": 0.6461464608968202, "learning_rate": 5.642751722090421e-06, "loss": 0.031268537044525146, "step": 13355 }, { "epoch": 0.12574117647058825, "grad_norm": 0.5028396655054149, "learning_rate": 5.641695640083439e-06, "loss": 0.03366979956626892, "step": 13360 }, { "epoch": 0.12578823529411765, "grad_norm": 0.8232253792524477, "learning_rate": 5.640640150814864e-06, "loss": 0.029447567462921143, "step": 13365 }, { "epoch": 0.12583529411764705, "grad_norm": 0.7195570603640107, "learning_rate": 5.639585253730437e-06, "loss": 0.031564530730247495, "step": 13370 }, { "epoch": 0.12588235294117647, "grad_norm": 0.5641641692631569, "learning_rate": 5.63853094827662e-06, "loss": 0.03915807604789734, "step": 13375 }, { "epoch": 0.12592941176470587, "grad_norm": 1.5994806505328838, "learning_rate": 5.637477233900603e-06, "loss": 0.035700494050979616, "step": 13380 }, { "epoch": 0.1259764705882353, "grad_norm": 0.747723119443558, "learning_rate": 5.636424110050295e-06, "loss": 0.02505301237106323, "step": 13385 }, { "epoch": 0.1260235294117647, "grad_norm": 1.0722755278514469, "learning_rate": 5.6353715761743275e-06, "loss": 0.03514629602432251, "step": 13390 }, { "epoch": 0.12607058823529413, "grad_norm": 0.6868410155437498, "learning_rate": 5.634319631722054e-06, "loss": 0.03126276433467865, "step": 13395 }, { "epoch": 0.12611764705882353, "grad_norm": 0.5265961165300644, "learning_rate": 5.633268276143549e-06, "loss": 0.034455567598342896, "step": 13400 }, { "epoch": 0.12616470588235293, "grad_norm": 0.6002222444306946, "learning_rate": 5.632217508889599e-06, "loss": 0.0380143016576767, "step": 13405 }, { "epoch": 0.12621176470588236, "grad_norm": 0.6028884763677622, "learning_rate": 5.631167329411714e-06, "loss": 0.03368644118309021, "step": 13410 }, { "epoch": 0.12625882352941176, "grad_norm": 0.6682156847300852, "learning_rate": 5.6301177371621144e-06, "loss": 0.029573458433151244, "step": 13415 }, { "epoch": 0.12630588235294118, "grad_norm": 0.7570357869388182, "learning_rate": 5.629068731593739e-06, "loss": 0.030998557806015015, "step": 13420 }, { "epoch": 0.12635294117647058, "grad_norm": 0.6157305024007164, "learning_rate": 5.628020312160236e-06, "loss": 0.03497365117073059, "step": 13425 }, { "epoch": 0.1264, "grad_norm": 0.8296606513663367, "learning_rate": 5.62697247831597e-06, "loss": 0.03694215416908264, "step": 13430 }, { "epoch": 0.1264470588235294, "grad_norm": 0.8737860411294357, "learning_rate": 5.625925229516014e-06, "loss": 0.04257403612136841, "step": 13435 }, { "epoch": 0.1264941176470588, "grad_norm": 0.7244667159941589, "learning_rate": 5.624878565216149e-06, "loss": 0.030035778880119324, "step": 13440 }, { "epoch": 0.12654117647058824, "grad_norm": 0.7199013420621623, "learning_rate": 5.623832484872867e-06, "loss": 0.0371870219707489, "step": 13445 }, { "epoch": 0.12658823529411764, "grad_norm": 0.8122562224171642, "learning_rate": 5.6227869879433695e-06, "loss": 0.03213080167770386, "step": 13450 }, { "epoch": 0.12663529411764707, "grad_norm": 0.5557941540247004, "learning_rate": 5.621742073885556e-06, "loss": 0.031230825185775756, "step": 13455 }, { "epoch": 0.12668235294117647, "grad_norm": 0.6014550164476298, "learning_rate": 5.620697742158041e-06, "loss": 0.033616486191749576, "step": 13460 }, { "epoch": 0.1267294117647059, "grad_norm": 0.7652919737422716, "learning_rate": 5.619653992220135e-06, "loss": 0.030807924270629884, "step": 13465 }, { "epoch": 0.1267764705882353, "grad_norm": 0.9228224282524013, "learning_rate": 5.618610823531855e-06, "loss": 0.045375674962997437, "step": 13470 }, { "epoch": 0.1268235294117647, "grad_norm": 0.726329286164751, "learning_rate": 5.6175682355539175e-06, "loss": 0.03265209794044495, "step": 13475 }, { "epoch": 0.12687058823529412, "grad_norm": 0.6380505355686162, "learning_rate": 5.61652622774774e-06, "loss": 0.030457109212875366, "step": 13480 }, { "epoch": 0.12691764705882352, "grad_norm": 0.6489992816492389, "learning_rate": 5.6154847995754414e-06, "loss": 0.028683310747146605, "step": 13485 }, { "epoch": 0.12696470588235295, "grad_norm": 0.6305785981548276, "learning_rate": 5.614443950499834e-06, "loss": 0.02965419888496399, "step": 13490 }, { "epoch": 0.12701176470588235, "grad_norm": 0.6368910312156243, "learning_rate": 5.613403679984429e-06, "loss": 0.03598098754882813, "step": 13495 }, { "epoch": 0.12705882352941175, "grad_norm": 0.6382044336058985, "learning_rate": 5.6123639874934346e-06, "loss": 0.031254523992538454, "step": 13500 }, { "epoch": 0.12710588235294118, "grad_norm": 0.62657494978925, "learning_rate": 5.6113248724917505e-06, "loss": 0.03318123519420624, "step": 13505 }, { "epoch": 0.12715294117647058, "grad_norm": 0.7397740259530402, "learning_rate": 5.610286334444971e-06, "loss": 0.031383943557739255, "step": 13510 }, { "epoch": 0.1272, "grad_norm": 0.49048030800629455, "learning_rate": 5.609248372819385e-06, "loss": 0.030536341667175292, "step": 13515 }, { "epoch": 0.1272470588235294, "grad_norm": 0.9944778254677348, "learning_rate": 5.6082109870819675e-06, "loss": 0.03608282208442688, "step": 13520 }, { "epoch": 0.12729411764705884, "grad_norm": 0.8450734747173498, "learning_rate": 5.607174176700388e-06, "loss": 0.030790776014328003, "step": 13525 }, { "epoch": 0.12734117647058824, "grad_norm": 0.6452387312745224, "learning_rate": 5.606137941142999e-06, "loss": 0.031719714403152466, "step": 13530 }, { "epoch": 0.12738823529411764, "grad_norm": 0.4843161597414873, "learning_rate": 5.605102279878848e-06, "loss": 0.028235119581222535, "step": 13535 }, { "epoch": 0.12743529411764706, "grad_norm": 0.5188722521795053, "learning_rate": 5.604067192377662e-06, "loss": 0.03156610727310181, "step": 13540 }, { "epoch": 0.12748235294117646, "grad_norm": 0.6073130720412029, "learning_rate": 5.603032678109859e-06, "loss": 0.033127039670944214, "step": 13545 }, { "epoch": 0.1275294117647059, "grad_norm": 1.023445921963998, "learning_rate": 5.601998736546533e-06, "loss": 0.04157683253288269, "step": 13550 }, { "epoch": 0.1275764705882353, "grad_norm": 0.6812664317976027, "learning_rate": 5.60096536715947e-06, "loss": 0.028826028108596802, "step": 13555 }, { "epoch": 0.12762352941176472, "grad_norm": 0.728253700700661, "learning_rate": 5.599932569421134e-06, "loss": 0.03436055183410645, "step": 13560 }, { "epoch": 0.12767058823529412, "grad_norm": 0.6712079470988676, "learning_rate": 5.598900342804667e-06, "loss": 0.029187658429145814, "step": 13565 }, { "epoch": 0.12771764705882352, "grad_norm": 0.7739378446381786, "learning_rate": 5.597868686783895e-06, "loss": 0.03126293122768402, "step": 13570 }, { "epoch": 0.12776470588235295, "grad_norm": 0.44419937964659073, "learning_rate": 5.596837600833321e-06, "loss": 0.029105889797210693, "step": 13575 }, { "epoch": 0.12781176470588235, "grad_norm": 0.6042730312475888, "learning_rate": 5.595807084428121e-06, "loss": 0.039990592002868655, "step": 13580 }, { "epoch": 0.12785882352941177, "grad_norm": 0.8682809788919054, "learning_rate": 5.594777137044155e-06, "loss": 0.04261008501052856, "step": 13585 }, { "epoch": 0.12790588235294117, "grad_norm": 0.538734445033724, "learning_rate": 5.593747758157953e-06, "loss": 0.029367968440055847, "step": 13590 }, { "epoch": 0.12795294117647057, "grad_norm": 0.457496167581341, "learning_rate": 5.5927189472467204e-06, "loss": 0.02631683349609375, "step": 13595 }, { "epoch": 0.128, "grad_norm": 0.48174562535755283, "learning_rate": 5.591690703788333e-06, "loss": 0.03164936304092407, "step": 13600 }, { "epoch": 0.1280470588235294, "grad_norm": 0.9379691121288217, "learning_rate": 5.590663027261341e-06, "loss": 0.038506314158439636, "step": 13605 }, { "epoch": 0.12809411764705883, "grad_norm": 0.5599893885684323, "learning_rate": 5.589635917144967e-06, "loss": 0.03587337136268616, "step": 13610 }, { "epoch": 0.12814117647058823, "grad_norm": 0.7498161959542118, "learning_rate": 5.5886093729190975e-06, "loss": 0.031453806161880496, "step": 13615 }, { "epoch": 0.12818823529411766, "grad_norm": 0.4409750545965154, "learning_rate": 5.587583394064292e-06, "loss": 0.028118354082107545, "step": 13620 }, { "epoch": 0.12823529411764706, "grad_norm": 0.6785988969318683, "learning_rate": 5.5865579800617754e-06, "loss": 0.02522073984146118, "step": 13625 }, { "epoch": 0.12828235294117646, "grad_norm": 0.4223352212305825, "learning_rate": 5.585533130393439e-06, "loss": 0.0225381076335907, "step": 13630 }, { "epoch": 0.1283294117647059, "grad_norm": 0.7675381993123069, "learning_rate": 5.584508844541839e-06, "loss": 0.0279252290725708, "step": 13635 }, { "epoch": 0.1283764705882353, "grad_norm": 0.6861231003462619, "learning_rate": 5.583485121990197e-06, "loss": 0.03523307144641876, "step": 13640 }, { "epoch": 0.12842352941176471, "grad_norm": 0.5179605453650505, "learning_rate": 5.582461962222396e-06, "loss": 0.03619098365306854, "step": 13645 }, { "epoch": 0.12847058823529411, "grad_norm": 0.7833125125334545, "learning_rate": 5.581439364722981e-06, "loss": 0.03824434578418732, "step": 13650 }, { "epoch": 0.12851764705882354, "grad_norm": 1.430364800147363, "learning_rate": 5.580417328977159e-06, "loss": 0.029851749539375305, "step": 13655 }, { "epoch": 0.12856470588235294, "grad_norm": 0.6890828973693287, "learning_rate": 5.579395854470793e-06, "loss": 0.03046141266822815, "step": 13660 }, { "epoch": 0.12861176470588234, "grad_norm": 0.5837208752023928, "learning_rate": 5.578374940690409e-06, "loss": 0.028353786468505858, "step": 13665 }, { "epoch": 0.12865882352941177, "grad_norm": 0.8123077787429066, "learning_rate": 5.577354587123188e-06, "loss": 0.0298711359500885, "step": 13670 }, { "epoch": 0.12870588235294117, "grad_norm": 0.9059189745667356, "learning_rate": 5.5763347932569675e-06, "loss": 0.032907629013061525, "step": 13675 }, { "epoch": 0.1287529411764706, "grad_norm": 0.8564287973855371, "learning_rate": 5.575315558580242e-06, "loss": 0.030849766731262208, "step": 13680 }, { "epoch": 0.1288, "grad_norm": 0.639990818760893, "learning_rate": 5.574296882582158e-06, "loss": 0.02840253710746765, "step": 13685 }, { "epoch": 0.12884705882352943, "grad_norm": 0.7815708896444675, "learning_rate": 5.573278764752515e-06, "loss": 0.029610246419906616, "step": 13690 }, { "epoch": 0.12889411764705883, "grad_norm": 0.7170236621809294, "learning_rate": 5.572261204581767e-06, "loss": 0.02680506408214569, "step": 13695 }, { "epoch": 0.12894117647058823, "grad_norm": 0.5980438259223148, "learning_rate": 5.571244201561018e-06, "loss": 0.026246616244316102, "step": 13700 }, { "epoch": 0.12898823529411765, "grad_norm": 0.6896653551529726, "learning_rate": 5.570227755182019e-06, "loss": 0.02980850338935852, "step": 13705 }, { "epoch": 0.12903529411764705, "grad_norm": 0.44873354680009725, "learning_rate": 5.569211864937175e-06, "loss": 0.028987568616867066, "step": 13710 }, { "epoch": 0.12908235294117648, "grad_norm": 0.801748439097349, "learning_rate": 5.568196530319535e-06, "loss": 0.03836403489112854, "step": 13715 }, { "epoch": 0.12912941176470588, "grad_norm": 0.5936365526773827, "learning_rate": 5.567181750822796e-06, "loss": 0.04305022656917572, "step": 13720 }, { "epoch": 0.12917647058823528, "grad_norm": 0.6579365739504089, "learning_rate": 5.566167525941299e-06, "loss": 0.03361262381076813, "step": 13725 }, { "epoch": 0.1292235294117647, "grad_norm": 0.5812873667183119, "learning_rate": 5.565153855170035e-06, "loss": 0.03834758400917053, "step": 13730 }, { "epoch": 0.1292705882352941, "grad_norm": 0.7237869758878741, "learning_rate": 5.56414073800463e-06, "loss": 0.031163206696510314, "step": 13735 }, { "epoch": 0.12931764705882354, "grad_norm": 0.6859365577003389, "learning_rate": 5.56312817394136e-06, "loss": 0.03479980826377869, "step": 13740 }, { "epoch": 0.12936470588235294, "grad_norm": 0.9703272019210588, "learning_rate": 5.562116162477139e-06, "loss": 0.03988204002380371, "step": 13745 }, { "epoch": 0.12941176470588237, "grad_norm": 0.7534129544592214, "learning_rate": 5.561104703109521e-06, "loss": 0.037726521492004395, "step": 13750 }, { "epoch": 0.12945882352941177, "grad_norm": 0.5329600233704197, "learning_rate": 5.560093795336703e-06, "loss": 0.03130621314048767, "step": 13755 }, { "epoch": 0.12950588235294117, "grad_norm": 0.7181741215114792, "learning_rate": 5.5590834386575145e-06, "loss": 0.029773962497711182, "step": 13760 }, { "epoch": 0.1295529411764706, "grad_norm": 0.6726822760254434, "learning_rate": 5.5580736325714265e-06, "loss": 0.037265661358833316, "step": 13765 }, { "epoch": 0.1296, "grad_norm": 0.8392292652111922, "learning_rate": 5.557064376578546e-06, "loss": 0.03911724090576172, "step": 13770 }, { "epoch": 0.12964705882352942, "grad_norm": 0.8382348079658891, "learning_rate": 5.5560556701796146e-06, "loss": 0.038758084177970886, "step": 13775 }, { "epoch": 0.12969411764705882, "grad_norm": 0.6867971919653252, "learning_rate": 5.5550475128760054e-06, "loss": 0.03897936344146728, "step": 13780 }, { "epoch": 0.12974117647058825, "grad_norm": 0.8112075072529572, "learning_rate": 5.554039904169729e-06, "loss": 0.04025636315345764, "step": 13785 }, { "epoch": 0.12978823529411765, "grad_norm": 0.5409737365336867, "learning_rate": 5.553032843563426e-06, "loss": 0.032257181406021115, "step": 13790 }, { "epoch": 0.12983529411764705, "grad_norm": 0.6123806850359292, "learning_rate": 5.552026330560367e-06, "loss": 0.03587201237678528, "step": 13795 }, { "epoch": 0.12988235294117648, "grad_norm": 0.9340849178668427, "learning_rate": 5.551020364664455e-06, "loss": 0.04040040373802185, "step": 13800 }, { "epoch": 0.12992941176470588, "grad_norm": 0.865963520613008, "learning_rate": 5.5500149453802175e-06, "loss": 0.036342006921768186, "step": 13805 }, { "epoch": 0.1299764705882353, "grad_norm": 0.637527470549622, "learning_rate": 5.549010072212817e-06, "loss": 0.03364822268486023, "step": 13810 }, { "epoch": 0.1300235294117647, "grad_norm": 0.6525328970758251, "learning_rate": 5.548005744668038e-06, "loss": 0.03631188273429871, "step": 13815 }, { "epoch": 0.1300705882352941, "grad_norm": 0.8038272822993848, "learning_rate": 5.547001962252292e-06, "loss": 0.03205921947956085, "step": 13820 }, { "epoch": 0.13011764705882353, "grad_norm": 0.8895824828101911, "learning_rate": 5.545998724472613e-06, "loss": 0.035700494050979616, "step": 13825 }, { "epoch": 0.13016470588235293, "grad_norm": 0.4827765138935593, "learning_rate": 5.544996030836665e-06, "loss": 0.026798486709594727, "step": 13830 }, { "epoch": 0.13021176470588236, "grad_norm": 0.5612035704346265, "learning_rate": 5.543993880852728e-06, "loss": 0.029943615198135376, "step": 13835 }, { "epoch": 0.13025882352941176, "grad_norm": 0.6245128316526357, "learning_rate": 5.542992274029708e-06, "loss": 0.03170762360095978, "step": 13840 }, { "epoch": 0.1303058823529412, "grad_norm": 0.5086853267018762, "learning_rate": 5.541991209877131e-06, "loss": 0.02723735272884369, "step": 13845 }, { "epoch": 0.1303529411764706, "grad_norm": 0.6542323803840647, "learning_rate": 5.540990687905143e-06, "loss": 0.038035187125205996, "step": 13850 }, { "epoch": 0.1304, "grad_norm": 0.8117085550329666, "learning_rate": 5.539990707624509e-06, "loss": 0.032417958974838255, "step": 13855 }, { "epoch": 0.13044705882352942, "grad_norm": 0.7307419970484988, "learning_rate": 5.53899126854661e-06, "loss": 0.03201699256896973, "step": 13860 }, { "epoch": 0.13049411764705882, "grad_norm": 0.9721501515581139, "learning_rate": 5.537992370183444e-06, "loss": 0.032233896851539615, "step": 13865 }, { "epoch": 0.13054117647058824, "grad_norm": 1.1838621043013786, "learning_rate": 5.53699401204763e-06, "loss": 0.03150632977485657, "step": 13870 }, { "epoch": 0.13058823529411764, "grad_norm": 0.6513423310275097, "learning_rate": 5.535996193652393e-06, "loss": 0.03219247460365295, "step": 13875 }, { "epoch": 0.13063529411764707, "grad_norm": 0.7106877572380942, "learning_rate": 5.53499891451158e-06, "loss": 0.034478840231895444, "step": 13880 }, { "epoch": 0.13068235294117647, "grad_norm": 0.8046392385725264, "learning_rate": 5.534002174139645e-06, "loss": 0.031781911849975586, "step": 13885 }, { "epoch": 0.13072941176470587, "grad_norm": 0.8324193136078692, "learning_rate": 5.533005972051658e-06, "loss": 0.029876911640167238, "step": 13890 }, { "epoch": 0.1307764705882353, "grad_norm": 0.899411856326074, "learning_rate": 5.532010307763297e-06, "loss": 0.029263800382614134, "step": 13895 }, { "epoch": 0.1308235294117647, "grad_norm": 0.6223112385070424, "learning_rate": 5.5310151807908514e-06, "loss": 0.03755910098552704, "step": 13900 }, { "epoch": 0.13087058823529413, "grad_norm": 0.5255325384692886, "learning_rate": 5.530020590651221e-06, "loss": 0.032255816459655764, "step": 13905 }, { "epoch": 0.13091764705882353, "grad_norm": 0.5360066344224401, "learning_rate": 5.529026536861908e-06, "loss": 0.028789588809013368, "step": 13910 }, { "epoch": 0.13096470588235293, "grad_norm": 1.3834890658612344, "learning_rate": 5.528033018941026e-06, "loss": 0.032376110553741455, "step": 13915 }, { "epoch": 0.13101176470588236, "grad_norm": 0.5346250362132423, "learning_rate": 5.527040036407294e-06, "loss": 0.03277804255485535, "step": 13920 }, { "epoch": 0.13105882352941176, "grad_norm": 0.9301146011728485, "learning_rate": 5.526047588780037e-06, "loss": 0.04054576754570007, "step": 13925 }, { "epoch": 0.13110588235294118, "grad_norm": 0.6454400164373368, "learning_rate": 5.525055675579179e-06, "loss": 0.03212403655052185, "step": 13930 }, { "epoch": 0.13115294117647058, "grad_norm": 0.7726852502723139, "learning_rate": 5.524064296325253e-06, "loss": 0.038525789976119995, "step": 13935 }, { "epoch": 0.1312, "grad_norm": 0.6664388660909654, "learning_rate": 5.52307345053939e-06, "loss": 0.030076193809509277, "step": 13940 }, { "epoch": 0.1312470588235294, "grad_norm": 0.6340231222263606, "learning_rate": 5.522083137743326e-06, "loss": 0.031611430644989016, "step": 13945 }, { "epoch": 0.1312941176470588, "grad_norm": 0.7324246417490771, "learning_rate": 5.521093357459392e-06, "loss": 0.03229658603668213, "step": 13950 }, { "epoch": 0.13134117647058824, "grad_norm": 0.5501813762855973, "learning_rate": 5.52010410921052e-06, "loss": 0.032827168703079224, "step": 13955 }, { "epoch": 0.13138823529411764, "grad_norm": 0.7461204276856097, "learning_rate": 5.519115392520243e-06, "loss": 0.030900794267654418, "step": 13960 }, { "epoch": 0.13143529411764707, "grad_norm": 0.7643386228998444, "learning_rate": 5.518127206912687e-06, "loss": 0.0377467542886734, "step": 13965 }, { "epoch": 0.13148235294117647, "grad_norm": 0.6035797933485156, "learning_rate": 5.517139551912577e-06, "loss": 0.02638685405254364, "step": 13970 }, { "epoch": 0.1315294117647059, "grad_norm": 0.7529722846252438, "learning_rate": 5.516152427045233e-06, "loss": 0.029425573348999024, "step": 13975 }, { "epoch": 0.1315764705882353, "grad_norm": 0.6034819622429786, "learning_rate": 5.51516583183657e-06, "loss": 0.027795678377151488, "step": 13980 }, { "epoch": 0.1316235294117647, "grad_norm": 0.801605055855549, "learning_rate": 5.514179765813092e-06, "loss": 0.031975561380386354, "step": 13985 }, { "epoch": 0.13167058823529412, "grad_norm": 0.492632435724352, "learning_rate": 5.513194228501899e-06, "loss": 0.03230412304401398, "step": 13990 }, { "epoch": 0.13171764705882352, "grad_norm": 0.6693171846792051, "learning_rate": 5.512209219430683e-06, "loss": 0.034048473834991454, "step": 13995 }, { "epoch": 0.13176470588235295, "grad_norm": 0.7036262559835573, "learning_rate": 5.511224738127726e-06, "loss": 0.028574150800704957, "step": 14000 }, { "epoch": 0.13181176470588235, "grad_norm": 0.7227526169655637, "learning_rate": 5.5102407841218964e-06, "loss": 0.03922942578792572, "step": 14005 }, { "epoch": 0.13185882352941178, "grad_norm": 0.6719708092607057, "learning_rate": 5.509257356942655e-06, "loss": 0.03482729196548462, "step": 14010 }, { "epoch": 0.13190588235294118, "grad_norm": 0.642257553894726, "learning_rate": 5.50827445612005e-06, "loss": 0.029954928159713744, "step": 14015 }, { "epoch": 0.13195294117647058, "grad_norm": 1.1964299971737762, "learning_rate": 5.507292081184713e-06, "loss": 0.030606240034103394, "step": 14020 }, { "epoch": 0.132, "grad_norm": 0.5997841855936565, "learning_rate": 5.5063102316678665e-06, "loss": 0.0333208292722702, "step": 14025 }, { "epoch": 0.1320470588235294, "grad_norm": 0.9758421457580362, "learning_rate": 5.5053289071013115e-06, "loss": 0.03710763454437256, "step": 14030 }, { "epoch": 0.13209411764705883, "grad_norm": 0.8479307464698378, "learning_rate": 5.504348107017439e-06, "loss": 0.03428418636322021, "step": 14035 }, { "epoch": 0.13214117647058823, "grad_norm": 1.4580472103652278, "learning_rate": 5.503367830949217e-06, "loss": 0.03586077094078064, "step": 14040 }, { "epoch": 0.13218823529411763, "grad_norm": 0.7187654482284074, "learning_rate": 5.502388078430202e-06, "loss": 0.037953895330429074, "step": 14045 }, { "epoch": 0.13223529411764706, "grad_norm": 0.5587369499883026, "learning_rate": 5.501408848994527e-06, "loss": 0.025888583064079283, "step": 14050 }, { "epoch": 0.13228235294117646, "grad_norm": 0.742440225054725, "learning_rate": 5.500430142176907e-06, "loss": 0.03198830783367157, "step": 14055 }, { "epoch": 0.1323294117647059, "grad_norm": 0.6066069926054526, "learning_rate": 5.499451957512635e-06, "loss": 0.03169106543064117, "step": 14060 }, { "epoch": 0.1323764705882353, "grad_norm": 0.7257042515162766, "learning_rate": 5.498474294537582e-06, "loss": 0.03543115258216858, "step": 14065 }, { "epoch": 0.13242352941176472, "grad_norm": 0.6269666244875908, "learning_rate": 5.497497152788199e-06, "loss": 0.03769022226333618, "step": 14070 }, { "epoch": 0.13247058823529412, "grad_norm": 0.7927089018388997, "learning_rate": 5.4965205318015116e-06, "loss": 0.03293606638908386, "step": 14075 }, { "epoch": 0.13251764705882352, "grad_norm": 0.6779592128408289, "learning_rate": 5.4955444311151195e-06, "loss": 0.031747865676879886, "step": 14080 }, { "epoch": 0.13256470588235295, "grad_norm": 0.5838965112048918, "learning_rate": 5.4945688502672e-06, "loss": 0.026716211438179018, "step": 14085 }, { "epoch": 0.13261176470588235, "grad_norm": 0.8853065764263464, "learning_rate": 5.493593788796502e-06, "loss": 0.0340876042842865, "step": 14090 }, { "epoch": 0.13265882352941177, "grad_norm": 0.546507733309483, "learning_rate": 5.4926192462423474e-06, "loss": 0.03405225872993469, "step": 14095 }, { "epoch": 0.13270588235294117, "grad_norm": 0.5417632022328692, "learning_rate": 5.4916452221446326e-06, "loss": 0.027916014194488525, "step": 14100 }, { "epoch": 0.1327529411764706, "grad_norm": 0.6960668976239032, "learning_rate": 5.4906717160438205e-06, "loss": 0.037732255458831784, "step": 14105 }, { "epoch": 0.1328, "grad_norm": 0.9452550438704236, "learning_rate": 5.4896987274809464e-06, "loss": 0.0316447913646698, "step": 14110 }, { "epoch": 0.1328470588235294, "grad_norm": 0.6832957912785176, "learning_rate": 5.488726255997615e-06, "loss": 0.03594295978546143, "step": 14115 }, { "epoch": 0.13289411764705883, "grad_norm": 0.8394048195826921, "learning_rate": 5.487754301136e-06, "loss": 0.03062642514705658, "step": 14120 }, { "epoch": 0.13294117647058823, "grad_norm": 0.586618338110392, "learning_rate": 5.48678286243884e-06, "loss": 0.03136046826839447, "step": 14125 }, { "epoch": 0.13298823529411766, "grad_norm": 0.7156068880406437, "learning_rate": 5.485811939449439e-06, "loss": 0.02707921266555786, "step": 14130 }, { "epoch": 0.13303529411764706, "grad_norm": 0.5839084368460317, "learning_rate": 5.484841531711672e-06, "loss": 0.032805907726287845, "step": 14135 }, { "epoch": 0.13308235294117646, "grad_norm": 0.46930120805160114, "learning_rate": 5.483871638769975e-06, "loss": 0.027813538908958435, "step": 14140 }, { "epoch": 0.13312941176470589, "grad_norm": 1.0539263262676006, "learning_rate": 5.482902260169347e-06, "loss": 0.03204401731491089, "step": 14145 }, { "epoch": 0.13317647058823529, "grad_norm": 0.942520805887193, "learning_rate": 5.481933395455352e-06, "loss": 0.03986449241638183, "step": 14150 }, { "epoch": 0.1332235294117647, "grad_norm": 0.7309052952679472, "learning_rate": 5.480965044174112e-06, "loss": 0.02667299211025238, "step": 14155 }, { "epoch": 0.1332705882352941, "grad_norm": 0.5904632099639343, "learning_rate": 5.479997205872316e-06, "loss": 0.034450864791870116, "step": 14160 }, { "epoch": 0.13331764705882354, "grad_norm": 0.7494855722926484, "learning_rate": 5.479029880097208e-06, "loss": 0.02954995632171631, "step": 14165 }, { "epoch": 0.13336470588235294, "grad_norm": 0.6346110521181013, "learning_rate": 5.478063066396594e-06, "loss": 0.03465659618377685, "step": 14170 }, { "epoch": 0.13341176470588234, "grad_norm": 0.4280608928292882, "learning_rate": 5.477096764318837e-06, "loss": 0.03121453821659088, "step": 14175 }, { "epoch": 0.13345882352941177, "grad_norm": 0.7783493402139501, "learning_rate": 5.476130973412858e-06, "loss": 0.03497443199157715, "step": 14180 }, { "epoch": 0.13350588235294117, "grad_norm": 2.1498221324170013, "learning_rate": 5.4751656932281336e-06, "loss": 0.038055920600891115, "step": 14185 }, { "epoch": 0.1335529411764706, "grad_norm": 1.0138207953670366, "learning_rate": 5.4742009233147e-06, "loss": 0.03573296368122101, "step": 14190 }, { "epoch": 0.1336, "grad_norm": 0.7434173514653755, "learning_rate": 5.4732366632231424e-06, "loss": 0.0318257749080658, "step": 14195 }, { "epoch": 0.13364705882352942, "grad_norm": 0.5887195054406159, "learning_rate": 5.472272912504604e-06, "loss": 0.03506190180778503, "step": 14200 }, { "epoch": 0.13369411764705882, "grad_norm": 0.6948789430909249, "learning_rate": 5.471309670710779e-06, "loss": 0.03374133706092834, "step": 14205 }, { "epoch": 0.13374117647058822, "grad_norm": 0.63132096552691, "learning_rate": 5.470346937393916e-06, "loss": 0.044961458444595336, "step": 14210 }, { "epoch": 0.13378823529411765, "grad_norm": 0.7538764703841163, "learning_rate": 5.469384712106812e-06, "loss": 0.03496087789535522, "step": 14215 }, { "epoch": 0.13383529411764705, "grad_norm": 0.839668744521117, "learning_rate": 5.468422994402819e-06, "loss": 0.028087007999420165, "step": 14220 }, { "epoch": 0.13388235294117648, "grad_norm": 0.4232377238957358, "learning_rate": 5.4674617838358315e-06, "loss": 0.026316967606544495, "step": 14225 }, { "epoch": 0.13392941176470588, "grad_norm": 0.6826532737506406, "learning_rate": 5.466501079960299e-06, "loss": 0.034760463237762454, "step": 14230 }, { "epoch": 0.1339764705882353, "grad_norm": 0.6262636047882564, "learning_rate": 5.465540882331217e-06, "loss": 0.024907347559928895, "step": 14235 }, { "epoch": 0.1340235294117647, "grad_norm": 0.6499063414565585, "learning_rate": 5.464581190504126e-06, "loss": 0.03384014368057251, "step": 14240 }, { "epoch": 0.1340705882352941, "grad_norm": 0.6902586908559516, "learning_rate": 5.463622004035117e-06, "loss": 0.037448936700820924, "step": 14245 }, { "epoch": 0.13411764705882354, "grad_norm": 0.8853337864045993, "learning_rate": 5.462663322480819e-06, "loss": 0.03663384318351746, "step": 14250 }, { "epoch": 0.13416470588235294, "grad_norm": 0.627596592855335, "learning_rate": 5.461705145398414e-06, "loss": 0.034906649589538576, "step": 14255 }, { "epoch": 0.13421176470588236, "grad_norm": 0.6669074502698402, "learning_rate": 5.460747472345621e-06, "loss": 0.03035165071487427, "step": 14260 }, { "epoch": 0.13425882352941176, "grad_norm": 0.8736176404222672, "learning_rate": 5.459790302880705e-06, "loss": 0.03004782795906067, "step": 14265 }, { "epoch": 0.13430588235294116, "grad_norm": 0.8044201545076362, "learning_rate": 5.458833636562472e-06, "loss": 0.03186747431755066, "step": 14270 }, { "epoch": 0.1343529411764706, "grad_norm": 0.7913444865127593, "learning_rate": 5.457877472950267e-06, "loss": 0.02669689655303955, "step": 14275 }, { "epoch": 0.1344, "grad_norm": 0.6666642704433152, "learning_rate": 5.456921811603978e-06, "loss": 0.031921207904815674, "step": 14280 }, { "epoch": 0.13444705882352942, "grad_norm": 0.84804095465588, "learning_rate": 5.455966652084031e-06, "loss": 0.029948705434799196, "step": 14285 }, { "epoch": 0.13449411764705882, "grad_norm": 0.5820270177066204, "learning_rate": 5.455011993951393e-06, "loss": 0.028620603680610656, "step": 14290 }, { "epoch": 0.13454117647058825, "grad_norm": 0.9377691993476995, "learning_rate": 5.454057836767561e-06, "loss": 0.032320863008499144, "step": 14295 }, { "epoch": 0.13458823529411765, "grad_norm": 0.6890067181737142, "learning_rate": 5.453104180094579e-06, "loss": 0.029891854524612425, "step": 14300 }, { "epoch": 0.13463529411764705, "grad_norm": 0.6148958225761018, "learning_rate": 5.452151023495018e-06, "loss": 0.02835797965526581, "step": 14305 }, { "epoch": 0.13468235294117648, "grad_norm": 0.6447106831512697, "learning_rate": 5.451198366531988e-06, "loss": 0.030442127585411073, "step": 14310 }, { "epoch": 0.13472941176470588, "grad_norm": 0.655958450550425, "learning_rate": 5.4502462087691345e-06, "loss": 0.035392552614212036, "step": 14315 }, { "epoch": 0.1347764705882353, "grad_norm": 0.911628252924098, "learning_rate": 5.449294549770634e-06, "loss": 0.028690442442893982, "step": 14320 }, { "epoch": 0.1348235294117647, "grad_norm": 0.8906578063040379, "learning_rate": 5.448343389101195e-06, "loss": 0.027636954188346864, "step": 14325 }, { "epoch": 0.13487058823529413, "grad_norm": 0.6481550625446862, "learning_rate": 5.447392726326061e-06, "loss": 0.03119848668575287, "step": 14330 }, { "epoch": 0.13491764705882353, "grad_norm": 0.4943514763590652, "learning_rate": 5.446442561011002e-06, "loss": 0.03633966445922852, "step": 14335 }, { "epoch": 0.13496470588235293, "grad_norm": 0.6609661303194321, "learning_rate": 5.445492892722322e-06, "loss": 0.028402036428451537, "step": 14340 }, { "epoch": 0.13501176470588236, "grad_norm": 0.7668784218764074, "learning_rate": 5.444543721026851e-06, "loss": 0.0319782018661499, "step": 14345 }, { "epoch": 0.13505882352941176, "grad_norm": 1.4617899371240357, "learning_rate": 5.443595045491949e-06, "loss": 0.03079531490802765, "step": 14350 }, { "epoch": 0.1351058823529412, "grad_norm": 0.580057272302386, "learning_rate": 5.442646865685502e-06, "loss": 0.039338767528533936, "step": 14355 }, { "epoch": 0.1351529411764706, "grad_norm": 0.8001058141048758, "learning_rate": 5.441699181175928e-06, "loss": 0.03416953682899475, "step": 14360 }, { "epoch": 0.1352, "grad_norm": 0.5996110791150171, "learning_rate": 5.440751991532162e-06, "loss": 0.03427160680294037, "step": 14365 }, { "epoch": 0.13524705882352941, "grad_norm": 0.6013688377051117, "learning_rate": 5.439805296323671e-06, "loss": 0.028138113021850587, "step": 14370 }, { "epoch": 0.13529411764705881, "grad_norm": 0.8741808657276854, "learning_rate": 5.438859095120443e-06, "loss": 0.03796155452728271, "step": 14375 }, { "epoch": 0.13534117647058824, "grad_norm": 0.5625517066691638, "learning_rate": 5.437913387492993e-06, "loss": 0.03030121624469757, "step": 14380 }, { "epoch": 0.13538823529411764, "grad_norm": 0.6126195068282024, "learning_rate": 5.4369681730123525e-06, "loss": 0.03245899677276611, "step": 14385 }, { "epoch": 0.13543529411764707, "grad_norm": 0.6808019980198362, "learning_rate": 5.436023451250081e-06, "loss": 0.028560370206832886, "step": 14390 }, { "epoch": 0.13548235294117647, "grad_norm": 0.7678934587010464, "learning_rate": 5.435079221778254e-06, "loss": 0.035806530714035036, "step": 14395 }, { "epoch": 0.13552941176470587, "grad_norm": 0.591319451803678, "learning_rate": 5.434135484169468e-06, "loss": 0.03084932565689087, "step": 14400 }, { "epoch": 0.1355764705882353, "grad_norm": 0.6694540899104744, "learning_rate": 5.433192237996845e-06, "loss": 0.026224666833877565, "step": 14405 }, { "epoch": 0.1356235294117647, "grad_norm": 0.48381698160912506, "learning_rate": 5.4322494828340166e-06, "loss": 0.023095104098320007, "step": 14410 }, { "epoch": 0.13567058823529413, "grad_norm": 1.0445476572994141, "learning_rate": 5.431307218255137e-06, "loss": 0.029816699028015137, "step": 14415 }, { "epoch": 0.13571764705882353, "grad_norm": 0.5555642975774214, "learning_rate": 5.430365443834876e-06, "loss": 0.03373660445213318, "step": 14420 }, { "epoch": 0.13576470588235295, "grad_norm": 0.898270960967488, "learning_rate": 5.429424159148421e-06, "loss": 0.03428331017494202, "step": 14425 }, { "epoch": 0.13581176470588235, "grad_norm": 0.9990116743913209, "learning_rate": 5.428483363771473e-06, "loss": 0.024563135206699373, "step": 14430 }, { "epoch": 0.13585882352941175, "grad_norm": 0.6200200593725896, "learning_rate": 5.4275430572802476e-06, "loss": 0.03216401040554047, "step": 14435 }, { "epoch": 0.13590588235294118, "grad_norm": 0.7536981262060971, "learning_rate": 5.426603239251474e-06, "loss": 0.032440537214279176, "step": 14440 }, { "epoch": 0.13595294117647058, "grad_norm": 0.8033198326078809, "learning_rate": 5.425663909262396e-06, "loss": 0.02974311411380768, "step": 14445 }, { "epoch": 0.136, "grad_norm": 0.911380839403401, "learning_rate": 5.424725066890767e-06, "loss": 0.03557202517986298, "step": 14450 }, { "epoch": 0.1360470588235294, "grad_norm": 0.6323895094959305, "learning_rate": 5.423786711714854e-06, "loss": 0.02644728124141693, "step": 14455 }, { "epoch": 0.1360941176470588, "grad_norm": 1.033763827706758, "learning_rate": 5.422848843313432e-06, "loss": 0.03416312336921692, "step": 14460 }, { "epoch": 0.13614117647058824, "grad_norm": 0.6515706732901093, "learning_rate": 5.421911461265788e-06, "loss": 0.028371673822402955, "step": 14465 }, { "epoch": 0.13618823529411764, "grad_norm": 0.6177692782384927, "learning_rate": 5.4209745651517165e-06, "loss": 0.03627237975597382, "step": 14470 }, { "epoch": 0.13623529411764707, "grad_norm": 0.6762636158569948, "learning_rate": 5.420038154551522e-06, "loss": 0.028930652141571044, "step": 14475 }, { "epoch": 0.13628235294117647, "grad_norm": 0.6044806294420527, "learning_rate": 5.4191022290460115e-06, "loss": 0.03265725374221802, "step": 14480 }, { "epoch": 0.1363294117647059, "grad_norm": 0.6830828037778673, "learning_rate": 5.418166788216504e-06, "loss": 0.0278777539730072, "step": 14485 }, { "epoch": 0.1363764705882353, "grad_norm": 0.4926138365920069, "learning_rate": 5.417231831644821e-06, "loss": 0.03270229399204254, "step": 14490 }, { "epoch": 0.1364235294117647, "grad_norm": 0.9236672099581588, "learning_rate": 5.416297358913293e-06, "loss": 0.041624560952186584, "step": 14495 }, { "epoch": 0.13647058823529412, "grad_norm": 0.9538345625939273, "learning_rate": 5.415363369604748e-06, "loss": 0.030811387300491332, "step": 14500 }, { "epoch": 0.13651764705882352, "grad_norm": 0.5632026290167096, "learning_rate": 5.414429863302523e-06, "loss": 0.030437681078910827, "step": 14505 }, { "epoch": 0.13656470588235295, "grad_norm": 0.6362527435676844, "learning_rate": 5.413496839590456e-06, "loss": 0.03586138486862182, "step": 14510 }, { "epoch": 0.13661176470588235, "grad_norm": 0.7224779502845559, "learning_rate": 5.412564298052884e-06, "loss": 0.032056450843811035, "step": 14515 }, { "epoch": 0.13665882352941178, "grad_norm": 0.7052341887472433, "learning_rate": 5.41163223827465e-06, "loss": 0.03739778399467468, "step": 14520 }, { "epoch": 0.13670588235294118, "grad_norm": 0.615854845467619, "learning_rate": 5.410700659841094e-06, "loss": 0.033133527636528014, "step": 14525 }, { "epoch": 0.13675294117647058, "grad_norm": 0.6076876477194484, "learning_rate": 5.4097695623380565e-06, "loss": 0.027601510286331177, "step": 14530 }, { "epoch": 0.1368, "grad_norm": 0.7697171108686361, "learning_rate": 5.408838945351876e-06, "loss": 0.034676623344421384, "step": 14535 }, { "epoch": 0.1368470588235294, "grad_norm": 0.5936370682342624, "learning_rate": 5.407908808469391e-06, "loss": 0.032131677865982054, "step": 14540 }, { "epoch": 0.13689411764705883, "grad_norm": 0.975453184072293, "learning_rate": 5.406979151277934e-06, "loss": 0.031925898790359494, "step": 14545 }, { "epoch": 0.13694117647058823, "grad_norm": 0.6928222051221248, "learning_rate": 5.406049973365335e-06, "loss": 0.03008500337600708, "step": 14550 }, { "epoch": 0.13698823529411766, "grad_norm": 0.6849047978168212, "learning_rate": 5.405121274319922e-06, "loss": 0.031487184762954715, "step": 14555 }, { "epoch": 0.13703529411764706, "grad_norm": 0.6622585846007988, "learning_rate": 5.404193053730515e-06, "loss": 0.03512322306632996, "step": 14560 }, { "epoch": 0.13708235294117646, "grad_norm": 0.6646088819291057, "learning_rate": 5.403265311186431e-06, "loss": 0.027960577607154848, "step": 14565 }, { "epoch": 0.1371294117647059, "grad_norm": 1.031267821195405, "learning_rate": 5.402338046277475e-06, "loss": 0.030295974016189574, "step": 14570 }, { "epoch": 0.1371764705882353, "grad_norm": 0.6651296339064879, "learning_rate": 5.401411258593952e-06, "loss": 0.03355205655097961, "step": 14575 }, { "epoch": 0.13722352941176472, "grad_norm": 0.6504286175104048, "learning_rate": 5.400484947726651e-06, "loss": 0.03178062438964844, "step": 14580 }, { "epoch": 0.13727058823529412, "grad_norm": 0.865066416293786, "learning_rate": 5.3995591132668604e-06, "loss": 0.037317830324172976, "step": 14585 }, { "epoch": 0.13731764705882352, "grad_norm": 0.8077960576977122, "learning_rate": 5.3986337548063505e-06, "loss": 0.030862876772880556, "step": 14590 }, { "epoch": 0.13736470588235294, "grad_norm": 1.075471278428871, "learning_rate": 5.397708871937388e-06, "loss": 0.02964836359024048, "step": 14595 }, { "epoch": 0.13741176470588234, "grad_norm": 0.7132959078943779, "learning_rate": 5.396784464252724e-06, "loss": 0.02947066128253937, "step": 14600 }, { "epoch": 0.13745882352941177, "grad_norm": 0.957348894164688, "learning_rate": 5.395860531345602e-06, "loss": 0.032760947942733765, "step": 14605 }, { "epoch": 0.13750588235294117, "grad_norm": 0.6248689147881906, "learning_rate": 5.394937072809745e-06, "loss": 0.030763772130012513, "step": 14610 }, { "epoch": 0.1375529411764706, "grad_norm": 0.5559646887359814, "learning_rate": 5.394014088239371e-06, "loss": 0.0346372127532959, "step": 14615 }, { "epoch": 0.1376, "grad_norm": 0.6673229554949183, "learning_rate": 5.39309157722918e-06, "loss": 0.033849304914474486, "step": 14620 }, { "epoch": 0.1376470588235294, "grad_norm": 0.4867130266673068, "learning_rate": 5.392169539374358e-06, "loss": 0.02693074941635132, "step": 14625 }, { "epoch": 0.13769411764705883, "grad_norm": 0.5085397392951787, "learning_rate": 5.3912479742705736e-06, "loss": 0.027558553218841552, "step": 14630 }, { "epoch": 0.13774117647058823, "grad_norm": 0.5971184458412307, "learning_rate": 5.39032688151398e-06, "loss": 0.03355790674686432, "step": 14635 }, { "epoch": 0.13778823529411766, "grad_norm": 0.8907957039659972, "learning_rate": 5.389406260701214e-06, "loss": 0.03184449076652527, "step": 14640 }, { "epoch": 0.13783529411764706, "grad_norm": 0.8963284838001712, "learning_rate": 5.388486111429394e-06, "loss": 0.03018048405647278, "step": 14645 }, { "epoch": 0.13788235294117648, "grad_norm": 0.6204017933491331, "learning_rate": 5.387566433296119e-06, "loss": 0.03288467526435852, "step": 14650 }, { "epoch": 0.13792941176470588, "grad_norm": 0.8502167489824154, "learning_rate": 5.3866472258994684e-06, "loss": 0.032530495524406434, "step": 14655 }, { "epoch": 0.13797647058823528, "grad_norm": 0.6123709416353659, "learning_rate": 5.385728488838003e-06, "loss": 0.03026779294013977, "step": 14660 }, { "epoch": 0.1380235294117647, "grad_norm": 0.5409753235294019, "learning_rate": 5.384810221710761e-06, "loss": 0.02641051113605499, "step": 14665 }, { "epoch": 0.1380705882352941, "grad_norm": 0.6817608112040232, "learning_rate": 5.383892424117262e-06, "loss": 0.03199461102485657, "step": 14670 }, { "epoch": 0.13811764705882354, "grad_norm": 0.8098028388593681, "learning_rate": 5.382975095657499e-06, "loss": 0.033729666471481325, "step": 14675 }, { "epoch": 0.13816470588235294, "grad_norm": 0.5995144369181868, "learning_rate": 5.382058235931945e-06, "loss": 0.030753591656684877, "step": 14680 }, { "epoch": 0.13821176470588234, "grad_norm": 0.9430563537698874, "learning_rate": 5.381141844541548e-06, "loss": 0.04433723390102386, "step": 14685 }, { "epoch": 0.13825882352941177, "grad_norm": 0.6323654952123713, "learning_rate": 5.380225921087731e-06, "loss": 0.03275440037250519, "step": 14690 }, { "epoch": 0.13830588235294117, "grad_norm": 0.5672746098424086, "learning_rate": 5.379310465172393e-06, "loss": 0.02854100465774536, "step": 14695 }, { "epoch": 0.1383529411764706, "grad_norm": 0.9628464493164727, "learning_rate": 5.378395476397905e-06, "loss": 0.03445138931274414, "step": 14700 }, { "epoch": 0.1384, "grad_norm": 0.6398934008243716, "learning_rate": 5.377480954367117e-06, "loss": 0.033738231658935545, "step": 14705 }, { "epoch": 0.13844705882352942, "grad_norm": 0.47065089137398936, "learning_rate": 5.376566898683342e-06, "loss": 0.02966102957725525, "step": 14710 }, { "epoch": 0.13849411764705882, "grad_norm": 0.6876384221380422, "learning_rate": 5.375653308950372e-06, "loss": 0.028327751159667968, "step": 14715 }, { "epoch": 0.13854117647058822, "grad_norm": 0.46104573074179567, "learning_rate": 5.37474018477247e-06, "loss": 0.03407562971115112, "step": 14720 }, { "epoch": 0.13858823529411765, "grad_norm": 0.5956953337746909, "learning_rate": 5.373827525754366e-06, "loss": 0.028458574414253236, "step": 14725 }, { "epoch": 0.13863529411764705, "grad_norm": 0.47764214245064207, "learning_rate": 5.37291533150126e-06, "loss": 0.03670371174812317, "step": 14730 }, { "epoch": 0.13868235294117648, "grad_norm": 0.8036470910238891, "learning_rate": 5.372003601618825e-06, "loss": 0.0328134149312973, "step": 14735 }, { "epoch": 0.13872941176470588, "grad_norm": 0.5750671189249238, "learning_rate": 5.371092335713196e-06, "loss": 0.02792399227619171, "step": 14740 }, { "epoch": 0.1387764705882353, "grad_norm": 0.5784444958333267, "learning_rate": 5.3701815333909825e-06, "loss": 0.02632942795753479, "step": 14745 }, { "epoch": 0.1388235294117647, "grad_norm": 0.6751655437715335, "learning_rate": 5.3692711942592555e-06, "loss": 0.0416896253824234, "step": 14750 }, { "epoch": 0.1388705882352941, "grad_norm": 0.7339531853453191, "learning_rate": 5.368361317925553e-06, "loss": 0.02851350009441376, "step": 14755 }, { "epoch": 0.13891764705882353, "grad_norm": 0.7688597751040362, "learning_rate": 5.36745190399788e-06, "loss": 0.03436779379844666, "step": 14760 }, { "epoch": 0.13896470588235293, "grad_norm": 0.5644656974207518, "learning_rate": 5.366542952084707e-06, "loss": 0.031123992800712586, "step": 14765 }, { "epoch": 0.13901176470588236, "grad_norm": 0.598937165663213, "learning_rate": 5.365634461794965e-06, "loss": 0.026286488771438597, "step": 14770 }, { "epoch": 0.13905882352941176, "grad_norm": 0.7034170631251848, "learning_rate": 5.36472643273805e-06, "loss": 0.03141607046127319, "step": 14775 }, { "epoch": 0.1391058823529412, "grad_norm": 0.8361004701418854, "learning_rate": 5.363818864523821e-06, "loss": 0.03534872531890869, "step": 14780 }, { "epoch": 0.1391529411764706, "grad_norm": 0.5485481469557578, "learning_rate": 5.3629117567626e-06, "loss": 0.028325870633125305, "step": 14785 }, { "epoch": 0.1392, "grad_norm": 1.0246684006971554, "learning_rate": 5.362005109065167e-06, "loss": 0.0395535409450531, "step": 14790 }, { "epoch": 0.13924705882352942, "grad_norm": 0.6439569424701688, "learning_rate": 5.361098921042766e-06, "loss": 0.03427715003490448, "step": 14795 }, { "epoch": 0.13929411764705882, "grad_norm": 0.63008821050164, "learning_rate": 5.360193192307096e-06, "loss": 0.031155937910079957, "step": 14800 }, { "epoch": 0.13934117647058825, "grad_norm": 1.0254826270763138, "learning_rate": 5.35928792247032e-06, "loss": 0.034743040800094604, "step": 14805 }, { "epoch": 0.13938823529411765, "grad_norm": 0.6324958006397775, "learning_rate": 5.3583831111450566e-06, "loss": 0.03142064809799194, "step": 14810 }, { "epoch": 0.13943529411764705, "grad_norm": 0.5242076346219458, "learning_rate": 5.357478757944384e-06, "loss": 0.022482968866825104, "step": 14815 }, { "epoch": 0.13948235294117647, "grad_norm": 0.5483516480446794, "learning_rate": 5.356574862481836e-06, "loss": 0.03137465119361878, "step": 14820 }, { "epoch": 0.13952941176470587, "grad_norm": 0.6898943980904018, "learning_rate": 5.3556714243714e-06, "loss": 0.027100342512130737, "step": 14825 }, { "epoch": 0.1395764705882353, "grad_norm": 0.7307886442256554, "learning_rate": 5.354768443227526e-06, "loss": 0.02977488040924072, "step": 14830 }, { "epoch": 0.1396235294117647, "grad_norm": 0.7251668374274081, "learning_rate": 5.353865918665112e-06, "loss": 0.033688515424728394, "step": 14835 }, { "epoch": 0.13967058823529413, "grad_norm": 0.905339745304083, "learning_rate": 5.352963850299513e-06, "loss": 0.03299137353897095, "step": 14840 }, { "epoch": 0.13971764705882353, "grad_norm": 0.7745463995688452, "learning_rate": 5.3520622377465385e-06, "loss": 0.03330678343772888, "step": 14845 }, { "epoch": 0.13976470588235293, "grad_norm": 0.6670709765303382, "learning_rate": 5.351161080622448e-06, "loss": 0.028285223245620727, "step": 14850 }, { "epoch": 0.13981176470588236, "grad_norm": 0.5918188654578459, "learning_rate": 5.350260378543956e-06, "loss": 0.021666842699050903, "step": 14855 }, { "epoch": 0.13985882352941176, "grad_norm": 1.2408961646465537, "learning_rate": 5.349360131128228e-06, "loss": 0.034076863527297975, "step": 14860 }, { "epoch": 0.13990588235294119, "grad_norm": 0.6732428131298933, "learning_rate": 5.348460337992878e-06, "loss": 0.032374399900436404, "step": 14865 }, { "epoch": 0.13995294117647059, "grad_norm": 0.6344571562390413, "learning_rate": 5.3475609987559714e-06, "loss": 0.036966067552566526, "step": 14870 }, { "epoch": 0.14, "grad_norm": 0.6489552753840792, "learning_rate": 5.346662113036025e-06, "loss": 0.036713513731956485, "step": 14875 }, { "epoch": 0.1400470588235294, "grad_norm": 2.6023757882709897, "learning_rate": 5.345763680452002e-06, "loss": 0.029283398389816286, "step": 14880 }, { "epoch": 0.1400941176470588, "grad_norm": 0.7995827002176208, "learning_rate": 5.344865700623314e-06, "loss": 0.03599819839000702, "step": 14885 }, { "epoch": 0.14014117647058824, "grad_norm": 0.9239531685666433, "learning_rate": 5.343968173169819e-06, "loss": 0.03186903595924377, "step": 14890 }, { "epoch": 0.14018823529411764, "grad_norm": 0.7809853747005954, "learning_rate": 5.343071097711825e-06, "loss": 0.029783570766448976, "step": 14895 }, { "epoch": 0.14023529411764707, "grad_norm": 0.7766888296711263, "learning_rate": 5.342174473870082e-06, "loss": 0.03449026346206665, "step": 14900 }, { "epoch": 0.14028235294117647, "grad_norm": 0.935762467116178, "learning_rate": 5.341278301265788e-06, "loss": 0.03304874897003174, "step": 14905 }, { "epoch": 0.14032941176470587, "grad_norm": 0.7347878801944584, "learning_rate": 5.340382579520584e-06, "loss": 0.02808782160282135, "step": 14910 }, { "epoch": 0.1403764705882353, "grad_norm": 0.7911530038717672, "learning_rate": 5.339487308256558e-06, "loss": 0.03505178391933441, "step": 14915 }, { "epoch": 0.1404235294117647, "grad_norm": 0.7637817981186719, "learning_rate": 5.338592487096237e-06, "loss": 0.03477252721786499, "step": 14920 }, { "epoch": 0.14047058823529412, "grad_norm": 0.7367739406351954, "learning_rate": 5.337698115662594e-06, "loss": 0.028046479821205138, "step": 14925 }, { "epoch": 0.14051764705882352, "grad_norm": 0.8083475088400116, "learning_rate": 5.3368041935790425e-06, "loss": 0.028848737478256226, "step": 14930 }, { "epoch": 0.14056470588235295, "grad_norm": 0.8123504369925219, "learning_rate": 5.335910720469438e-06, "loss": 0.035100039839744565, "step": 14935 }, { "epoch": 0.14061176470588235, "grad_norm": 0.6550438896950685, "learning_rate": 5.3350176959580765e-06, "loss": 0.028003168106079102, "step": 14940 }, { "epoch": 0.14065882352941175, "grad_norm": 0.7036800498084554, "learning_rate": 5.334125119669693e-06, "loss": 0.031524908542633054, "step": 14945 }, { "epoch": 0.14070588235294118, "grad_norm": 0.5242677362945334, "learning_rate": 5.3332329912294646e-06, "loss": 0.028888171911239623, "step": 14950 }, { "epoch": 0.14075294117647058, "grad_norm": 0.4799882620613411, "learning_rate": 5.332341310263003e-06, "loss": 0.026935264468193054, "step": 14955 }, { "epoch": 0.1408, "grad_norm": 0.7914481569951834, "learning_rate": 5.331450076396361e-06, "loss": 0.030904191732406616, "step": 14960 }, { "epoch": 0.1408470588235294, "grad_norm": 0.8630797452354374, "learning_rate": 5.330559289256028e-06, "loss": 0.03238801956176758, "step": 14965 }, { "epoch": 0.14089411764705884, "grad_norm": 0.7016038759383493, "learning_rate": 5.329668948468929e-06, "loss": 0.0383946418762207, "step": 14970 }, { "epoch": 0.14094117647058824, "grad_norm": 0.6533967680995073, "learning_rate": 5.328779053662429e-06, "loss": 0.03154750466346741, "step": 14975 }, { "epoch": 0.14098823529411764, "grad_norm": 0.6883424653523813, "learning_rate": 5.327889604464322e-06, "loss": 0.03191099762916565, "step": 14980 }, { "epoch": 0.14103529411764706, "grad_norm": 0.5971181824781723, "learning_rate": 5.32700060050284e-06, "loss": 0.02760363221168518, "step": 14985 }, { "epoch": 0.14108235294117646, "grad_norm": 0.7905203316005485, "learning_rate": 5.326112041406652e-06, "loss": 0.030827933549880983, "step": 14990 }, { "epoch": 0.1411294117647059, "grad_norm": 0.6722290838929763, "learning_rate": 5.325223926804854e-06, "loss": 0.03099483847618103, "step": 14995 }, { "epoch": 0.1411764705882353, "grad_norm": 0.5433967670019817, "learning_rate": 5.324336256326983e-06, "loss": 0.029137074947357178, "step": 15000 }, { "epoch": 0.1412235294117647, "grad_norm": 0.7759205997158956, "learning_rate": 5.323449029603001e-06, "loss": 0.03243307471275329, "step": 15005 }, { "epoch": 0.14127058823529412, "grad_norm": 0.6604571645944285, "learning_rate": 5.3225622462633046e-06, "loss": 0.033543598651885984, "step": 15010 }, { "epoch": 0.14131764705882352, "grad_norm": 0.455844212246341, "learning_rate": 5.321675905938718e-06, "loss": 0.025480270385742188, "step": 15015 }, { "epoch": 0.14136470588235295, "grad_norm": 0.9132454354612204, "learning_rate": 5.3207900082605026e-06, "loss": 0.03274402618408203, "step": 15020 }, { "epoch": 0.14141176470588235, "grad_norm": 0.8506109636092666, "learning_rate": 5.319904552860343e-06, "loss": 0.028078728914260866, "step": 15025 }, { "epoch": 0.14145882352941178, "grad_norm": 0.5304337232003223, "learning_rate": 5.319019539370353e-06, "loss": 0.035905280709266664, "step": 15030 }, { "epoch": 0.14150588235294118, "grad_norm": 0.7562167007416243, "learning_rate": 5.31813496742308e-06, "loss": 0.034207704663276675, "step": 15035 }, { "epoch": 0.14155294117647058, "grad_norm": 0.7467340125184343, "learning_rate": 5.317250836651493e-06, "loss": 0.033900278806686404, "step": 15040 }, { "epoch": 0.1416, "grad_norm": 0.6191906663623109, "learning_rate": 5.316367146688991e-06, "loss": 0.02659323811531067, "step": 15045 }, { "epoch": 0.1416470588235294, "grad_norm": 0.7124452817495138, "learning_rate": 5.315483897169399e-06, "loss": 0.038339799642562865, "step": 15050 }, { "epoch": 0.14169411764705883, "grad_norm": 0.7508851737324812, "learning_rate": 5.314601087726967e-06, "loss": 0.03685992956161499, "step": 15055 }, { "epoch": 0.14174117647058823, "grad_norm": 0.7086834549322611, "learning_rate": 5.313718717996371e-06, "loss": 0.0347710132598877, "step": 15060 }, { "epoch": 0.14178823529411766, "grad_norm": 0.6335978594155969, "learning_rate": 5.3128367876127105e-06, "loss": 0.02668941915035248, "step": 15065 }, { "epoch": 0.14183529411764706, "grad_norm": 0.5052392971829341, "learning_rate": 5.311955296211508e-06, "loss": 0.02900709807872772, "step": 15070 }, { "epoch": 0.14188235294117646, "grad_norm": 0.5844472018325455, "learning_rate": 5.311074243428714e-06, "loss": 0.026715508103370665, "step": 15075 }, { "epoch": 0.1419294117647059, "grad_norm": 0.7686161194169399, "learning_rate": 5.3101936289006965e-06, "loss": 0.03288988173007965, "step": 15080 }, { "epoch": 0.1419764705882353, "grad_norm": 0.6500334578731524, "learning_rate": 5.309313452264244e-06, "loss": 0.03073737621307373, "step": 15085 }, { "epoch": 0.14202352941176472, "grad_norm": 0.6312608948849237, "learning_rate": 5.308433713156572e-06, "loss": 0.03488674759864807, "step": 15090 }, { "epoch": 0.14207058823529412, "grad_norm": 0.7014135090903418, "learning_rate": 5.307554411215315e-06, "loss": 0.040708410739898684, "step": 15095 }, { "epoch": 0.14211764705882354, "grad_norm": 0.6771488877668116, "learning_rate": 5.306675546078522e-06, "loss": 0.03231067061424255, "step": 15100 }, { "epoch": 0.14216470588235294, "grad_norm": 0.6499207635001522, "learning_rate": 5.305797117384669e-06, "loss": 0.02791833281517029, "step": 15105 }, { "epoch": 0.14221176470588234, "grad_norm": 0.5403046700424391, "learning_rate": 5.3049191247726475e-06, "loss": 0.028313860297203064, "step": 15110 }, { "epoch": 0.14225882352941177, "grad_norm": 0.7396725199140863, "learning_rate": 5.304041567881765e-06, "loss": 0.029570001363754272, "step": 15115 }, { "epoch": 0.14230588235294117, "grad_norm": 0.5876225326280404, "learning_rate": 5.303164446351752e-06, "loss": 0.030955755710601808, "step": 15120 }, { "epoch": 0.1423529411764706, "grad_norm": 0.7945201930713213, "learning_rate": 5.3022877598227496e-06, "loss": 0.035432368516922, "step": 15125 }, { "epoch": 0.1424, "grad_norm": 0.5779918617624925, "learning_rate": 5.30141150793532e-06, "loss": 0.03195926249027252, "step": 15130 }, { "epoch": 0.1424470588235294, "grad_norm": 0.9868622064908601, "learning_rate": 5.300535690330437e-06, "loss": 0.03284239172935486, "step": 15135 }, { "epoch": 0.14249411764705883, "grad_norm": 0.7163501759586551, "learning_rate": 5.299660306649496e-06, "loss": 0.03591565489768982, "step": 15140 }, { "epoch": 0.14254117647058823, "grad_norm": 0.6590296413373119, "learning_rate": 5.298785356534297e-06, "loss": 0.028242149949073793, "step": 15145 }, { "epoch": 0.14258823529411765, "grad_norm": 0.7337810384349798, "learning_rate": 5.2979108396270625e-06, "loss": 0.03258380889892578, "step": 15150 }, { "epoch": 0.14263529411764705, "grad_norm": 0.6895806998340357, "learning_rate": 5.297036755570426e-06, "loss": 0.033931964635849, "step": 15155 }, { "epoch": 0.14268235294117648, "grad_norm": 0.7285270432589337, "learning_rate": 5.296163104007431e-06, "loss": 0.031964045763015744, "step": 15160 }, { "epoch": 0.14272941176470588, "grad_norm": 0.6321566385760223, "learning_rate": 5.295289884581534e-06, "loss": 0.030842313170433046, "step": 15165 }, { "epoch": 0.14277647058823528, "grad_norm": 0.9635080728602482, "learning_rate": 5.294417096936606e-06, "loss": 0.032489392161369327, "step": 15170 }, { "epoch": 0.1428235294117647, "grad_norm": 1.1849855236917535, "learning_rate": 5.293544740716923e-06, "loss": 0.032753747701644895, "step": 15175 }, { "epoch": 0.1428705882352941, "grad_norm": 0.7164859897614811, "learning_rate": 5.292672815567177e-06, "loss": 0.028213053941726685, "step": 15180 }, { "epoch": 0.14291764705882354, "grad_norm": 0.7173756393962488, "learning_rate": 5.291801321132466e-06, "loss": 0.02627492547035217, "step": 15185 }, { "epoch": 0.14296470588235294, "grad_norm": 0.7669801630556864, "learning_rate": 5.290930257058297e-06, "loss": 0.03157526552677155, "step": 15190 }, { "epoch": 0.14301176470588237, "grad_norm": 0.6627226621180268, "learning_rate": 5.290059622990588e-06, "loss": 0.0376299798488617, "step": 15195 }, { "epoch": 0.14305882352941177, "grad_norm": 0.674142905515126, "learning_rate": 5.289189418575661e-06, "loss": 0.028884246945381165, "step": 15200 }, { "epoch": 0.14310588235294117, "grad_norm": 0.5521236476936702, "learning_rate": 5.288319643460248e-06, "loss": 0.025436243414878844, "step": 15205 }, { "epoch": 0.1431529411764706, "grad_norm": 0.7853456772610021, "learning_rate": 5.287450297291486e-06, "loss": 0.0317612886428833, "step": 15210 }, { "epoch": 0.1432, "grad_norm": 0.6327371606748184, "learning_rate": 5.286581379716921e-06, "loss": 0.02627232074737549, "step": 15215 }, { "epoch": 0.14324705882352942, "grad_norm": 0.8953913296364018, "learning_rate": 5.285712890384499e-06, "loss": 0.029092222452163696, "step": 15220 }, { "epoch": 0.14329411764705882, "grad_norm": 0.9532755524089437, "learning_rate": 5.284844828942573e-06, "loss": 0.03702435493469238, "step": 15225 }, { "epoch": 0.14334117647058822, "grad_norm": 0.7567407757603841, "learning_rate": 5.283977195039903e-06, "loss": 0.04042520225048065, "step": 15230 }, { "epoch": 0.14338823529411765, "grad_norm": 0.7450776568377613, "learning_rate": 5.283109988325649e-06, "loss": 0.03435211181640625, "step": 15235 }, { "epoch": 0.14343529411764705, "grad_norm": 0.5192576854623054, "learning_rate": 5.282243208449374e-06, "loss": 0.029478248953819276, "step": 15240 }, { "epoch": 0.14348235294117648, "grad_norm": 0.6388922270413847, "learning_rate": 5.281376855061045e-06, "loss": 0.028812313079833986, "step": 15245 }, { "epoch": 0.14352941176470588, "grad_norm": 0.8471983690079244, "learning_rate": 5.280510927811032e-06, "loss": 0.03707075119018555, "step": 15250 }, { "epoch": 0.1435764705882353, "grad_norm": 0.5881846456516938, "learning_rate": 5.279645426350102e-06, "loss": 0.027546292543411253, "step": 15255 }, { "epoch": 0.1436235294117647, "grad_norm": 0.6672783110843662, "learning_rate": 5.278780350329426e-06, "loss": 0.0434820294380188, "step": 15260 }, { "epoch": 0.1436705882352941, "grad_norm": 0.7118836906256012, "learning_rate": 5.277915699400573e-06, "loss": 0.035973259806633, "step": 15265 }, { "epoch": 0.14371764705882353, "grad_norm": 0.8143294070550547, "learning_rate": 5.2770514732155134e-06, "loss": 0.026472240686416626, "step": 15270 }, { "epoch": 0.14376470588235293, "grad_norm": 0.5650215529281771, "learning_rate": 5.276187671426613e-06, "loss": 0.04724902510643005, "step": 15275 }, { "epoch": 0.14381176470588236, "grad_norm": 0.7540731558526417, "learning_rate": 5.27532429368664e-06, "loss": 0.03036237359046936, "step": 15280 }, { "epoch": 0.14385882352941176, "grad_norm": 0.6143972900900274, "learning_rate": 5.274461339648757e-06, "loss": 0.03533928394317627, "step": 15285 }, { "epoch": 0.1439058823529412, "grad_norm": 0.7864136150093988, "learning_rate": 5.273598808966526e-06, "loss": 0.0444486141204834, "step": 15290 }, { "epoch": 0.1439529411764706, "grad_norm": 0.5177303343262543, "learning_rate": 5.272736701293904e-06, "loss": 0.02654026448726654, "step": 15295 }, { "epoch": 0.144, "grad_norm": 0.5532201973612922, "learning_rate": 5.27187501628524e-06, "loss": 0.0387414813041687, "step": 15300 }, { "epoch": 0.14404705882352942, "grad_norm": 0.9088189661304603, "learning_rate": 5.271013753595289e-06, "loss": 0.02589961588382721, "step": 15305 }, { "epoch": 0.14409411764705882, "grad_norm": 0.6185720782606658, "learning_rate": 5.270152912879191e-06, "loss": 0.026505106687545778, "step": 15310 }, { "epoch": 0.14414117647058824, "grad_norm": 0.5803397625777867, "learning_rate": 5.269292493792482e-06, "loss": 0.030601608753204345, "step": 15315 }, { "epoch": 0.14418823529411764, "grad_norm": 0.6512183978182573, "learning_rate": 5.268432495991094e-06, "loss": 0.03540622293949127, "step": 15320 }, { "epoch": 0.14423529411764707, "grad_norm": 0.5817260476192497, "learning_rate": 5.267572919131349e-06, "loss": 0.025266095995903015, "step": 15325 }, { "epoch": 0.14428235294117647, "grad_norm": 0.5439377038758966, "learning_rate": 5.266713762869966e-06, "loss": 0.03375990688800812, "step": 15330 }, { "epoch": 0.14432941176470587, "grad_norm": 0.6846514034474623, "learning_rate": 5.2658550268640515e-06, "loss": 0.027796679735183717, "step": 15335 }, { "epoch": 0.1443764705882353, "grad_norm": 0.9942141922278409, "learning_rate": 5.264996710771104e-06, "loss": 0.03006909489631653, "step": 15340 }, { "epoch": 0.1444235294117647, "grad_norm": 0.7084855558324377, "learning_rate": 5.264138814249014e-06, "loss": 0.031756526231765746, "step": 15345 }, { "epoch": 0.14447058823529413, "grad_norm": 0.5407861352283406, "learning_rate": 5.26328133695606e-06, "loss": 0.029890072345733643, "step": 15350 }, { "epoch": 0.14451764705882353, "grad_norm": 1.1400032715767006, "learning_rate": 5.262424278550913e-06, "loss": 0.03542294502258301, "step": 15355 }, { "epoch": 0.14456470588235293, "grad_norm": 0.5751312096753578, "learning_rate": 5.261567638692631e-06, "loss": 0.03028545677661896, "step": 15360 }, { "epoch": 0.14461176470588236, "grad_norm": 0.8105662410938305, "learning_rate": 5.260711417040659e-06, "loss": 0.033184167742729184, "step": 15365 }, { "epoch": 0.14465882352941176, "grad_norm": 0.5743868080537081, "learning_rate": 5.2598556132548325e-06, "loss": 0.03291049599647522, "step": 15370 }, { "epoch": 0.14470588235294118, "grad_norm": 0.6375061926804049, "learning_rate": 5.259000226995375e-06, "loss": 0.03657311797142029, "step": 15375 }, { "epoch": 0.14475294117647058, "grad_norm": 0.5624796413735965, "learning_rate": 5.258145257922893e-06, "loss": 0.02963564991950989, "step": 15380 }, { "epoch": 0.1448, "grad_norm": 0.8034439545756147, "learning_rate": 5.2572907056983805e-06, "loss": 0.038947826623916625, "step": 15385 }, { "epoch": 0.1448470588235294, "grad_norm": 0.7968246694780203, "learning_rate": 5.256436569983219e-06, "loss": 0.02866775691509247, "step": 15390 }, { "epoch": 0.1448941176470588, "grad_norm": 0.6433611554470232, "learning_rate": 5.255582850439172e-06, "loss": 0.033385634422302246, "step": 15395 }, { "epoch": 0.14494117647058824, "grad_norm": 0.7324281866197057, "learning_rate": 5.25472954672839e-06, "loss": 0.02765069603919983, "step": 15400 }, { "epoch": 0.14498823529411764, "grad_norm": 0.8039142188848197, "learning_rate": 5.253876658513405e-06, "loss": 0.03626055419445038, "step": 15405 }, { "epoch": 0.14503529411764707, "grad_norm": 0.5904192132385505, "learning_rate": 5.253024185457136e-06, "loss": 0.030198556184768677, "step": 15410 }, { "epoch": 0.14508235294117647, "grad_norm": 0.8708553067171709, "learning_rate": 5.2521721272228794e-06, "loss": 0.031144288182258607, "step": 15415 }, { "epoch": 0.1451294117647059, "grad_norm": 0.624425717178811, "learning_rate": 5.251320483474319e-06, "loss": 0.028777897357940674, "step": 15420 }, { "epoch": 0.1451764705882353, "grad_norm": 0.4213255444931418, "learning_rate": 5.250469253875516e-06, "loss": 0.029822558164596558, "step": 15425 }, { "epoch": 0.1452235294117647, "grad_norm": 0.7053036584274003, "learning_rate": 5.249618438090916e-06, "loss": 0.031856173276901247, "step": 15430 }, { "epoch": 0.14527058823529412, "grad_norm": 1.0084809097950713, "learning_rate": 5.248768035785343e-06, "loss": 0.03623401522636414, "step": 15435 }, { "epoch": 0.14531764705882352, "grad_norm": 0.6621088231098321, "learning_rate": 5.247918046624002e-06, "loss": 0.030112087726593018, "step": 15440 }, { "epoch": 0.14536470588235295, "grad_norm": 0.7408439671737098, "learning_rate": 5.247068470272475e-06, "loss": 0.027580484747886658, "step": 15445 }, { "epoch": 0.14541176470588235, "grad_norm": 0.8491268964050177, "learning_rate": 5.246219306396728e-06, "loss": 0.03359156548976898, "step": 15450 }, { "epoch": 0.14545882352941175, "grad_norm": 0.7414096217401939, "learning_rate": 5.245370554663099e-06, "loss": 0.030267733335494994, "step": 15455 }, { "epoch": 0.14550588235294118, "grad_norm": 0.6018953144366702, "learning_rate": 5.24452221473831e-06, "loss": 0.029243999719619752, "step": 15460 }, { "epoch": 0.14555294117647058, "grad_norm": 0.5935283917623387, "learning_rate": 5.243674286289455e-06, "loss": 0.024292196333408355, "step": 15465 }, { "epoch": 0.1456, "grad_norm": 0.664575364497414, "learning_rate": 5.242826768984007e-06, "loss": 0.03696857988834381, "step": 15470 }, { "epoch": 0.1456470588235294, "grad_norm": 0.7381387702306544, "learning_rate": 5.241979662489815e-06, "loss": 0.028466325998306275, "step": 15475 }, { "epoch": 0.14569411764705884, "grad_norm": 0.5465948556553621, "learning_rate": 5.241132966475103e-06, "loss": 0.025064116716384886, "step": 15480 }, { "epoch": 0.14574117647058824, "grad_norm": 0.8066106007876592, "learning_rate": 5.240286680608471e-06, "loss": 0.02755550146102905, "step": 15485 }, { "epoch": 0.14578823529411764, "grad_norm": 0.8063846266714073, "learning_rate": 5.239440804558892e-06, "loss": 0.030774539709091185, "step": 15490 }, { "epoch": 0.14583529411764706, "grad_norm": 0.7113510337714214, "learning_rate": 5.238595337995715e-06, "loss": 0.02982746958732605, "step": 15495 }, { "epoch": 0.14588235294117646, "grad_norm": 0.6028465542503959, "learning_rate": 5.237750280588659e-06, "loss": 0.02622116208076477, "step": 15500 }, { "epoch": 0.1459294117647059, "grad_norm": 0.5878320707222208, "learning_rate": 5.23690563200782e-06, "loss": 0.02998981773853302, "step": 15505 }, { "epoch": 0.1459764705882353, "grad_norm": 0.8528361294135295, "learning_rate": 5.236061391923662e-06, "loss": 0.027223774790763856, "step": 15510 }, { "epoch": 0.14602352941176472, "grad_norm": 0.7021388302018323, "learning_rate": 5.235217560007026e-06, "loss": 0.0374181866645813, "step": 15515 }, { "epoch": 0.14607058823529412, "grad_norm": 0.5419872523907879, "learning_rate": 5.2343741359291165e-06, "loss": 0.02992631196975708, "step": 15520 }, { "epoch": 0.14611764705882352, "grad_norm": 0.8353357195605033, "learning_rate": 5.233531119361519e-06, "loss": 0.026446688175201415, "step": 15525 }, { "epoch": 0.14616470588235295, "grad_norm": 0.9130656860005651, "learning_rate": 5.2326885099761805e-06, "loss": 0.024298474192619324, "step": 15530 }, { "epoch": 0.14621176470588235, "grad_norm": 0.6542279282783698, "learning_rate": 5.23184630744542e-06, "loss": 0.026308566331863403, "step": 15535 }, { "epoch": 0.14625882352941177, "grad_norm": 0.5972162033287209, "learning_rate": 5.231004511441926e-06, "loss": 0.03455531597137451, "step": 15540 }, { "epoch": 0.14630588235294117, "grad_norm": 0.6969350984234687, "learning_rate": 5.230163121638758e-06, "loss": 0.024569690227508545, "step": 15545 }, { "epoch": 0.14635294117647057, "grad_norm": 0.7770108235051243, "learning_rate": 5.2293221377093395e-06, "loss": 0.03135707974433899, "step": 15550 }, { "epoch": 0.1464, "grad_norm": 0.8330142636652139, "learning_rate": 5.228481559327463e-06, "loss": 0.0350987434387207, "step": 15555 }, { "epoch": 0.1464470588235294, "grad_norm": 0.8151818571255923, "learning_rate": 5.227641386167288e-06, "loss": 0.0321222186088562, "step": 15560 }, { "epoch": 0.14649411764705883, "grad_norm": 0.6332066074484884, "learning_rate": 5.226801617903342e-06, "loss": 0.036185812950134275, "step": 15565 }, { "epoch": 0.14654117647058823, "grad_norm": 0.5379431777174936, "learning_rate": 5.225962254210515e-06, "loss": 0.02715873420238495, "step": 15570 }, { "epoch": 0.14658823529411766, "grad_norm": 0.6792787618029082, "learning_rate": 5.225123294764064e-06, "loss": 0.03523942828178406, "step": 15575 }, { "epoch": 0.14663529411764706, "grad_norm": 0.4962359646080022, "learning_rate": 5.224284739239612e-06, "loss": 0.03285256028175354, "step": 15580 }, { "epoch": 0.14668235294117646, "grad_norm": 0.764496662829767, "learning_rate": 5.223446587313146e-06, "loss": 0.030168548226356506, "step": 15585 }, { "epoch": 0.14672941176470589, "grad_norm": 0.8960374889939772, "learning_rate": 5.222608838661015e-06, "loss": 0.034833645820617674, "step": 15590 }, { "epoch": 0.14677647058823529, "grad_norm": 0.5530820609026275, "learning_rate": 5.221771492959931e-06, "loss": 0.040103203058242796, "step": 15595 }, { "epoch": 0.1468235294117647, "grad_norm": 0.5368072922654767, "learning_rate": 5.220934549886972e-06, "loss": 0.027826735377311708, "step": 15600 }, { "epoch": 0.14687058823529411, "grad_norm": 0.6810980604318063, "learning_rate": 5.220098009119575e-06, "loss": 0.02812379002571106, "step": 15605 }, { "epoch": 0.14691764705882354, "grad_norm": 0.6600906146705275, "learning_rate": 5.21926187033554e-06, "loss": 0.03239341080188751, "step": 15610 }, { "epoch": 0.14696470588235294, "grad_norm": 0.7556060894031887, "learning_rate": 5.21842613321303e-06, "loss": 0.04724055528640747, "step": 15615 }, { "epoch": 0.14701176470588234, "grad_norm": 0.5298049920471416, "learning_rate": 5.217590797430564e-06, "loss": 0.029161292314529418, "step": 15620 }, { "epoch": 0.14705882352941177, "grad_norm": 0.8091230003209564, "learning_rate": 5.216755862667025e-06, "loss": 0.034540361166000365, "step": 15625 }, { "epoch": 0.14710588235294117, "grad_norm": 0.65615362900381, "learning_rate": 5.215921328601653e-06, "loss": 0.02795901894569397, "step": 15630 }, { "epoch": 0.1471529411764706, "grad_norm": 0.7442785282469085, "learning_rate": 5.2150871949140495e-06, "loss": 0.038495379686355594, "step": 15635 }, { "epoch": 0.1472, "grad_norm": 0.6008478721657527, "learning_rate": 5.2142534612841735e-06, "loss": 0.030672627687454223, "step": 15640 }, { "epoch": 0.14724705882352943, "grad_norm": 0.5664337569954416, "learning_rate": 5.213420127392341e-06, "loss": 0.026431626081466673, "step": 15645 }, { "epoch": 0.14729411764705883, "grad_norm": 0.7011675694868552, "learning_rate": 5.212587192919227e-06, "loss": 0.026186016201972962, "step": 15650 }, { "epoch": 0.14734117647058823, "grad_norm": 0.5953629442556746, "learning_rate": 5.211754657545864e-06, "loss": 0.02359088510274887, "step": 15655 }, { "epoch": 0.14738823529411765, "grad_norm": 1.176313241518055, "learning_rate": 5.2109225209536396e-06, "loss": 0.027772140502929688, "step": 15660 }, { "epoch": 0.14743529411764705, "grad_norm": 0.8411250198826973, "learning_rate": 5.210090782824296e-06, "loss": 0.03369602560997009, "step": 15665 }, { "epoch": 0.14748235294117648, "grad_norm": 0.6463731199082138, "learning_rate": 5.209259442839936e-06, "loss": 0.03032677173614502, "step": 15670 }, { "epoch": 0.14752941176470588, "grad_norm": 0.5479393667356584, "learning_rate": 5.2084285006830105e-06, "loss": 0.033243227005004886, "step": 15675 }, { "epoch": 0.14757647058823528, "grad_norm": 0.6046249873002658, "learning_rate": 5.20759795603633e-06, "loss": 0.03708118498325348, "step": 15680 }, { "epoch": 0.1476235294117647, "grad_norm": 0.8254709075150806, "learning_rate": 5.206767808583058e-06, "loss": 0.03835427165031433, "step": 15685 }, { "epoch": 0.1476705882352941, "grad_norm": 0.7321352253584296, "learning_rate": 5.2059380580067095e-06, "loss": 0.03562070727348328, "step": 15690 }, { "epoch": 0.14771764705882354, "grad_norm": 0.6278037830543258, "learning_rate": 5.205108703991154e-06, "loss": 0.030987432599067687, "step": 15695 }, { "epoch": 0.14776470588235294, "grad_norm": 0.7570896118394167, "learning_rate": 5.204279746220614e-06, "loss": 0.02996712625026703, "step": 15700 }, { "epoch": 0.14781176470588236, "grad_norm": 0.7325217800398929, "learning_rate": 5.203451184379662e-06, "loss": 0.027895301580429077, "step": 15705 }, { "epoch": 0.14785882352941176, "grad_norm": 0.7443174761153435, "learning_rate": 5.202623018153224e-06, "loss": 0.03672636747360229, "step": 15710 }, { "epoch": 0.14790588235294116, "grad_norm": 0.6722018640451838, "learning_rate": 5.2017952472265755e-06, "loss": 0.032624661922454834, "step": 15715 }, { "epoch": 0.1479529411764706, "grad_norm": 0.7127028798888096, "learning_rate": 5.2009678712853415e-06, "loss": 0.02687527537345886, "step": 15720 }, { "epoch": 0.148, "grad_norm": 0.5479535513094302, "learning_rate": 5.2001408900155e-06, "loss": 0.025613772869110107, "step": 15725 }, { "epoch": 0.14804705882352942, "grad_norm": 0.6578565907115034, "learning_rate": 5.1993143031033755e-06, "loss": 0.03381556272506714, "step": 15730 }, { "epoch": 0.14809411764705882, "grad_norm": 0.8037153162071189, "learning_rate": 5.1984881102356434e-06, "loss": 0.029262706637382507, "step": 15735 }, { "epoch": 0.14814117647058825, "grad_norm": 0.7061419893675168, "learning_rate": 5.197662311099325e-06, "loss": 0.03331086337566376, "step": 15740 }, { "epoch": 0.14818823529411765, "grad_norm": 0.645887941314284, "learning_rate": 5.196836905381794e-06, "loss": 0.03759458661079407, "step": 15745 }, { "epoch": 0.14823529411764705, "grad_norm": 0.4362537538859834, "learning_rate": 5.196011892770765e-06, "loss": 0.03254222273826599, "step": 15750 }, { "epoch": 0.14828235294117648, "grad_norm": 0.5876401030474732, "learning_rate": 5.1951872729543065e-06, "loss": 0.029465746879577637, "step": 15755 }, { "epoch": 0.14832941176470588, "grad_norm": 0.720056174428489, "learning_rate": 5.194363045620827e-06, "loss": 0.027592891454696657, "step": 15760 }, { "epoch": 0.1483764705882353, "grad_norm": 0.592111687210348, "learning_rate": 5.193539210459088e-06, "loss": 0.027770039439201356, "step": 15765 }, { "epoch": 0.1484235294117647, "grad_norm": 0.7242804479472744, "learning_rate": 5.192715767158189e-06, "loss": 0.028555914759635925, "step": 15770 }, { "epoch": 0.1484705882352941, "grad_norm": 0.7762308256724417, "learning_rate": 5.191892715407578e-06, "loss": 0.028796255588531494, "step": 15775 }, { "epoch": 0.14851764705882353, "grad_norm": 0.7241743118784473, "learning_rate": 5.1910700548970496e-06, "loss": 0.030862975120544433, "step": 15780 }, { "epoch": 0.14856470588235293, "grad_norm": 0.5872576645956487, "learning_rate": 5.1902477853167385e-06, "loss": 0.026168900728225707, "step": 15785 }, { "epoch": 0.14861176470588236, "grad_norm": 0.6485439551511806, "learning_rate": 5.189425906357124e-06, "loss": 0.030520015954971315, "step": 15790 }, { "epoch": 0.14865882352941176, "grad_norm": 0.5714199005585839, "learning_rate": 5.188604417709031e-06, "loss": 0.027091792225837706, "step": 15795 }, { "epoch": 0.1487058823529412, "grad_norm": 0.736980291202519, "learning_rate": 5.187783319063624e-06, "loss": 0.029937499761581422, "step": 15800 }, { "epoch": 0.1487529411764706, "grad_norm": 0.7360347210948097, "learning_rate": 5.186962610112408e-06, "loss": 0.02993021011352539, "step": 15805 }, { "epoch": 0.1488, "grad_norm": 0.5856368016679238, "learning_rate": 5.186142290547235e-06, "loss": 0.03197084069252014, "step": 15810 }, { "epoch": 0.14884705882352942, "grad_norm": 0.6647192080970409, "learning_rate": 5.1853223600602934e-06, "loss": 0.03167186975479126, "step": 15815 }, { "epoch": 0.14889411764705882, "grad_norm": 0.7001836670269751, "learning_rate": 5.184502818344113e-06, "loss": 0.026633316278457643, "step": 15820 }, { "epoch": 0.14894117647058824, "grad_norm": 0.7450465759996154, "learning_rate": 5.183683665091565e-06, "loss": 0.027897077798843383, "step": 15825 }, { "epoch": 0.14898823529411764, "grad_norm": 0.7528645296853531, "learning_rate": 5.182864899995859e-06, "loss": 0.030964970588684082, "step": 15830 }, { "epoch": 0.14903529411764707, "grad_norm": 0.5346507503523081, "learning_rate": 5.182046522750544e-06, "loss": 0.034866327047348024, "step": 15835 }, { "epoch": 0.14908235294117647, "grad_norm": 1.011410311172516, "learning_rate": 5.181228533049507e-06, "loss": 0.02942442297935486, "step": 15840 }, { "epoch": 0.14912941176470587, "grad_norm": 0.9046363943717487, "learning_rate": 5.180410930586975e-06, "loss": 0.032815021276473996, "step": 15845 }, { "epoch": 0.1491764705882353, "grad_norm": 0.4339752280831148, "learning_rate": 5.179593715057512e-06, "loss": 0.02513173222541809, "step": 15850 }, { "epoch": 0.1492235294117647, "grad_norm": 0.6578474163490774, "learning_rate": 5.17877688615602e-06, "loss": 0.03450251519680023, "step": 15855 }, { "epoch": 0.14927058823529413, "grad_norm": 0.7876310316326156, "learning_rate": 5.177960443577731e-06, "loss": 0.033179575204849245, "step": 15860 }, { "epoch": 0.14931764705882353, "grad_norm": 0.7825728831549992, "learning_rate": 5.177144387018224e-06, "loss": 0.03140793442726135, "step": 15865 }, { "epoch": 0.14936470588235295, "grad_norm": 0.9928325083221107, "learning_rate": 5.176328716173404e-06, "loss": 0.036137694120407106, "step": 15870 }, { "epoch": 0.14941176470588236, "grad_norm": 0.43730974540899464, "learning_rate": 5.17551343073952e-06, "loss": 0.028142786026000975, "step": 15875 }, { "epoch": 0.14945882352941176, "grad_norm": 0.8714270603571512, "learning_rate": 5.174698530413148e-06, "loss": 0.03010314702987671, "step": 15880 }, { "epoch": 0.14950588235294118, "grad_norm": 0.8478602085541856, "learning_rate": 5.173884014891203e-06, "loss": 0.029241722822189332, "step": 15885 }, { "epoch": 0.14955294117647058, "grad_norm": 0.8091404314735086, "learning_rate": 5.173069883870933e-06, "loss": 0.03135644793510437, "step": 15890 }, { "epoch": 0.1496, "grad_norm": 0.7786133086522266, "learning_rate": 5.172256137049918e-06, "loss": 0.035453307628631595, "step": 15895 }, { "epoch": 0.1496470588235294, "grad_norm": 0.6781170982506904, "learning_rate": 5.171442774126072e-06, "loss": 0.030562537908554076, "step": 15900 }, { "epoch": 0.1496941176470588, "grad_norm": 0.6954739802516612, "learning_rate": 5.170629794797641e-06, "loss": 0.02937036156654358, "step": 15905 }, { "epoch": 0.14974117647058824, "grad_norm": 0.6558490278023079, "learning_rate": 5.1698171987632054e-06, "loss": 0.028124165534973145, "step": 15910 }, { "epoch": 0.14978823529411764, "grad_norm": 0.7160911718505507, "learning_rate": 5.169004985721671e-06, "loss": 0.029833078384399414, "step": 15915 }, { "epoch": 0.14983529411764707, "grad_norm": 0.5976576953569868, "learning_rate": 5.168193155372281e-06, "loss": 0.031204771995544434, "step": 15920 }, { "epoch": 0.14988235294117647, "grad_norm": 0.4864615138343035, "learning_rate": 5.167381707414606e-06, "loss": 0.027066779136657716, "step": 15925 }, { "epoch": 0.1499294117647059, "grad_norm": 0.5405641966239593, "learning_rate": 5.166570641548548e-06, "loss": 0.023769447207450868, "step": 15930 }, { "epoch": 0.1499764705882353, "grad_norm": 0.8285372475702869, "learning_rate": 5.165759957474337e-06, "loss": 0.027665752172470092, "step": 15935 }, { "epoch": 0.1500235294117647, "grad_norm": 0.6396904410130061, "learning_rate": 5.164949654892534e-06, "loss": 0.02477274090051651, "step": 15940 }, { "epoch": 0.15007058823529412, "grad_norm": 0.7685814337899869, "learning_rate": 5.164139733504028e-06, "loss": 0.030570459365844727, "step": 15945 }, { "epoch": 0.15011764705882352, "grad_norm": 0.8707457795709539, "learning_rate": 5.163330193010035e-06, "loss": 0.03709557056427002, "step": 15950 }, { "epoch": 0.15016470588235295, "grad_norm": 0.7924926479309871, "learning_rate": 5.1625210331120985e-06, "loss": 0.03426143825054169, "step": 15955 }, { "epoch": 0.15021176470588235, "grad_norm": 0.7008160989892794, "learning_rate": 5.1617122535120945e-06, "loss": 0.03491925597190857, "step": 15960 }, { "epoch": 0.15025882352941178, "grad_norm": 0.774706026074872, "learning_rate": 5.160903853912218e-06, "loss": 0.02828519344329834, "step": 15965 }, { "epoch": 0.15030588235294118, "grad_norm": 0.6497868557721018, "learning_rate": 5.160095834014997e-06, "loss": 0.032787656784057616, "step": 15970 }, { "epoch": 0.15035294117647058, "grad_norm": 0.5767928166903635, "learning_rate": 5.159288193523281e-06, "loss": 0.03060457706451416, "step": 15975 }, { "epoch": 0.1504, "grad_norm": 0.6043505739461693, "learning_rate": 5.158480932140249e-06, "loss": 0.030171185731887817, "step": 15980 }, { "epoch": 0.1504470588235294, "grad_norm": 0.47580657482294225, "learning_rate": 5.1576740495694e-06, "loss": 0.026017701625823973, "step": 15985 }, { "epoch": 0.15049411764705883, "grad_norm": 0.611624256924496, "learning_rate": 5.15686754551456e-06, "loss": 0.02560448944568634, "step": 15990 }, { "epoch": 0.15054117647058823, "grad_norm": 0.834450788436514, "learning_rate": 5.1560614196798816e-06, "loss": 0.028528112173080444, "step": 15995 }, { "epoch": 0.15058823529411763, "grad_norm": 0.7099147906323398, "learning_rate": 5.155255671769839e-06, "loss": 0.028118523955345153, "step": 16000 }, { "epoch": 0.15063529411764706, "grad_norm": 0.6057339206564466, "learning_rate": 5.154450301489226e-06, "loss": 0.030386367440223695, "step": 16005 }, { "epoch": 0.15068235294117646, "grad_norm": 0.7443159339682073, "learning_rate": 5.153645308543165e-06, "loss": 0.033953338861465454, "step": 16010 }, { "epoch": 0.1507294117647059, "grad_norm": 0.6621651154514234, "learning_rate": 5.152840692637097e-06, "loss": 0.02808401584625244, "step": 16015 }, { "epoch": 0.1507764705882353, "grad_norm": 1.2755977028987275, "learning_rate": 5.152036453476789e-06, "loss": 0.029789641499519348, "step": 16020 }, { "epoch": 0.15082352941176472, "grad_norm": 1.1909686351548732, "learning_rate": 5.151232590768321e-06, "loss": 0.02939611077308655, "step": 16025 }, { "epoch": 0.15087058823529412, "grad_norm": 0.6799891777053488, "learning_rate": 5.150429104218103e-06, "loss": 0.029062873125076293, "step": 16030 }, { "epoch": 0.15091764705882352, "grad_norm": 0.6251805249957965, "learning_rate": 5.14962599353286e-06, "loss": 0.028216254711151124, "step": 16035 }, { "epoch": 0.15096470588235295, "grad_norm": 0.8013446262214114, "learning_rate": 5.1488232584196405e-06, "loss": 0.02575908601284027, "step": 16040 }, { "epoch": 0.15101176470588235, "grad_norm": 0.6947696705787287, "learning_rate": 5.1480208985858096e-06, "loss": 0.031932389736175536, "step": 16045 }, { "epoch": 0.15105882352941177, "grad_norm": 0.7808040883248907, "learning_rate": 5.147218913739051e-06, "loss": 0.03161938190460205, "step": 16050 }, { "epoch": 0.15110588235294117, "grad_norm": 0.6885528720134683, "learning_rate": 5.146417303587372e-06, "loss": 0.032173562049865725, "step": 16055 }, { "epoch": 0.1511529411764706, "grad_norm": 0.6467320843707101, "learning_rate": 5.145616067839092e-06, "loss": 0.03023894429206848, "step": 16060 }, { "epoch": 0.1512, "grad_norm": 0.8026864194738735, "learning_rate": 5.144815206202852e-06, "loss": 0.031235671043395995, "step": 16065 }, { "epoch": 0.1512470588235294, "grad_norm": 0.6870369481459715, "learning_rate": 5.144014718387611e-06, "loss": 0.028577557206153868, "step": 16070 }, { "epoch": 0.15129411764705883, "grad_norm": 0.6066968443966728, "learning_rate": 5.14321460410264e-06, "loss": 0.0308133065700531, "step": 16075 }, { "epoch": 0.15134117647058823, "grad_norm": 0.5830197742245647, "learning_rate": 5.1424148630575325e-06, "loss": 0.024287010729312896, "step": 16080 }, { "epoch": 0.15138823529411766, "grad_norm": 0.7989321773011682, "learning_rate": 5.1416154949621936e-06, "loss": 0.031972634792327884, "step": 16085 }, { "epoch": 0.15143529411764706, "grad_norm": 0.6299027352613801, "learning_rate": 5.140816499526846e-06, "loss": 0.03293861150741577, "step": 16090 }, { "epoch": 0.15148235294117646, "grad_norm": 0.5724167256675244, "learning_rate": 5.140017876462027e-06, "loss": 0.023361703753471373, "step": 16095 }, { "epoch": 0.15152941176470588, "grad_norm": 0.5533367231809283, "learning_rate": 5.139219625478589e-06, "loss": 0.023041833937168122, "step": 16100 }, { "epoch": 0.15157647058823528, "grad_norm": 0.5966706727426945, "learning_rate": 5.138421746287697e-06, "loss": 0.03600850403308868, "step": 16105 }, { "epoch": 0.1516235294117647, "grad_norm": 0.47624407951914344, "learning_rate": 5.137624238600833e-06, "loss": 0.02608453631401062, "step": 16110 }, { "epoch": 0.1516705882352941, "grad_norm": 0.6384041778135909, "learning_rate": 5.136827102129789e-06, "loss": 0.03487610220909119, "step": 16115 }, { "epoch": 0.15171764705882354, "grad_norm": 0.7894132770338159, "learning_rate": 5.136030336586672e-06, "loss": 0.03229211866855621, "step": 16120 }, { "epoch": 0.15176470588235294, "grad_norm": 0.7146109366902147, "learning_rate": 5.135233941683902e-06, "loss": 0.031649327278137206, "step": 16125 }, { "epoch": 0.15181176470588234, "grad_norm": 0.5707969006569935, "learning_rate": 5.134437917134207e-06, "loss": 0.032928472757339476, "step": 16130 }, { "epoch": 0.15185882352941177, "grad_norm": 0.6931497937297056, "learning_rate": 5.133642262650631e-06, "loss": 0.029658329486846925, "step": 16135 }, { "epoch": 0.15190588235294117, "grad_norm": 0.5805386062003686, "learning_rate": 5.132846977946529e-06, "loss": 0.03575715124607086, "step": 16140 }, { "epoch": 0.1519529411764706, "grad_norm": 0.6205732884008597, "learning_rate": 5.132052062735563e-06, "loss": 0.02958545684814453, "step": 16145 }, { "epoch": 0.152, "grad_norm": 0.8683988707755298, "learning_rate": 5.131257516731709e-06, "loss": 0.028202998638153075, "step": 16150 }, { "epoch": 0.15204705882352942, "grad_norm": 0.6282336903439556, "learning_rate": 5.130463339649253e-06, "loss": 0.03190901279449463, "step": 16155 }, { "epoch": 0.15209411764705882, "grad_norm": 0.8651452917150649, "learning_rate": 5.129669531202786e-06, "loss": 0.030388537049293517, "step": 16160 }, { "epoch": 0.15214117647058822, "grad_norm": 0.6751035098759081, "learning_rate": 5.128876091107213e-06, "loss": 0.030872607231140138, "step": 16165 }, { "epoch": 0.15218823529411765, "grad_norm": 0.4937721789926128, "learning_rate": 5.128083019077746e-06, "loss": 0.03322733938694, "step": 16170 }, { "epoch": 0.15223529411764705, "grad_norm": 0.6240536946584382, "learning_rate": 5.127290314829904e-06, "loss": 0.033364272117614745, "step": 16175 }, { "epoch": 0.15228235294117648, "grad_norm": 0.606458267784484, "learning_rate": 5.126497978079515e-06, "loss": 0.02511160969734192, "step": 16180 }, { "epoch": 0.15232941176470588, "grad_norm": 0.7865609720275649, "learning_rate": 5.125706008542713e-06, "loss": 0.030925899744033813, "step": 16185 }, { "epoch": 0.1523764705882353, "grad_norm": 1.0063170981315195, "learning_rate": 5.124914405935942e-06, "loss": 0.03709331750869751, "step": 16190 }, { "epoch": 0.1524235294117647, "grad_norm": 0.5769577026598915, "learning_rate": 5.124123169975948e-06, "loss": 0.03361348509788513, "step": 16195 }, { "epoch": 0.1524705882352941, "grad_norm": 0.6161157156318796, "learning_rate": 5.1233323003797854e-06, "loss": 0.030716198682785033, "step": 16200 }, { "epoch": 0.15251764705882354, "grad_norm": 0.7037342905747639, "learning_rate": 5.122541796864815e-06, "loss": 0.03228173553943634, "step": 16205 }, { "epoch": 0.15256470588235294, "grad_norm": 0.7533532838501248, "learning_rate": 5.121751659148702e-06, "loss": 0.025285172462463378, "step": 16210 }, { "epoch": 0.15261176470588236, "grad_norm": 0.6457798320948639, "learning_rate": 5.120961886949415e-06, "loss": 0.033136993646621704, "step": 16215 }, { "epoch": 0.15265882352941176, "grad_norm": 0.4936978437016117, "learning_rate": 5.120172479985228e-06, "loss": 0.027092087268829345, "step": 16220 }, { "epoch": 0.15270588235294116, "grad_norm": 0.7603606071511353, "learning_rate": 5.11938343797472e-06, "loss": 0.03378167152404785, "step": 16225 }, { "epoch": 0.1527529411764706, "grad_norm": 0.5820891193472949, "learning_rate": 5.118594760636772e-06, "loss": 0.029441970586776733, "step": 16230 }, { "epoch": 0.1528, "grad_norm": 0.5820531264772492, "learning_rate": 5.11780644769057e-06, "loss": 0.03230807483196259, "step": 16235 }, { "epoch": 0.15284705882352942, "grad_norm": 0.6719749238980199, "learning_rate": 5.1170184988555984e-06, "loss": 0.03412299156188965, "step": 16240 }, { "epoch": 0.15289411764705882, "grad_norm": 0.5255439686981824, "learning_rate": 5.1162309138516485e-06, "loss": 0.03496609926223755, "step": 16245 }, { "epoch": 0.15294117647058825, "grad_norm": 0.5801067113619808, "learning_rate": 5.11544369239881e-06, "loss": 0.02669042944908142, "step": 16250 }, { "epoch": 0.15298823529411765, "grad_norm": 0.6369685662608536, "learning_rate": 5.1146568342174785e-06, "loss": 0.029755601286888124, "step": 16255 }, { "epoch": 0.15303529411764705, "grad_norm": 0.45019918261055397, "learning_rate": 5.1138703390283455e-06, "loss": 0.030487334728240965, "step": 16260 }, { "epoch": 0.15308235294117647, "grad_norm": 0.7057820456056456, "learning_rate": 5.113084206552404e-06, "loss": 0.028456443548202516, "step": 16265 }, { "epoch": 0.15312941176470588, "grad_norm": 0.7074825674938352, "learning_rate": 5.112298436510951e-06, "loss": 0.031409209966659545, "step": 16270 }, { "epoch": 0.1531764705882353, "grad_norm": 0.5966725782623558, "learning_rate": 5.111513028625577e-06, "loss": 0.033598718047142026, "step": 16275 }, { "epoch": 0.1532235294117647, "grad_norm": 0.5705757297962369, "learning_rate": 5.11072798261818e-06, "loss": 0.031073790788650513, "step": 16280 }, { "epoch": 0.15327058823529413, "grad_norm": 0.6509071797589681, "learning_rate": 5.109943298210949e-06, "loss": 0.0356388509273529, "step": 16285 }, { "epoch": 0.15331764705882353, "grad_norm": 0.6639470236558049, "learning_rate": 5.109158975126374e-06, "loss": 0.034092217683792114, "step": 16290 }, { "epoch": 0.15336470588235293, "grad_norm": 0.5727164866334878, "learning_rate": 5.108375013087245e-06, "loss": 0.03199748396873474, "step": 16295 }, { "epoch": 0.15341176470588236, "grad_norm": 1.0042492645110015, "learning_rate": 5.10759141181665e-06, "loss": 0.03164700865745544, "step": 16300 }, { "epoch": 0.15345882352941176, "grad_norm": 0.5844948760161325, "learning_rate": 5.10680817103797e-06, "loss": 0.029687806963920593, "step": 16305 }, { "epoch": 0.1535058823529412, "grad_norm": 0.7295663723012988, "learning_rate": 5.106025290474887e-06, "loss": 0.0261183500289917, "step": 16310 }, { "epoch": 0.1535529411764706, "grad_norm": 0.6286815092205326, "learning_rate": 5.105242769851379e-06, "loss": 0.0313495934009552, "step": 16315 }, { "epoch": 0.1536, "grad_norm": 0.7101775025475238, "learning_rate": 5.104460608891716e-06, "loss": 0.03293226361274719, "step": 16320 }, { "epoch": 0.15364705882352941, "grad_norm": 0.4799680700910569, "learning_rate": 5.103678807320469e-06, "loss": 0.030797076225280762, "step": 16325 }, { "epoch": 0.15369411764705881, "grad_norm": 0.5717922763426122, "learning_rate": 5.1028973648625015e-06, "loss": 0.03288981318473816, "step": 16330 }, { "epoch": 0.15374117647058824, "grad_norm": 0.5742312294595183, "learning_rate": 5.1021162812429705e-06, "loss": 0.02494399845600128, "step": 16335 }, { "epoch": 0.15378823529411764, "grad_norm": 0.8395344486098493, "learning_rate": 5.101335556187331e-06, "loss": 0.03634035885334015, "step": 16340 }, { "epoch": 0.15383529411764707, "grad_norm": 0.6924955376594676, "learning_rate": 5.100555189421329e-06, "loss": 0.03114665746688843, "step": 16345 }, { "epoch": 0.15388235294117647, "grad_norm": 0.5605649168769176, "learning_rate": 5.099775180671005e-06, "loss": 0.030508485436439515, "step": 16350 }, { "epoch": 0.15392941176470587, "grad_norm": 0.9876939298993171, "learning_rate": 5.098995529662692e-06, "loss": 0.03067377209663391, "step": 16355 }, { "epoch": 0.1539764705882353, "grad_norm": 0.6718685364696872, "learning_rate": 5.098216236123018e-06, "loss": 0.029608964920043945, "step": 16360 }, { "epoch": 0.1540235294117647, "grad_norm": 0.8630955335823708, "learning_rate": 5.097437299778902e-06, "loss": 0.03060879707336426, "step": 16365 }, { "epoch": 0.15407058823529413, "grad_norm": 0.8400530923022405, "learning_rate": 5.096658720357554e-06, "loss": 0.03469933271408081, "step": 16370 }, { "epoch": 0.15411764705882353, "grad_norm": 0.7262460347788308, "learning_rate": 5.095880497586475e-06, "loss": 0.029829120635986327, "step": 16375 }, { "epoch": 0.15416470588235295, "grad_norm": 0.6851546875239829, "learning_rate": 5.095102631193461e-06, "loss": 0.02800266146659851, "step": 16380 }, { "epoch": 0.15421176470588235, "grad_norm": 0.7510551126294753, "learning_rate": 5.094325120906596e-06, "loss": 0.028117236495018006, "step": 16385 }, { "epoch": 0.15425882352941175, "grad_norm": 0.8096629860315586, "learning_rate": 5.093547966454255e-06, "loss": 0.03155402541160583, "step": 16390 }, { "epoch": 0.15430588235294118, "grad_norm": 0.7178916624797826, "learning_rate": 5.0927711675651e-06, "loss": 0.029017516970634462, "step": 16395 }, { "epoch": 0.15435294117647058, "grad_norm": 0.6420870293619735, "learning_rate": 5.091994723968088e-06, "loss": 0.029734310507774354, "step": 16400 }, { "epoch": 0.1544, "grad_norm": 0.8830779726012984, "learning_rate": 5.091218635392461e-06, "loss": 0.037660795450210574, "step": 16405 }, { "epoch": 0.1544470588235294, "grad_norm": 0.6301729062846867, "learning_rate": 5.0904429015677536e-06, "loss": 0.02841517925262451, "step": 16410 }, { "epoch": 0.15449411764705884, "grad_norm": 0.7464527385026218, "learning_rate": 5.089667522223784e-06, "loss": 0.029856750369071962, "step": 16415 }, { "epoch": 0.15454117647058824, "grad_norm": 0.7040366118690722, "learning_rate": 5.08889249709066e-06, "loss": 0.03111388683319092, "step": 16420 }, { "epoch": 0.15458823529411764, "grad_norm": 0.701699199083085, "learning_rate": 5.088117825898781e-06, "loss": 0.030423635244369508, "step": 16425 }, { "epoch": 0.15463529411764707, "grad_norm": 0.6130650050096557, "learning_rate": 5.087343508378828e-06, "loss": 0.03298150599002838, "step": 16430 }, { "epoch": 0.15468235294117647, "grad_norm": 0.6077788304278572, "learning_rate": 5.086569544261772e-06, "loss": 0.029372647404670715, "step": 16435 }, { "epoch": 0.1547294117647059, "grad_norm": 0.7898975679391385, "learning_rate": 5.085795933278871e-06, "loss": 0.031252545118331906, "step": 16440 }, { "epoch": 0.1547764705882353, "grad_norm": 0.689232229976232, "learning_rate": 5.0850226751616645e-06, "loss": 0.02765588164329529, "step": 16445 }, { "epoch": 0.1548235294117647, "grad_norm": 0.7499133663980738, "learning_rate": 5.084249769641985e-06, "loss": 0.029506582021713256, "step": 16450 }, { "epoch": 0.15487058823529412, "grad_norm": 0.7315088266020937, "learning_rate": 5.083477216451942e-06, "loss": 0.034387990832328796, "step": 16455 }, { "epoch": 0.15491764705882352, "grad_norm": 0.5216210247299395, "learning_rate": 5.082705015323935e-06, "loss": 0.02688090205192566, "step": 16460 }, { "epoch": 0.15496470588235295, "grad_norm": 0.6696998806860831, "learning_rate": 5.081933165990648e-06, "loss": 0.028478020429611207, "step": 16465 }, { "epoch": 0.15501176470588235, "grad_norm": 0.6889644964816042, "learning_rate": 5.081161668185047e-06, "loss": 0.03172431290149689, "step": 16470 }, { "epoch": 0.15505882352941178, "grad_norm": 0.7364833081374675, "learning_rate": 5.080390521640383e-06, "loss": 0.03168922066688538, "step": 16475 }, { "epoch": 0.15510588235294118, "grad_norm": 0.6826551636252699, "learning_rate": 5.079619726090189e-06, "loss": 0.039723625779151915, "step": 16480 }, { "epoch": 0.15515294117647058, "grad_norm": 0.6497712878311043, "learning_rate": 5.078849281268283e-06, "loss": 0.0310631662607193, "step": 16485 }, { "epoch": 0.1552, "grad_norm": 0.5144157657025642, "learning_rate": 5.078079186908765e-06, "loss": 0.030981600284576416, "step": 16490 }, { "epoch": 0.1552470588235294, "grad_norm": 0.7893387760160112, "learning_rate": 5.077309442746015e-06, "loss": 0.03205354213714599, "step": 16495 }, { "epoch": 0.15529411764705883, "grad_norm": 0.44985604411511465, "learning_rate": 5.076540048514695e-06, "loss": 0.026846295595169066, "step": 16500 }, { "epoch": 0.15534117647058823, "grad_norm": 0.7164685407551701, "learning_rate": 5.075771003949753e-06, "loss": 0.02894875705242157, "step": 16505 }, { "epoch": 0.15538823529411766, "grad_norm": 0.9119259312356085, "learning_rate": 5.075002308786412e-06, "loss": 0.027179181575775146, "step": 16510 }, { "epoch": 0.15543529411764706, "grad_norm": 0.5709693647711439, "learning_rate": 5.07423396276018e-06, "loss": 0.031036099791526793, "step": 16515 }, { "epoch": 0.15548235294117646, "grad_norm": 0.9289606305153404, "learning_rate": 5.0734659656068406e-06, "loss": 0.03269059658050537, "step": 16520 }, { "epoch": 0.1555294117647059, "grad_norm": 0.6443068867558535, "learning_rate": 5.072698317062465e-06, "loss": 0.030139243602752684, "step": 16525 }, { "epoch": 0.1555764705882353, "grad_norm": 0.5390761717375449, "learning_rate": 5.071931016863392e-06, "loss": 0.034214138984680176, "step": 16530 }, { "epoch": 0.15562352941176472, "grad_norm": 0.7339519021114276, "learning_rate": 5.071164064746252e-06, "loss": 0.03506521582603454, "step": 16535 }, { "epoch": 0.15567058823529412, "grad_norm": 0.47292754308940643, "learning_rate": 5.070397460447947e-06, "loss": 0.028101837635040282, "step": 16540 }, { "epoch": 0.15571764705882352, "grad_norm": 0.7612807302763104, "learning_rate": 5.069631203705658e-06, "loss": 0.028405606746673584, "step": 16545 }, { "epoch": 0.15576470588235294, "grad_norm": 0.60438016415505, "learning_rate": 5.068865294256845e-06, "loss": 0.03273497521877289, "step": 16550 }, { "epoch": 0.15581176470588234, "grad_norm": 0.5498099520681708, "learning_rate": 5.068099731839247e-06, "loss": 0.03237583637237549, "step": 16555 }, { "epoch": 0.15585882352941177, "grad_norm": 0.820529031757297, "learning_rate": 5.067334516190875e-06, "loss": 0.040871435403823854, "step": 16560 }, { "epoch": 0.15590588235294117, "grad_norm": 0.5792207260133933, "learning_rate": 5.066569647050024e-06, "loss": 0.02956947684288025, "step": 16565 }, { "epoch": 0.1559529411764706, "grad_norm": 0.6279420179718864, "learning_rate": 5.06580512415526e-06, "loss": 0.029584747552871705, "step": 16570 }, { "epoch": 0.156, "grad_norm": 0.764397383979587, "learning_rate": 5.0650409472454255e-06, "loss": 0.03169445097446442, "step": 16575 }, { "epoch": 0.1560470588235294, "grad_norm": 0.6098730894672496, "learning_rate": 5.0642771160596425e-06, "loss": 0.02818734645843506, "step": 16580 }, { "epoch": 0.15609411764705883, "grad_norm": 0.6180604170369622, "learning_rate": 5.063513630337306e-06, "loss": 0.03176636397838593, "step": 16585 }, { "epoch": 0.15614117647058823, "grad_norm": 0.699452201957924, "learning_rate": 5.062750489818083e-06, "loss": 0.028012070059776305, "step": 16590 }, { "epoch": 0.15618823529411766, "grad_norm": 0.8214187709680765, "learning_rate": 5.06198769424192e-06, "loss": 0.030105894804000853, "step": 16595 }, { "epoch": 0.15623529411764706, "grad_norm": 0.48721339401580316, "learning_rate": 5.061225243349034e-06, "loss": 0.028738945722579956, "step": 16600 }, { "epoch": 0.15628235294117648, "grad_norm": 0.6812183197793854, "learning_rate": 5.060463136879919e-06, "loss": 0.027648967504501343, "step": 16605 }, { "epoch": 0.15632941176470588, "grad_norm": 0.717719976682713, "learning_rate": 5.05970137457534e-06, "loss": 0.030976077914237975, "step": 16610 }, { "epoch": 0.15637647058823528, "grad_norm": 0.8080152568412673, "learning_rate": 5.058939956176337e-06, "loss": 0.028262558579444885, "step": 16615 }, { "epoch": 0.1564235294117647, "grad_norm": 0.6784831163706231, "learning_rate": 5.058178881424219e-06, "loss": 0.03657498359680176, "step": 16620 }, { "epoch": 0.1564705882352941, "grad_norm": 0.8424087765001081, "learning_rate": 5.0574181500605705e-06, "loss": 0.0328577995300293, "step": 16625 }, { "epoch": 0.15651764705882354, "grad_norm": 0.7107152105277994, "learning_rate": 5.0566577618272494e-06, "loss": 0.027003148198127748, "step": 16630 }, { "epoch": 0.15656470588235294, "grad_norm": 0.5289533175070091, "learning_rate": 5.055897716466381e-06, "loss": 0.031171461939811705, "step": 16635 }, { "epoch": 0.15661176470588234, "grad_norm": 0.5629556759719739, "learning_rate": 5.055138013720366e-06, "loss": 0.02534675896167755, "step": 16640 }, { "epoch": 0.15665882352941177, "grad_norm": 0.5542560172903772, "learning_rate": 5.054378653331873e-06, "loss": 0.030048206448554993, "step": 16645 }, { "epoch": 0.15670588235294117, "grad_norm": 0.7742555541597135, "learning_rate": 5.053619635043842e-06, "loss": 0.03239170908927917, "step": 16650 }, { "epoch": 0.1567529411764706, "grad_norm": 0.45017939404027774, "learning_rate": 5.052860958599483e-06, "loss": 0.02708718180656433, "step": 16655 }, { "epoch": 0.1568, "grad_norm": 0.8564149675817931, "learning_rate": 5.0521026237422755e-06, "loss": 0.03753558397293091, "step": 16660 }, { "epoch": 0.15684705882352942, "grad_norm": 0.6015677031240411, "learning_rate": 5.051344630215969e-06, "loss": 0.0319699227809906, "step": 16665 }, { "epoch": 0.15689411764705882, "grad_norm": 0.754749589302786, "learning_rate": 5.050586977764585e-06, "loss": 0.036786210536956784, "step": 16670 }, { "epoch": 0.15694117647058822, "grad_norm": 0.6474529326013316, "learning_rate": 5.0498296661324075e-06, "loss": 0.03334023058414459, "step": 16675 }, { "epoch": 0.15698823529411765, "grad_norm": 0.6500923748316887, "learning_rate": 5.0490726950639936e-06, "loss": 0.032581824064254764, "step": 16680 }, { "epoch": 0.15703529411764705, "grad_norm": 0.6460308133694751, "learning_rate": 5.048316064304164e-06, "loss": 0.03140260875225067, "step": 16685 }, { "epoch": 0.15708235294117648, "grad_norm": 0.7326289255193816, "learning_rate": 5.047559773598013e-06, "loss": 0.028465089201927186, "step": 16690 }, { "epoch": 0.15712941176470588, "grad_norm": 0.744556163166475, "learning_rate": 5.046803822690898e-06, "loss": 0.030105486512184143, "step": 16695 }, { "epoch": 0.1571764705882353, "grad_norm": 1.1096175288805972, "learning_rate": 5.046048211328441e-06, "loss": 0.03089225888252258, "step": 16700 }, { "epoch": 0.1572235294117647, "grad_norm": 0.5499904369954008, "learning_rate": 5.045292939256539e-06, "loss": 0.033368921279907225, "step": 16705 }, { "epoch": 0.1572705882352941, "grad_norm": 0.6130419531620099, "learning_rate": 5.0445380062213455e-06, "loss": 0.02824927568435669, "step": 16710 }, { "epoch": 0.15731764705882353, "grad_norm": 0.8518294881839247, "learning_rate": 5.043783411969286e-06, "loss": 0.027057012915611266, "step": 16715 }, { "epoch": 0.15736470588235293, "grad_norm": 0.781080675054476, "learning_rate": 5.043029156247049e-06, "loss": 0.026948902010917663, "step": 16720 }, { "epoch": 0.15741176470588236, "grad_norm": 0.6128565831090728, "learning_rate": 5.04227523880159e-06, "loss": 0.028052401542663575, "step": 16725 }, { "epoch": 0.15745882352941176, "grad_norm": 0.8342426144166909, "learning_rate": 5.041521659380125e-06, "loss": 0.028672194480895995, "step": 16730 }, { "epoch": 0.1575058823529412, "grad_norm": 0.6643042270551263, "learning_rate": 5.04076841773014e-06, "loss": 0.03074551224708557, "step": 16735 }, { "epoch": 0.1575529411764706, "grad_norm": 0.5363527005905957, "learning_rate": 5.04001551359938e-06, "loss": 0.025217092037200926, "step": 16740 }, { "epoch": 0.1576, "grad_norm": 0.6591149547519648, "learning_rate": 5.0392629467358576e-06, "loss": 0.027948933839797973, "step": 16745 }, { "epoch": 0.15764705882352942, "grad_norm": 0.8200159747352113, "learning_rate": 5.038510716887847e-06, "loss": 0.03441989123821258, "step": 16750 }, { "epoch": 0.15769411764705882, "grad_norm": 0.6582362816868076, "learning_rate": 5.037758823803885e-06, "loss": 0.029557681083679198, "step": 16755 }, { "epoch": 0.15774117647058825, "grad_norm": 0.6859177728909515, "learning_rate": 5.037007267232771e-06, "loss": 0.029513520002365113, "step": 16760 }, { "epoch": 0.15778823529411765, "grad_norm": 0.46162516090616906, "learning_rate": 5.036256046923567e-06, "loss": 0.026588305830955505, "step": 16765 }, { "epoch": 0.15783529411764705, "grad_norm": 0.5777840412418913, "learning_rate": 5.035505162625597e-06, "loss": 0.02788977324962616, "step": 16770 }, { "epoch": 0.15788235294117647, "grad_norm": 0.6123925051432683, "learning_rate": 5.034754614088446e-06, "loss": 0.025855690240859985, "step": 16775 }, { "epoch": 0.15792941176470587, "grad_norm": 0.7045281683101314, "learning_rate": 5.034004401061962e-06, "loss": 0.027135780453681944, "step": 16780 }, { "epoch": 0.1579764705882353, "grad_norm": 0.7901809540221615, "learning_rate": 5.0332545232962515e-06, "loss": 0.026764526963233948, "step": 16785 }, { "epoch": 0.1580235294117647, "grad_norm": 0.5648437156239479, "learning_rate": 5.032504980541682e-06, "loss": 0.026423192024230956, "step": 16790 }, { "epoch": 0.15807058823529413, "grad_norm": 0.8239173258360498, "learning_rate": 5.031755772548884e-06, "loss": 0.031321993470191954, "step": 16795 }, { "epoch": 0.15811764705882353, "grad_norm": 0.5577526813073779, "learning_rate": 5.031006899068743e-06, "loss": 0.030038037896156312, "step": 16800 }, { "epoch": 0.15816470588235293, "grad_norm": 0.6118003018713472, "learning_rate": 5.0302583598524076e-06, "loss": 0.029886892437934874, "step": 16805 }, { "epoch": 0.15821176470588236, "grad_norm": 0.5440588140322292, "learning_rate": 5.029510154651284e-06, "loss": 0.027837714552879332, "step": 16810 }, { "epoch": 0.15825882352941176, "grad_norm": 0.46923374146867897, "learning_rate": 5.028762283217037e-06, "loss": 0.02719072699546814, "step": 16815 }, { "epoch": 0.15830588235294119, "grad_norm": 0.6732008820107627, "learning_rate": 5.028014745301591e-06, "loss": 0.027130645513534547, "step": 16820 }, { "epoch": 0.15835294117647059, "grad_norm": 0.6550611359833955, "learning_rate": 5.027267540657126e-06, "loss": 0.028745189309120178, "step": 16825 }, { "epoch": 0.1584, "grad_norm": 0.5831019665185104, "learning_rate": 5.026520669036083e-06, "loss": 0.03820033073425293, "step": 16830 }, { "epoch": 0.1584470588235294, "grad_norm": 0.5056240311674854, "learning_rate": 5.02577413019116e-06, "loss": 0.02687678337097168, "step": 16835 }, { "epoch": 0.1584941176470588, "grad_norm": 0.5737477165492986, "learning_rate": 5.0250279238753085e-06, "loss": 0.026705282926559448, "step": 16840 }, { "epoch": 0.15854117647058824, "grad_norm": 0.8997939251753704, "learning_rate": 5.02428204984174e-06, "loss": 0.031237953901290895, "step": 16845 }, { "epoch": 0.15858823529411764, "grad_norm": 0.5480658248780654, "learning_rate": 5.02353650784392e-06, "loss": 0.027390426397323607, "step": 16850 }, { "epoch": 0.15863529411764707, "grad_norm": 0.9528421183250644, "learning_rate": 5.022791297635574e-06, "loss": 0.029181912541389465, "step": 16855 }, { "epoch": 0.15868235294117647, "grad_norm": 0.8517480016252639, "learning_rate": 5.022046418970679e-06, "loss": 0.032196253538131714, "step": 16860 }, { "epoch": 0.15872941176470587, "grad_norm": 0.5557885705849637, "learning_rate": 5.02130187160347e-06, "loss": 0.029116272926330566, "step": 16865 }, { "epoch": 0.1587764705882353, "grad_norm": 0.76181135358676, "learning_rate": 5.020557655288434e-06, "loss": 0.0243422731757164, "step": 16870 }, { "epoch": 0.1588235294117647, "grad_norm": 0.6850071864736634, "learning_rate": 5.019813769780314e-06, "loss": 0.03707626461982727, "step": 16875 }, { "epoch": 0.15887058823529412, "grad_norm": 0.5785725786353146, "learning_rate": 5.019070214834111e-06, "loss": 0.026590341329574586, "step": 16880 }, { "epoch": 0.15891764705882352, "grad_norm": 0.5286521397476884, "learning_rate": 5.018326990205073e-06, "loss": 0.027663147449493407, "step": 16885 }, { "epoch": 0.15896470588235295, "grad_norm": 0.7247403649706525, "learning_rate": 5.017584095648709e-06, "loss": 0.028553378582000733, "step": 16890 }, { "epoch": 0.15901176470588235, "grad_norm": 1.0376995128665387, "learning_rate": 5.0168415309207744e-06, "loss": 0.0312794029712677, "step": 16895 }, { "epoch": 0.15905882352941175, "grad_norm": 0.5944487682005584, "learning_rate": 5.016099295777282e-06, "loss": 0.024394214153289795, "step": 16900 }, { "epoch": 0.15910588235294118, "grad_norm": 0.46283437096104896, "learning_rate": 5.015357389974497e-06, "loss": 0.03201163411140442, "step": 16905 }, { "epoch": 0.15915294117647058, "grad_norm": 0.6823922730941204, "learning_rate": 5.0146158132689345e-06, "loss": 0.02688503861427307, "step": 16910 }, { "epoch": 0.1592, "grad_norm": 0.6921789551872843, "learning_rate": 5.013874565417362e-06, "loss": 0.033752846717834475, "step": 16915 }, { "epoch": 0.1592470588235294, "grad_norm": 0.5039248540137303, "learning_rate": 5.013133646176804e-06, "loss": 0.026317697763442994, "step": 16920 }, { "epoch": 0.15929411764705884, "grad_norm": 0.5702709877399302, "learning_rate": 5.0123930553045265e-06, "loss": 0.03213106393814087, "step": 16925 }, { "epoch": 0.15934117647058824, "grad_norm": 0.39238520370329205, "learning_rate": 5.011652792558054e-06, "loss": 0.02720859944820404, "step": 16930 }, { "epoch": 0.15938823529411764, "grad_norm": 0.6016290198994567, "learning_rate": 5.010912857695159e-06, "loss": 0.03370964527130127, "step": 16935 }, { "epoch": 0.15943529411764706, "grad_norm": 0.47469575180492296, "learning_rate": 5.010173250473865e-06, "loss": 0.02573694586753845, "step": 16940 }, { "epoch": 0.15948235294117646, "grad_norm": 0.7021385033626544, "learning_rate": 5.009433970652445e-06, "loss": 0.029241195321083067, "step": 16945 }, { "epoch": 0.1595294117647059, "grad_norm": 0.7087280789088083, "learning_rate": 5.008695017989421e-06, "loss": 0.03130761981010437, "step": 16950 }, { "epoch": 0.1595764705882353, "grad_norm": 0.7021759179556278, "learning_rate": 5.0079563922435655e-06, "loss": 0.02971363961696625, "step": 16955 }, { "epoch": 0.15962352941176472, "grad_norm": 0.6585015567092187, "learning_rate": 5.007218093173899e-06, "loss": 0.031483253836631774, "step": 16960 }, { "epoch": 0.15967058823529412, "grad_norm": 0.6616634705377042, "learning_rate": 5.0064801205396906e-06, "loss": 0.030354350805282593, "step": 16965 }, { "epoch": 0.15971764705882352, "grad_norm": 0.6777154020499655, "learning_rate": 5.005742474100459e-06, "loss": 0.029963207244873048, "step": 16970 }, { "epoch": 0.15976470588235295, "grad_norm": 0.44092537965374634, "learning_rate": 5.005005153615968e-06, "loss": 0.025972414016723632, "step": 16975 }, { "epoch": 0.15981176470588235, "grad_norm": 0.6345251725714081, "learning_rate": 5.004268158846233e-06, "loss": 0.033685019612312316, "step": 16980 }, { "epoch": 0.15985882352941178, "grad_norm": 0.9366959344855342, "learning_rate": 5.003531489551514e-06, "loss": 0.03001253604888916, "step": 16985 }, { "epoch": 0.15990588235294118, "grad_norm": 1.0565004525306936, "learning_rate": 5.002795145492317e-06, "loss": 0.036589395999908444, "step": 16990 }, { "epoch": 0.15995294117647058, "grad_norm": 0.4573215623842136, "learning_rate": 5.002059126429398e-06, "loss": 0.02864466905593872, "step": 16995 }, { "epoch": 0.16, "grad_norm": 0.7448275470345895, "learning_rate": 5.001323432123757e-06, "loss": 0.027745270729064943, "step": 17000 }, { "epoch": 0.1600470588235294, "grad_norm": 0.6907386253628109, "learning_rate": 5.000588062336641e-06, "loss": 0.0229175865650177, "step": 17005 }, { "epoch": 0.16009411764705883, "grad_norm": 0.7862132575058148, "learning_rate": 4.999853016829542e-06, "loss": 0.029947766661643983, "step": 17010 }, { "epoch": 0.16014117647058823, "grad_norm": 0.4928077240757689, "learning_rate": 4.999118295364196e-06, "loss": 0.03170168399810791, "step": 17015 }, { "epoch": 0.16018823529411766, "grad_norm": 0.7352417645726611, "learning_rate": 4.998383897702587e-06, "loss": 0.03145758509635925, "step": 17020 }, { "epoch": 0.16023529411764706, "grad_norm": 3.8741646054194416, "learning_rate": 4.997649823606942e-06, "loss": 0.02931252121925354, "step": 17025 }, { "epoch": 0.16028235294117646, "grad_norm": 0.7368541824345082, "learning_rate": 4.9969160728397335e-06, "loss": 0.03369482457637787, "step": 17030 }, { "epoch": 0.1603294117647059, "grad_norm": 0.7604336731781571, "learning_rate": 4.996182645163674e-06, "loss": 0.034254437685012816, "step": 17035 }, { "epoch": 0.1603764705882353, "grad_norm": 0.9577837037831766, "learning_rate": 4.9954495403417255e-06, "loss": 0.032937997579574586, "step": 17040 }, { "epoch": 0.16042352941176471, "grad_norm": 0.5401305113200616, "learning_rate": 4.994716758137092e-06, "loss": 0.031137341260910036, "step": 17045 }, { "epoch": 0.16047058823529411, "grad_norm": 0.7676719787926358, "learning_rate": 4.9939842983132145e-06, "loss": 0.031653383374214174, "step": 17050 }, { "epoch": 0.16051764705882354, "grad_norm": 0.7381224664172936, "learning_rate": 4.993252160633787e-06, "loss": 0.028995984792709352, "step": 17055 }, { "epoch": 0.16056470588235294, "grad_norm": 0.5818178954612977, "learning_rate": 4.992520344862737e-06, "loss": 0.02929267883300781, "step": 17060 }, { "epoch": 0.16061176470588234, "grad_norm": 0.45591685291115264, "learning_rate": 4.991788850764239e-06, "loss": 0.030420327186584474, "step": 17065 }, { "epoch": 0.16065882352941177, "grad_norm": 0.722961219037112, "learning_rate": 4.991057678102707e-06, "loss": 0.027722427248954774, "step": 17070 }, { "epoch": 0.16070588235294117, "grad_norm": 0.7639297313136523, "learning_rate": 4.990326826642799e-06, "loss": 0.02865999937057495, "step": 17075 }, { "epoch": 0.1607529411764706, "grad_norm": 0.8728748940995891, "learning_rate": 4.989596296149411e-06, "loss": 0.03799639642238617, "step": 17080 }, { "epoch": 0.1608, "grad_norm": 0.7662758681743967, "learning_rate": 4.988866086387683e-06, "loss": 0.028279438614845276, "step": 17085 }, { "epoch": 0.1608470588235294, "grad_norm": 0.5993346417083912, "learning_rate": 4.988136197122992e-06, "loss": 0.02970918416976929, "step": 17090 }, { "epoch": 0.16089411764705883, "grad_norm": 0.7625110448424066, "learning_rate": 4.9874066281209595e-06, "loss": 0.029532569646835326, "step": 17095 }, { "epoch": 0.16094117647058823, "grad_norm": 0.6983945682303891, "learning_rate": 4.986677379147443e-06, "loss": 0.03275848031044006, "step": 17100 }, { "epoch": 0.16098823529411765, "grad_norm": 0.5773577764021596, "learning_rate": 4.9859484499685405e-06, "loss": 0.03122832775115967, "step": 17105 }, { "epoch": 0.16103529411764705, "grad_norm": 0.7915269749056503, "learning_rate": 4.9852198403505904e-06, "loss": 0.02977156937122345, "step": 17110 }, { "epoch": 0.16108235294117648, "grad_norm": 0.762632418466217, "learning_rate": 4.984491550060172e-06, "loss": 0.03031458258628845, "step": 17115 }, { "epoch": 0.16112941176470588, "grad_norm": 0.5392787659511911, "learning_rate": 4.983763578864097e-06, "loss": 0.02889847755432129, "step": 17120 }, { "epoch": 0.16117647058823528, "grad_norm": 0.9864637386032155, "learning_rate": 4.983035926529422e-06, "loss": 0.031028938293457032, "step": 17125 }, { "epoch": 0.1612235294117647, "grad_norm": 0.6043083350861067, "learning_rate": 4.9823085928234375e-06, "loss": 0.030004394054412842, "step": 17130 }, { "epoch": 0.1612705882352941, "grad_norm": 0.8053731658084935, "learning_rate": 4.9815815775136716e-06, "loss": 0.03246696889400482, "step": 17135 }, { "epoch": 0.16131764705882354, "grad_norm": 0.6567212888890619, "learning_rate": 4.980854880367895e-06, "loss": 0.025935643911361696, "step": 17140 }, { "epoch": 0.16136470588235294, "grad_norm": 1.032168974247481, "learning_rate": 4.980128501154109e-06, "loss": 0.02897987365722656, "step": 17145 }, { "epoch": 0.16141176470588237, "grad_norm": 0.6355791269298844, "learning_rate": 4.9794024396405545e-06, "loss": 0.02791149914264679, "step": 17150 }, { "epoch": 0.16145882352941177, "grad_norm": 0.5097503156365059, "learning_rate": 4.97867669559571e-06, "loss": 0.0271270751953125, "step": 17155 }, { "epoch": 0.16150588235294117, "grad_norm": 0.7601915289898608, "learning_rate": 4.977951268788286e-06, "loss": 0.02545560896396637, "step": 17160 }, { "epoch": 0.1615529411764706, "grad_norm": 0.5488705750428768, "learning_rate": 4.977226158987236e-06, "loss": 0.026547056436538697, "step": 17165 }, { "epoch": 0.1616, "grad_norm": 0.6592356445324731, "learning_rate": 4.976501365961741e-06, "loss": 0.028581503033638, "step": 17170 }, { "epoch": 0.16164705882352942, "grad_norm": 0.34891787241730965, "learning_rate": 4.975776889481222e-06, "loss": 0.02720775008201599, "step": 17175 }, { "epoch": 0.16169411764705882, "grad_norm": 0.9580023025643414, "learning_rate": 4.975052729315335e-06, "loss": 0.024698691070079805, "step": 17180 }, { "epoch": 0.16174117647058822, "grad_norm": 0.5647850269648179, "learning_rate": 4.974328885233968e-06, "loss": 0.027754607796669006, "step": 17185 }, { "epoch": 0.16178823529411765, "grad_norm": 1.1384419792973792, "learning_rate": 4.973605357007244e-06, "loss": 0.031227022409439087, "step": 17190 }, { "epoch": 0.16183529411764705, "grad_norm": 0.6184919391657717, "learning_rate": 4.972882144405522e-06, "loss": 0.030847796797752382, "step": 17195 }, { "epoch": 0.16188235294117648, "grad_norm": 0.7849198329939419, "learning_rate": 4.972159247199394e-06, "loss": 0.038226759433746337, "step": 17200 }, { "epoch": 0.16192941176470588, "grad_norm": 0.5960971684263913, "learning_rate": 4.9714366651596825e-06, "loss": 0.04322270154953003, "step": 17205 }, { "epoch": 0.1619764705882353, "grad_norm": 0.6922702410658896, "learning_rate": 4.970714398057449e-06, "loss": 0.02606317400932312, "step": 17210 }, { "epoch": 0.1620235294117647, "grad_norm": 1.021538480936799, "learning_rate": 4.969992445663978e-06, "loss": 0.03783715069293976, "step": 17215 }, { "epoch": 0.1620705882352941, "grad_norm": 0.5567183994137831, "learning_rate": 4.969270807750799e-06, "loss": 0.028122764825820924, "step": 17220 }, { "epoch": 0.16211764705882353, "grad_norm": 0.8130682632602646, "learning_rate": 4.968549484089663e-06, "loss": 0.027898740768432618, "step": 17225 }, { "epoch": 0.16216470588235293, "grad_norm": 0.8288171820544759, "learning_rate": 4.967828474452559e-06, "loss": 0.032646948099136354, "step": 17230 }, { "epoch": 0.16221176470588236, "grad_norm": 0.6340697615768708, "learning_rate": 4.967107778611705e-06, "loss": 0.028280693292617797, "step": 17235 }, { "epoch": 0.16225882352941176, "grad_norm": 0.6072541085013046, "learning_rate": 4.96638739633955e-06, "loss": 0.035190466046333316, "step": 17240 }, { "epoch": 0.1623058823529412, "grad_norm": 0.6589245698767126, "learning_rate": 4.965667327408776e-06, "loss": 0.02863301634788513, "step": 17245 }, { "epoch": 0.1623529411764706, "grad_norm": 0.9377132414786225, "learning_rate": 4.964947571592293e-06, "loss": 0.030200773477554323, "step": 17250 }, { "epoch": 0.1624, "grad_norm": 0.4609029954011948, "learning_rate": 4.964228128663245e-06, "loss": 0.02971399128437042, "step": 17255 }, { "epoch": 0.16244705882352942, "grad_norm": 0.7805965170785024, "learning_rate": 4.963508998395003e-06, "loss": 0.028028592467308044, "step": 17260 }, { "epoch": 0.16249411764705882, "grad_norm": 0.5980627827237939, "learning_rate": 4.962790180561167e-06, "loss": 0.028068077564239503, "step": 17265 }, { "epoch": 0.16254117647058824, "grad_norm": 0.8310617094300715, "learning_rate": 4.962071674935568e-06, "loss": 0.03848951756954193, "step": 17270 }, { "epoch": 0.16258823529411764, "grad_norm": 0.9146455780118308, "learning_rate": 4.9613534812922685e-06, "loss": 0.0338320255279541, "step": 17275 }, { "epoch": 0.16263529411764707, "grad_norm": 0.4622887127951593, "learning_rate": 4.960635599405556e-06, "loss": 0.028308779001235962, "step": 17280 }, { "epoch": 0.16268235294117647, "grad_norm": 0.6075688951033424, "learning_rate": 4.959918029049947e-06, "loss": 0.02676251530647278, "step": 17285 }, { "epoch": 0.16272941176470587, "grad_norm": 1.0112424678139387, "learning_rate": 4.959200770000189e-06, "loss": 0.031764864921569824, "step": 17290 }, { "epoch": 0.1627764705882353, "grad_norm": 0.6574004958222233, "learning_rate": 4.958483822031254e-06, "loss": 0.03968207240104675, "step": 17295 }, { "epoch": 0.1628235294117647, "grad_norm": 0.6860381767787569, "learning_rate": 4.957767184918345e-06, "loss": 0.038776105642318724, "step": 17300 }, { "epoch": 0.16287058823529413, "grad_norm": 0.63435713856503, "learning_rate": 4.957050858436891e-06, "loss": 0.025230544805526733, "step": 17305 }, { "epoch": 0.16291764705882353, "grad_norm": 0.5374817026514181, "learning_rate": 4.956334842362546e-06, "loss": 0.029996371269226073, "step": 17310 }, { "epoch": 0.16296470588235293, "grad_norm": 0.6097586950644522, "learning_rate": 4.955619136471194e-06, "loss": 0.029371201992034912, "step": 17315 }, { "epoch": 0.16301176470588236, "grad_norm": 0.5513305016567164, "learning_rate": 4.9549037405389436e-06, "loss": 0.03439826965332031, "step": 17320 }, { "epoch": 0.16305882352941176, "grad_norm": 0.6950162402679056, "learning_rate": 4.954188654342131e-06, "loss": 0.031068527698516847, "step": 17325 }, { "epoch": 0.16310588235294118, "grad_norm": 1.0094403072298015, "learning_rate": 4.953473877657315e-06, "loss": 0.027041298151016236, "step": 17330 }, { "epoch": 0.16315294117647058, "grad_norm": 0.5704670725200679, "learning_rate": 4.952759410261284e-06, "loss": 0.03335337638854981, "step": 17335 }, { "epoch": 0.1632, "grad_norm": 0.7096252141710987, "learning_rate": 4.952045251931049e-06, "loss": 0.0296183317899704, "step": 17340 }, { "epoch": 0.1632470588235294, "grad_norm": 0.5580555659803378, "learning_rate": 4.951331402443848e-06, "loss": 0.02862926423549652, "step": 17345 }, { "epoch": 0.1632941176470588, "grad_norm": 0.6838526960348629, "learning_rate": 4.950617861577144e-06, "loss": 0.033543407917022705, "step": 17350 }, { "epoch": 0.16334117647058824, "grad_norm": 0.6695939181377714, "learning_rate": 4.9499046291086205e-06, "loss": 0.029847294092178345, "step": 17355 }, { "epoch": 0.16338823529411764, "grad_norm": 0.6715233652282264, "learning_rate": 4.9491917048161895e-06, "loss": 0.03273212909698486, "step": 17360 }, { "epoch": 0.16343529411764707, "grad_norm": 1.038282041526494, "learning_rate": 4.948479088477985e-06, "loss": 0.031322598457336426, "step": 17365 }, { "epoch": 0.16348235294117647, "grad_norm": 0.542805024687249, "learning_rate": 4.947766779872363e-06, "loss": 0.03177362084388733, "step": 17370 }, { "epoch": 0.1635294117647059, "grad_norm": 0.7151448122712278, "learning_rate": 4.947054778777905e-06, "loss": 0.028206965327262877, "step": 17375 }, { "epoch": 0.1635764705882353, "grad_norm": 0.6529322022352321, "learning_rate": 4.9463430849734174e-06, "loss": 0.0316206693649292, "step": 17380 }, { "epoch": 0.1636235294117647, "grad_norm": 0.6759751629442541, "learning_rate": 4.945631698237926e-06, "loss": 0.032531121373176576, "step": 17385 }, { "epoch": 0.16367058823529412, "grad_norm": 0.6955450939352483, "learning_rate": 4.944920618350677e-06, "loss": 0.032833915948867795, "step": 17390 }, { "epoch": 0.16371764705882352, "grad_norm": 0.9891389339052559, "learning_rate": 4.944209845091144e-06, "loss": 0.029502108693122864, "step": 17395 }, { "epoch": 0.16376470588235295, "grad_norm": 0.6259420709511779, "learning_rate": 4.94349937823902e-06, "loss": 0.028878217935562132, "step": 17400 }, { "epoch": 0.16381176470588235, "grad_norm": 0.7513817388262959, "learning_rate": 4.942789217574219e-06, "loss": 0.02792375683784485, "step": 17405 }, { "epoch": 0.16385882352941175, "grad_norm": 0.8792980563887136, "learning_rate": 4.942079362876878e-06, "loss": 0.034821122884750366, "step": 17410 }, { "epoch": 0.16390588235294118, "grad_norm": 0.8193016480000509, "learning_rate": 4.941369813927351e-06, "loss": 0.04078676700592041, "step": 17415 }, { "epoch": 0.16395294117647058, "grad_norm": 0.6116669896432613, "learning_rate": 4.940660570506218e-06, "loss": 0.034367543458938596, "step": 17420 }, { "epoch": 0.164, "grad_norm": 0.8183238043303438, "learning_rate": 4.939951632394275e-06, "loss": 0.03354405164718628, "step": 17425 }, { "epoch": 0.1640470588235294, "grad_norm": 0.7395668527117789, "learning_rate": 4.939242999372541e-06, "loss": 0.024699606001377106, "step": 17430 }, { "epoch": 0.16409411764705883, "grad_norm": 0.4787451168007125, "learning_rate": 4.938534671222254e-06, "loss": 0.025441074371337892, "step": 17435 }, { "epoch": 0.16414117647058823, "grad_norm": 0.6914495101085467, "learning_rate": 4.93782664772487e-06, "loss": 0.029475587606430053, "step": 17440 }, { "epoch": 0.16418823529411763, "grad_norm": 0.6777630883129055, "learning_rate": 4.937118928662066e-06, "loss": 0.024926865100860597, "step": 17445 }, { "epoch": 0.16423529411764706, "grad_norm": 1.0526692513914382, "learning_rate": 4.936411513815739e-06, "loss": 0.03651758432388306, "step": 17450 }, { "epoch": 0.16428235294117646, "grad_norm": 0.5791881812622252, "learning_rate": 4.9357044029680025e-06, "loss": 0.02703816294670105, "step": 17455 }, { "epoch": 0.1643294117647059, "grad_norm": 0.9039648230301449, "learning_rate": 4.934997595901186e-06, "loss": 0.038988083600997925, "step": 17460 }, { "epoch": 0.1643764705882353, "grad_norm": 0.5939705719547602, "learning_rate": 4.934291092397845e-06, "loss": 0.027489298582077028, "step": 17465 }, { "epoch": 0.16442352941176472, "grad_norm": 0.6049013202004394, "learning_rate": 4.933584892240746e-06, "loss": 0.026560932397842407, "step": 17470 }, { "epoch": 0.16447058823529412, "grad_norm": 0.5925183994147837, "learning_rate": 4.9328789952128754e-06, "loss": 0.028045237064361572, "step": 17475 }, { "epoch": 0.16451764705882352, "grad_norm": 0.5535028673786871, "learning_rate": 4.932173401097436e-06, "loss": 0.025021946430206297, "step": 17480 }, { "epoch": 0.16456470588235295, "grad_norm": 0.6891121724597894, "learning_rate": 4.9314681096778495e-06, "loss": 0.0326633632183075, "step": 17485 }, { "epoch": 0.16461176470588235, "grad_norm": 0.5898343336166126, "learning_rate": 4.9307631207377525e-06, "loss": 0.025238621234893798, "step": 17490 }, { "epoch": 0.16465882352941177, "grad_norm": 0.4888403895156699, "learning_rate": 4.9300584340609985e-06, "loss": 0.028102612495422362, "step": 17495 }, { "epoch": 0.16470588235294117, "grad_norm": 0.5499973626417932, "learning_rate": 4.929354049431657e-06, "loss": 0.02542133629322052, "step": 17500 }, { "epoch": 0.1647529411764706, "grad_norm": 0.576504658749046, "learning_rate": 4.928649966634013e-06, "loss": 0.032391464710235594, "step": 17505 }, { "epoch": 0.1648, "grad_norm": 0.5888646699866175, "learning_rate": 4.927946185452571e-06, "loss": 0.030810242891311644, "step": 17510 }, { "epoch": 0.1648470588235294, "grad_norm": 0.6137435116484047, "learning_rate": 4.927242705672046e-06, "loss": 0.028783762454986574, "step": 17515 }, { "epoch": 0.16489411764705883, "grad_norm": 0.8144456169748516, "learning_rate": 4.926539527077369e-06, "loss": 0.02994285225868225, "step": 17520 }, { "epoch": 0.16494117647058823, "grad_norm": 0.521886824396825, "learning_rate": 4.9258366494536876e-06, "loss": 0.02419031858444214, "step": 17525 }, { "epoch": 0.16498823529411766, "grad_norm": 0.725488685148327, "learning_rate": 4.9251340725863615e-06, "loss": 0.028816798329353334, "step": 17530 }, { "epoch": 0.16503529411764706, "grad_norm": 0.476181389028887, "learning_rate": 4.924431796260967e-06, "loss": 0.0281638503074646, "step": 17535 }, { "epoch": 0.16508235294117646, "grad_norm": 0.64395505009788, "learning_rate": 4.9237298202632936e-06, "loss": 0.028294622898101807, "step": 17540 }, { "epoch": 0.16512941176470589, "grad_norm": 0.49232431535011434, "learning_rate": 4.9230281443793434e-06, "loss": 0.023336635529994966, "step": 17545 }, { "epoch": 0.16517647058823529, "grad_norm": 0.8239072363787118, "learning_rate": 4.922326768395333e-06, "loss": 0.03311874270439148, "step": 17550 }, { "epoch": 0.1652235294117647, "grad_norm": 1.4270247215016825, "learning_rate": 4.9216256920976934e-06, "loss": 0.03287478089332581, "step": 17555 }, { "epoch": 0.1652705882352941, "grad_norm": 0.5094862093607859, "learning_rate": 4.920924915273065e-06, "loss": 0.026262122392654418, "step": 17560 }, { "epoch": 0.16531764705882354, "grad_norm": 0.4902117441729775, "learning_rate": 4.9202244377083046e-06, "loss": 0.026004493236541748, "step": 17565 }, { "epoch": 0.16536470588235294, "grad_norm": 0.7482243274254556, "learning_rate": 4.9195242591904775e-06, "loss": 0.028095442056655883, "step": 17570 }, { "epoch": 0.16541176470588234, "grad_norm": 0.6244705260495286, "learning_rate": 4.918824379506865e-06, "loss": 0.03268746733665466, "step": 17575 }, { "epoch": 0.16545882352941177, "grad_norm": 0.8325169526461125, "learning_rate": 4.9181247984449565e-06, "loss": 0.03394051194190979, "step": 17580 }, { "epoch": 0.16550588235294117, "grad_norm": 0.7728093377618329, "learning_rate": 4.917425515792457e-06, "loss": 0.030124932527542114, "step": 17585 }, { "epoch": 0.1655529411764706, "grad_norm": 12.77875840769575, "learning_rate": 4.916726531337276e-06, "loss": 0.027178871631622314, "step": 17590 }, { "epoch": 0.1656, "grad_norm": 0.8554344617340504, "learning_rate": 4.916027844867543e-06, "loss": 0.03263351619243622, "step": 17595 }, { "epoch": 0.16564705882352943, "grad_norm": 0.7451831958378314, "learning_rate": 4.9153294561715906e-06, "loss": 0.029914325475692748, "step": 17600 }, { "epoch": 0.16569411764705883, "grad_norm": 1.005584309652991, "learning_rate": 4.914631365037967e-06, "loss": 0.025408482551574706, "step": 17605 }, { "epoch": 0.16574117647058823, "grad_norm": 0.6594014883973848, "learning_rate": 4.913933571255428e-06, "loss": 0.025090163946151732, "step": 17610 }, { "epoch": 0.16578823529411765, "grad_norm": 0.5461074178863211, "learning_rate": 4.913236074612938e-06, "loss": 0.025516173243522643, "step": 17615 }, { "epoch": 0.16583529411764705, "grad_norm": 0.6538818527042187, "learning_rate": 4.912538874899673e-06, "loss": 0.02676338851451874, "step": 17620 }, { "epoch": 0.16588235294117648, "grad_norm": 0.6835219150632887, "learning_rate": 4.9118419719050175e-06, "loss": 0.029627305269241334, "step": 17625 }, { "epoch": 0.16592941176470588, "grad_norm": 0.6111417625395849, "learning_rate": 4.9111453654185685e-06, "loss": 0.03004190921783447, "step": 17630 }, { "epoch": 0.16597647058823528, "grad_norm": 0.6554642184346154, "learning_rate": 4.910449055230126e-06, "loss": 0.028307679295539855, "step": 17635 }, { "epoch": 0.1660235294117647, "grad_norm": 0.7326017778302089, "learning_rate": 4.909753041129704e-06, "loss": 0.030801692605018617, "step": 17640 }, { "epoch": 0.1660705882352941, "grad_norm": 0.5533862411068445, "learning_rate": 4.909057322907518e-06, "loss": 0.026175099611282348, "step": 17645 }, { "epoch": 0.16611764705882354, "grad_norm": 0.789807582951383, "learning_rate": 4.908361900354001e-06, "loss": 0.02722461223602295, "step": 17650 }, { "epoch": 0.16616470588235294, "grad_norm": 0.7619579997088978, "learning_rate": 4.907666773259785e-06, "loss": 0.03329780697822571, "step": 17655 }, { "epoch": 0.16621176470588236, "grad_norm": 0.8038809678614846, "learning_rate": 4.906971941415714e-06, "loss": 0.029856395721435548, "step": 17660 }, { "epoch": 0.16625882352941176, "grad_norm": 0.55106739907861, "learning_rate": 4.906277404612839e-06, "loss": 0.02942531108856201, "step": 17665 }, { "epoch": 0.16630588235294116, "grad_norm": 0.7746674666565811, "learning_rate": 4.905583162642416e-06, "loss": 0.03184332549571991, "step": 17670 }, { "epoch": 0.1663529411764706, "grad_norm": 0.5600478268005928, "learning_rate": 4.904889215295911e-06, "loss": 0.029919201135635377, "step": 17675 }, { "epoch": 0.1664, "grad_norm": 0.5893677188103706, "learning_rate": 4.904195562364992e-06, "loss": 0.030141669511795043, "step": 17680 }, { "epoch": 0.16644705882352942, "grad_norm": 0.4788836072531777, "learning_rate": 4.903502203641536e-06, "loss": 0.024037468433380126, "step": 17685 }, { "epoch": 0.16649411764705882, "grad_norm": 0.6688892574814929, "learning_rate": 4.902809138917626e-06, "loss": 0.02399197220802307, "step": 17690 }, { "epoch": 0.16654117647058825, "grad_norm": 0.7762487006203408, "learning_rate": 4.9021163679855514e-06, "loss": 0.03135247230529785, "step": 17695 }, { "epoch": 0.16658823529411765, "grad_norm": 0.5978763329540118, "learning_rate": 4.9014238906378025e-06, "loss": 0.02876065969467163, "step": 17700 }, { "epoch": 0.16663529411764705, "grad_norm": 0.6979752608003821, "learning_rate": 4.900731706667082e-06, "loss": 0.032847648859024046, "step": 17705 }, { "epoch": 0.16668235294117648, "grad_norm": 1.7095141685626185, "learning_rate": 4.900039815866288e-06, "loss": 0.03651600182056427, "step": 17710 }, { "epoch": 0.16672941176470588, "grad_norm": 0.8329582229657578, "learning_rate": 4.899348218028536e-06, "loss": 0.030130422115325926, "step": 17715 }, { "epoch": 0.1667764705882353, "grad_norm": 0.7717476584487877, "learning_rate": 4.898656912947132e-06, "loss": 0.035420936346054074, "step": 17720 }, { "epoch": 0.1668235294117647, "grad_norm": 0.6110222913373656, "learning_rate": 4.897965900415594e-06, "loss": 0.030936557054519653, "step": 17725 }, { "epoch": 0.1668705882352941, "grad_norm": 0.6775175092312243, "learning_rate": 4.897275180227645e-06, "loss": 0.02736116647720337, "step": 17730 }, { "epoch": 0.16691764705882353, "grad_norm": 0.7341661392280564, "learning_rate": 4.896584752177207e-06, "loss": 0.03668130040168762, "step": 17735 }, { "epoch": 0.16696470588235293, "grad_norm": 0.9013087467957946, "learning_rate": 4.895894616058406e-06, "loss": 0.033233314752578735, "step": 17740 }, { "epoch": 0.16701176470588236, "grad_norm": 0.5163884363903051, "learning_rate": 4.895204771665576e-06, "loss": 0.02756824791431427, "step": 17745 }, { "epoch": 0.16705882352941176, "grad_norm": 0.5464555764664378, "learning_rate": 4.8945152187932475e-06, "loss": 0.028359276056289674, "step": 17750 }, { "epoch": 0.1671058823529412, "grad_norm": 0.582739910622761, "learning_rate": 4.893825957236156e-06, "loss": 0.031051552295684813, "step": 17755 }, { "epoch": 0.1671529411764706, "grad_norm": 0.5485778187223016, "learning_rate": 4.89313698678924e-06, "loss": 0.027702131867408754, "step": 17760 }, { "epoch": 0.1672, "grad_norm": 0.7288637473797447, "learning_rate": 4.892448307247641e-06, "loss": 0.03013724684715271, "step": 17765 }, { "epoch": 0.16724705882352942, "grad_norm": 0.46612826344355157, "learning_rate": 4.891759918406697e-06, "loss": 0.027565178275108338, "step": 17770 }, { "epoch": 0.16729411764705882, "grad_norm": 0.8582130607065385, "learning_rate": 4.891071820061956e-06, "loss": 0.03237345814704895, "step": 17775 }, { "epoch": 0.16734117647058824, "grad_norm": 0.6656427373027667, "learning_rate": 4.890384012009158e-06, "loss": 0.02901321053504944, "step": 17780 }, { "epoch": 0.16738823529411764, "grad_norm": 0.5504343622511183, "learning_rate": 4.889696494044252e-06, "loss": 0.03375886678695679, "step": 17785 }, { "epoch": 0.16743529411764707, "grad_norm": 0.4871235886490608, "learning_rate": 4.889009265963383e-06, "loss": 0.026838135719299317, "step": 17790 }, { "epoch": 0.16748235294117647, "grad_norm": 0.8152198443179071, "learning_rate": 4.888322327562896e-06, "loss": 0.030875542759895326, "step": 17795 }, { "epoch": 0.16752941176470587, "grad_norm": 0.5791345141009628, "learning_rate": 4.8876356786393435e-06, "loss": 0.032159310579299924, "step": 17800 }, { "epoch": 0.1675764705882353, "grad_norm": 0.6325340001229455, "learning_rate": 4.886949318989466e-06, "loss": 0.029303091764450073, "step": 17805 }, { "epoch": 0.1676235294117647, "grad_norm": 0.5059530472128408, "learning_rate": 4.8862632484102145e-06, "loss": 0.025816985964775087, "step": 17810 }, { "epoch": 0.16767058823529413, "grad_norm": 0.4968891027067397, "learning_rate": 4.885577466698733e-06, "loss": 0.021852375566959382, "step": 17815 }, { "epoch": 0.16771764705882353, "grad_norm": 0.49327163195258933, "learning_rate": 4.884891973652368e-06, "loss": 0.028123992681503295, "step": 17820 }, { "epoch": 0.16776470588235295, "grad_norm": 0.6305866507148806, "learning_rate": 4.884206769068665e-06, "loss": 0.024328020215034486, "step": 17825 }, { "epoch": 0.16781176470588235, "grad_norm": 0.5063689189530121, "learning_rate": 4.883521852745364e-06, "loss": 0.02386813163757324, "step": 17830 }, { "epoch": 0.16785882352941175, "grad_norm": 0.5965252727337264, "learning_rate": 4.882837224480411e-06, "loss": 0.024998733401298524, "step": 17835 }, { "epoch": 0.16790588235294118, "grad_norm": 1.0439236843021735, "learning_rate": 4.882152884071943e-06, "loss": 0.03211246430873871, "step": 17840 }, { "epoch": 0.16795294117647058, "grad_norm": 2.2873906312306374, "learning_rate": 4.881468831318299e-06, "loss": 0.0305949866771698, "step": 17845 }, { "epoch": 0.168, "grad_norm": 0.7027131440564263, "learning_rate": 4.880785066018014e-06, "loss": 0.027828428149223327, "step": 17850 }, { "epoch": 0.1680470588235294, "grad_norm": 1.3260688195658161, "learning_rate": 4.880101587969821e-06, "loss": 0.03580198884010315, "step": 17855 }, { "epoch": 0.1680941176470588, "grad_norm": 0.6100308647088363, "learning_rate": 4.879418396972652e-06, "loss": 0.036575281620025636, "step": 17860 }, { "epoch": 0.16814117647058824, "grad_norm": 0.5643816656320712, "learning_rate": 4.878735492825632e-06, "loss": 0.024699072539806365, "step": 17865 }, { "epoch": 0.16818823529411764, "grad_norm": 0.7298083979049597, "learning_rate": 4.878052875328087e-06, "loss": 0.030653393268585204, "step": 17870 }, { "epoch": 0.16823529411764707, "grad_norm": 0.6812338739151333, "learning_rate": 4.877370544279537e-06, "loss": 0.028691762685775758, "step": 17875 }, { "epoch": 0.16828235294117647, "grad_norm": 0.7366593307217356, "learning_rate": 4.876688499479699e-06, "loss": 0.032763606309890746, "step": 17880 }, { "epoch": 0.1683294117647059, "grad_norm": 0.7976521859219617, "learning_rate": 4.876006740728484e-06, "loss": 0.025261521339416504, "step": 17885 }, { "epoch": 0.1683764705882353, "grad_norm": 0.4757070342982204, "learning_rate": 4.875325267826004e-06, "loss": 0.024884612858295442, "step": 17890 }, { "epoch": 0.1684235294117647, "grad_norm": 0.540747077839818, "learning_rate": 4.87464408057256e-06, "loss": 0.031271180510520934, "step": 17895 }, { "epoch": 0.16847058823529412, "grad_norm": 0.6326340488710483, "learning_rate": 4.873963178768653e-06, "loss": 0.02930106520652771, "step": 17900 }, { "epoch": 0.16851764705882352, "grad_norm": 0.5466410152503303, "learning_rate": 4.873282562214977e-06, "loss": 0.03162875473499298, "step": 17905 }, { "epoch": 0.16856470588235295, "grad_norm": 0.7796495730008159, "learning_rate": 4.8726022307124195e-06, "loss": 0.029622435569763184, "step": 17910 }, { "epoch": 0.16861176470588235, "grad_norm": 0.954693968642471, "learning_rate": 4.871922184062067e-06, "loss": 0.030246782302856445, "step": 17915 }, { "epoch": 0.16865882352941178, "grad_norm": 0.6816411718748999, "learning_rate": 4.871242422065197e-06, "loss": 0.03255150616168976, "step": 17920 }, { "epoch": 0.16870588235294118, "grad_norm": 0.55112060452482, "learning_rate": 4.870562944523279e-06, "loss": 0.026586848497390746, "step": 17925 }, { "epoch": 0.16875294117647058, "grad_norm": 0.8059836309203636, "learning_rate": 4.8698837512379805e-06, "loss": 0.026449960470199586, "step": 17930 }, { "epoch": 0.1688, "grad_norm": 0.700365175847512, "learning_rate": 4.86920484201116e-06, "loss": 0.030797472596168517, "step": 17935 }, { "epoch": 0.1688470588235294, "grad_norm": 1.3510116591585408, "learning_rate": 4.868526216644872e-06, "loss": 0.036037024855613706, "step": 17940 }, { "epoch": 0.16889411764705883, "grad_norm": 0.7512903334320081, "learning_rate": 4.86784787494136e-06, "loss": 0.030407989025115968, "step": 17945 }, { "epoch": 0.16894117647058823, "grad_norm": 0.6585046543686339, "learning_rate": 4.867169816703063e-06, "loss": 0.029025980830192567, "step": 17950 }, { "epoch": 0.16898823529411763, "grad_norm": 0.7846937589738163, "learning_rate": 4.8664920417326125e-06, "loss": 0.037295037508010866, "step": 17955 }, { "epoch": 0.16903529411764706, "grad_norm": 0.596219006640828, "learning_rate": 4.865814549832833e-06, "loss": 0.03346549868583679, "step": 17960 }, { "epoch": 0.16908235294117646, "grad_norm": 0.6866598921514039, "learning_rate": 4.865137340806737e-06, "loss": 0.02777700126171112, "step": 17965 }, { "epoch": 0.1691294117647059, "grad_norm": 0.6897524990241646, "learning_rate": 4.8644604144575345e-06, "loss": 0.028073802590370178, "step": 17970 }, { "epoch": 0.1691764705882353, "grad_norm": 0.560664062667513, "learning_rate": 4.863783770588624e-06, "loss": 0.028172528743743895, "step": 17975 }, { "epoch": 0.16922352941176472, "grad_norm": 0.39145031677168945, "learning_rate": 4.863107409003595e-06, "loss": 0.023207753896713257, "step": 17980 }, { "epoch": 0.16927058823529412, "grad_norm": 0.5191146091515642, "learning_rate": 4.862431329506228e-06, "loss": 0.029923009872436523, "step": 17985 }, { "epoch": 0.16931764705882352, "grad_norm": 0.6490951714229044, "learning_rate": 4.861755531900498e-06, "loss": 0.026772844791412353, "step": 17990 }, { "epoch": 0.16936470588235294, "grad_norm": 0.5445399881765974, "learning_rate": 4.861080015990567e-06, "loss": 0.032196080684661864, "step": 17995 }, { "epoch": 0.16941176470588235, "grad_norm": 0.6888183707507861, "learning_rate": 4.860404781580787e-06, "loss": 0.033218914270401, "step": 18000 }, { "epoch": 0.16945882352941177, "grad_norm": 0.5745638741158628, "learning_rate": 4.859729828475704e-06, "loss": 0.0237667977809906, "step": 18005 }, { "epoch": 0.16950588235294117, "grad_norm": 0.6944677517838453, "learning_rate": 4.859055156480051e-06, "loss": 0.03133420348167419, "step": 18010 }, { "epoch": 0.1695529411764706, "grad_norm": 0.4639919568024003, "learning_rate": 4.85838076539875e-06, "loss": 0.03519392013549805, "step": 18015 }, { "epoch": 0.1696, "grad_norm": 0.4462695896678374, "learning_rate": 4.857706655036914e-06, "loss": 0.03008861243724823, "step": 18020 }, { "epoch": 0.1696470588235294, "grad_norm": 0.6181697290404934, "learning_rate": 4.857032825199846e-06, "loss": 0.03160216212272644, "step": 18025 }, { "epoch": 0.16969411764705883, "grad_norm": 0.7937193144429033, "learning_rate": 4.856359275693038e-06, "loss": 0.030181092023849488, "step": 18030 }, { "epoch": 0.16974117647058823, "grad_norm": 0.5343339282135201, "learning_rate": 4.855686006322166e-06, "loss": 0.02142745703458786, "step": 18035 }, { "epoch": 0.16978823529411766, "grad_norm": 0.6528193177815127, "learning_rate": 4.855013016893101e-06, "loss": 0.027272903919219972, "step": 18040 }, { "epoch": 0.16983529411764706, "grad_norm": 0.5496801317166773, "learning_rate": 4.8543403072119e-06, "loss": 0.027116408944129942, "step": 18045 }, { "epoch": 0.16988235294117648, "grad_norm": 0.7638522688731576, "learning_rate": 4.853667877084807e-06, "loss": 0.024697257578372954, "step": 18050 }, { "epoch": 0.16992941176470588, "grad_norm": 1.2398026635536226, "learning_rate": 4.852995726318253e-06, "loss": 0.030012696981430054, "step": 18055 }, { "epoch": 0.16997647058823528, "grad_norm": 0.7303571253700046, "learning_rate": 4.852323854718861e-06, "loss": 0.03059617280960083, "step": 18060 }, { "epoch": 0.1700235294117647, "grad_norm": 0.55021879469248, "learning_rate": 4.8516522620934365e-06, "loss": 0.02954998016357422, "step": 18065 }, { "epoch": 0.1700705882352941, "grad_norm": 0.667945949959203, "learning_rate": 4.850980948248973e-06, "loss": 0.02968626022338867, "step": 18070 }, { "epoch": 0.17011764705882354, "grad_norm": 0.7086420838933293, "learning_rate": 4.850309912992654e-06, "loss": 0.027285993099212646, "step": 18075 }, { "epoch": 0.17016470588235294, "grad_norm": 0.622703256780851, "learning_rate": 4.849639156131848e-06, "loss": 0.029823583364486695, "step": 18080 }, { "epoch": 0.17021176470588234, "grad_norm": 0.4493311574529013, "learning_rate": 4.848968677474106e-06, "loss": 0.030192065238952636, "step": 18085 }, { "epoch": 0.17025882352941177, "grad_norm": 0.5415529444041581, "learning_rate": 4.8482984768271715e-06, "loss": 0.02313884347677231, "step": 18090 }, { "epoch": 0.17030588235294117, "grad_norm": 0.4180305061881715, "learning_rate": 4.847628553998971e-06, "loss": 0.026008743047714233, "step": 18095 }, { "epoch": 0.1703529411764706, "grad_norm": 0.5150397310462673, "learning_rate": 4.846958908797616e-06, "loss": 0.0217447966337204, "step": 18100 }, { "epoch": 0.1704, "grad_norm": 0.5707736548539617, "learning_rate": 4.846289541031406e-06, "loss": 0.024643781781196594, "step": 18105 }, { "epoch": 0.17044705882352942, "grad_norm": 0.7621441671498513, "learning_rate": 4.845620450508821e-06, "loss": 0.025768017768859862, "step": 18110 }, { "epoch": 0.17049411764705882, "grad_norm": 0.678077409643778, "learning_rate": 4.844951637038532e-06, "loss": 0.03052402138710022, "step": 18115 }, { "epoch": 0.17054117647058822, "grad_norm": 0.8662992356441238, "learning_rate": 4.844283100429392e-06, "loss": 0.03161599636077881, "step": 18120 }, { "epoch": 0.17058823529411765, "grad_norm": 0.6718724858793298, "learning_rate": 4.843614840490437e-06, "loss": 0.023296457529067994, "step": 18125 }, { "epoch": 0.17063529411764705, "grad_norm": 0.6125305274173177, "learning_rate": 4.84294685703089e-06, "loss": 0.031631487607955935, "step": 18130 }, { "epoch": 0.17068235294117648, "grad_norm": 0.6112547126451298, "learning_rate": 4.842279149860157e-06, "loss": 0.03153960704803467, "step": 18135 }, { "epoch": 0.17072941176470588, "grad_norm": 0.6368920632834464, "learning_rate": 4.841611718787827e-06, "loss": 0.02820965051651001, "step": 18140 }, { "epoch": 0.1707764705882353, "grad_norm": 0.6480239092758876, "learning_rate": 4.840944563623673e-06, "loss": 0.02646123468875885, "step": 18145 }, { "epoch": 0.1708235294117647, "grad_norm": 0.7643388817973558, "learning_rate": 4.840277684177656e-06, "loss": 0.02036459296941757, "step": 18150 }, { "epoch": 0.1708705882352941, "grad_norm": 0.5335022223258524, "learning_rate": 4.839611080259912e-06, "loss": 0.02833578586578369, "step": 18155 }, { "epoch": 0.17091764705882354, "grad_norm": 0.8084131197629207, "learning_rate": 4.838944751680766e-06, "loss": 0.029394632577896117, "step": 18160 }, { "epoch": 0.17096470588235294, "grad_norm": 0.5486625376615994, "learning_rate": 4.838278698250723e-06, "loss": 0.028243714570999147, "step": 18165 }, { "epoch": 0.17101176470588236, "grad_norm": 0.6296174458424064, "learning_rate": 4.837612919780473e-06, "loss": 0.035017895698547366, "step": 18170 }, { "epoch": 0.17105882352941176, "grad_norm": 0.539766918764927, "learning_rate": 4.836947416080885e-06, "loss": 0.02178599238395691, "step": 18175 }, { "epoch": 0.17110588235294116, "grad_norm": 0.8508934057071477, "learning_rate": 4.836282186963014e-06, "loss": 0.030707544088363646, "step": 18180 }, { "epoch": 0.1711529411764706, "grad_norm": 1.0055441973153352, "learning_rate": 4.835617232238094e-06, "loss": 0.027108851075172424, "step": 18185 }, { "epoch": 0.1712, "grad_norm": 0.5388258837696492, "learning_rate": 4.83495255171754e-06, "loss": 0.023639555275440215, "step": 18190 }, { "epoch": 0.17124705882352942, "grad_norm": 0.6765364957312597, "learning_rate": 4.83428814521295e-06, "loss": 0.03822364211082459, "step": 18195 }, { "epoch": 0.17129411764705882, "grad_norm": 0.6331506144096298, "learning_rate": 4.833624012536105e-06, "loss": 0.02554706931114197, "step": 18200 }, { "epoch": 0.17134117647058825, "grad_norm": 0.4804042093996092, "learning_rate": 4.832960153498963e-06, "loss": 0.02619016170501709, "step": 18205 }, { "epoch": 0.17138823529411765, "grad_norm": 1.168897207555667, "learning_rate": 4.832296567913667e-06, "loss": 0.036110317707061766, "step": 18210 }, { "epoch": 0.17143529411764705, "grad_norm": 0.6022490567762775, "learning_rate": 4.831633255592535e-06, "loss": 0.02519981861114502, "step": 18215 }, { "epoch": 0.17148235294117647, "grad_norm": 0.7221984721858528, "learning_rate": 4.830970216348069e-06, "loss": 0.02933276295661926, "step": 18220 }, { "epoch": 0.17152941176470587, "grad_norm": 0.8898726172884012, "learning_rate": 4.830307449992953e-06, "loss": 0.031526750326156615, "step": 18225 }, { "epoch": 0.1715764705882353, "grad_norm": 1.0122592740956529, "learning_rate": 4.829644956340046e-06, "loss": 0.030021196603775023, "step": 18230 }, { "epoch": 0.1716235294117647, "grad_norm": 0.6850189746687191, "learning_rate": 4.82898273520239e-06, "loss": 0.024149367213249208, "step": 18235 }, { "epoch": 0.17167058823529413, "grad_norm": 1.0879455332195447, "learning_rate": 4.828320786393204e-06, "loss": 0.02987854480743408, "step": 18240 }, { "epoch": 0.17171764705882353, "grad_norm": 0.6562081576586438, "learning_rate": 4.82765910972589e-06, "loss": 0.030442404747009277, "step": 18245 }, { "epoch": 0.17176470588235293, "grad_norm": 0.673614044491103, "learning_rate": 4.826997705014023e-06, "loss": 0.028042489290237428, "step": 18250 }, { "epoch": 0.17181176470588236, "grad_norm": 0.7141991386465479, "learning_rate": 4.826336572071364e-06, "loss": 0.03166952133178711, "step": 18255 }, { "epoch": 0.17185882352941176, "grad_norm": 0.9466644534127653, "learning_rate": 4.825675710711845e-06, "loss": 0.03390148878097534, "step": 18260 }, { "epoch": 0.1719058823529412, "grad_norm": 0.7121981641896289, "learning_rate": 4.825015120749583e-06, "loss": 0.025668853521347047, "step": 18265 }, { "epoch": 0.1719529411764706, "grad_norm": 0.5292611049166792, "learning_rate": 4.824354801998869e-06, "loss": 0.02750387489795685, "step": 18270 }, { "epoch": 0.172, "grad_norm": 0.8409081188241745, "learning_rate": 4.823694754274173e-06, "loss": 0.03554849922657013, "step": 18275 }, { "epoch": 0.17204705882352941, "grad_norm": 0.4123688532509156, "learning_rate": 4.823034977390141e-06, "loss": 0.028133714199066163, "step": 18280 }, { "epoch": 0.17209411764705881, "grad_norm": 0.511211166741872, "learning_rate": 4.8223754711616014e-06, "loss": 0.028919529914855958, "step": 18285 }, { "epoch": 0.17214117647058824, "grad_norm": 0.7636429500183481, "learning_rate": 4.821716235403555e-06, "loss": 0.03528345227241516, "step": 18290 }, { "epoch": 0.17218823529411764, "grad_norm": 0.5738908830943589, "learning_rate": 4.821057269931178e-06, "loss": 0.026233699917793275, "step": 18295 }, { "epoch": 0.17223529411764707, "grad_norm": 0.6270275862031853, "learning_rate": 4.820398574559831e-06, "loss": 0.025254902243614197, "step": 18300 }, { "epoch": 0.17228235294117647, "grad_norm": 0.7390213828108174, "learning_rate": 4.8197401491050436e-06, "loss": 0.02730374038219452, "step": 18305 }, { "epoch": 0.17232941176470587, "grad_norm": 0.6603431644602265, "learning_rate": 4.819081993382525e-06, "loss": 0.029008808732032775, "step": 18310 }, { "epoch": 0.1723764705882353, "grad_norm": 0.515258518195455, "learning_rate": 4.8184241072081615e-06, "loss": 0.030306905508041382, "step": 18315 }, { "epoch": 0.1724235294117647, "grad_norm": 0.7660315330448952, "learning_rate": 4.8177664903980115e-06, "loss": 0.0355503648519516, "step": 18320 }, { "epoch": 0.17247058823529413, "grad_norm": 0.8611708816263194, "learning_rate": 4.8171091427683134e-06, "loss": 0.035641273856163024, "step": 18325 }, { "epoch": 0.17251764705882353, "grad_norm": 0.6870415657642744, "learning_rate": 4.816452064135477e-06, "loss": 0.027706369757652283, "step": 18330 }, { "epoch": 0.17256470588235295, "grad_norm": 0.50013755769495, "learning_rate": 4.815795254316094e-06, "loss": 0.033021706342697146, "step": 18335 }, { "epoch": 0.17261176470588235, "grad_norm": 0.5270777486192726, "learning_rate": 4.815138713126922e-06, "loss": 0.03164072036743164, "step": 18340 }, { "epoch": 0.17265882352941175, "grad_norm": 0.5475532600971739, "learning_rate": 4.814482440384899e-06, "loss": 0.02153840661048889, "step": 18345 }, { "epoch": 0.17270588235294118, "grad_norm": 0.5091663973838847, "learning_rate": 4.813826435907139e-06, "loss": 0.03421058058738709, "step": 18350 }, { "epoch": 0.17275294117647058, "grad_norm": 0.6049807336895917, "learning_rate": 4.813170699510926e-06, "loss": 0.02760584354400635, "step": 18355 }, { "epoch": 0.1728, "grad_norm": 0.7008203810751573, "learning_rate": 4.81251523101372e-06, "loss": 0.026537084579467775, "step": 18360 }, { "epoch": 0.1728470588235294, "grad_norm": 1.5547513571992553, "learning_rate": 4.811860030233157e-06, "loss": 0.030470210313796996, "step": 18365 }, { "epoch": 0.17289411764705884, "grad_norm": 0.6452778206494723, "learning_rate": 4.811205096987043e-06, "loss": 0.02536934018135071, "step": 18370 }, { "epoch": 0.17294117647058824, "grad_norm": 0.7361395452216014, "learning_rate": 4.810550431093361e-06, "loss": 0.03144161701202393, "step": 18375 }, { "epoch": 0.17298823529411764, "grad_norm": 0.6362174104291051, "learning_rate": 4.8098960323702655e-06, "loss": 0.032531505823135375, "step": 18380 }, { "epoch": 0.17303529411764706, "grad_norm": 0.5388796901969498, "learning_rate": 4.809241900636084e-06, "loss": 0.03066382110118866, "step": 18385 }, { "epoch": 0.17308235294117646, "grad_norm": 0.6458947943530365, "learning_rate": 4.808588035709317e-06, "loss": 0.024902671575546265, "step": 18390 }, { "epoch": 0.1731294117647059, "grad_norm": 0.9227156782123115, "learning_rate": 4.807934437408639e-06, "loss": 0.032441872358322146, "step": 18395 }, { "epoch": 0.1731764705882353, "grad_norm": 0.4317192814740786, "learning_rate": 4.807281105552895e-06, "loss": 0.026157474517822264, "step": 18400 }, { "epoch": 0.1732235294117647, "grad_norm": 0.6571630804871356, "learning_rate": 4.806628039961103e-06, "loss": 0.030875933170318604, "step": 18405 }, { "epoch": 0.17327058823529412, "grad_norm": 0.7579652177778948, "learning_rate": 4.805975240452455e-06, "loss": 0.027716785669326782, "step": 18410 }, { "epoch": 0.17331764705882352, "grad_norm": 0.5810923501281325, "learning_rate": 4.805322706846313e-06, "loss": 0.029075348377227785, "step": 18415 }, { "epoch": 0.17336470588235295, "grad_norm": 0.6971595036870162, "learning_rate": 4.804670438962209e-06, "loss": 0.02952776551246643, "step": 18420 }, { "epoch": 0.17341176470588235, "grad_norm": 0.7508014319482913, "learning_rate": 4.804018436619849e-06, "loss": 0.026637792587280273, "step": 18425 }, { "epoch": 0.17345882352941178, "grad_norm": 0.5561448925304397, "learning_rate": 4.80336669963911e-06, "loss": 0.03005964159965515, "step": 18430 }, { "epoch": 0.17350588235294118, "grad_norm": 0.49225410718343776, "learning_rate": 4.80271522784004e-06, "loss": 0.02481197416782379, "step": 18435 }, { "epoch": 0.17355294117647058, "grad_norm": 0.8380092814138659, "learning_rate": 4.802064021042854e-06, "loss": 0.030576097965240478, "step": 18440 }, { "epoch": 0.1736, "grad_norm": 0.6271271250737214, "learning_rate": 4.801413079067945e-06, "loss": 0.02486768662929535, "step": 18445 }, { "epoch": 0.1736470588235294, "grad_norm": 0.818497339702259, "learning_rate": 4.80076240173587e-06, "loss": 0.02984780669212341, "step": 18450 }, { "epoch": 0.17369411764705883, "grad_norm": 0.6163672742987164, "learning_rate": 4.800111988867358e-06, "loss": 0.02067924439907074, "step": 18455 }, { "epoch": 0.17374117647058823, "grad_norm": 0.43935057895157364, "learning_rate": 4.799461840283311e-06, "loss": 0.028436833620071413, "step": 18460 }, { "epoch": 0.17378823529411766, "grad_norm": 0.6165113163888005, "learning_rate": 4.798811955804796e-06, "loss": 0.02881472110748291, "step": 18465 }, { "epoch": 0.17383529411764706, "grad_norm": 0.7122748174012455, "learning_rate": 4.7981623352530514e-06, "loss": 0.027766263484954833, "step": 18470 }, { "epoch": 0.17388235294117646, "grad_norm": 0.5208694867039252, "learning_rate": 4.797512978449486e-06, "loss": 0.02289808988571167, "step": 18475 }, { "epoch": 0.1739294117647059, "grad_norm": 0.5112189900884834, "learning_rate": 4.7968638852156766e-06, "loss": 0.02842220962047577, "step": 18480 }, { "epoch": 0.1739764705882353, "grad_norm": 0.6263698127649867, "learning_rate": 4.79621505537337e-06, "loss": 0.03177317976951599, "step": 18485 }, { "epoch": 0.17402352941176472, "grad_norm": 0.6643714422995854, "learning_rate": 4.79556648874448e-06, "loss": 0.02820305824279785, "step": 18490 }, { "epoch": 0.17407058823529412, "grad_norm": 0.569141585353669, "learning_rate": 4.794918185151092e-06, "loss": 0.0236525759100914, "step": 18495 }, { "epoch": 0.17411764705882352, "grad_norm": 0.6896509522034404, "learning_rate": 4.794270144415455e-06, "loss": 0.02684495151042938, "step": 18500 }, { "epoch": 0.17416470588235294, "grad_norm": 0.6870894886894107, "learning_rate": 4.793622366359991e-06, "loss": 0.028147315979003905, "step": 18505 }, { "epoch": 0.17421176470588234, "grad_norm": 0.5435160764160527, "learning_rate": 4.792974850807286e-06, "loss": 0.02756371796131134, "step": 18510 }, { "epoch": 0.17425882352941177, "grad_norm": 1.0048900634499542, "learning_rate": 4.792327597580096e-06, "loss": 0.0414174884557724, "step": 18515 }, { "epoch": 0.17430588235294117, "grad_norm": 0.7267400343224798, "learning_rate": 4.791680606501346e-06, "loss": 0.02805480659008026, "step": 18520 }, { "epoch": 0.1743529411764706, "grad_norm": 0.7117904449980323, "learning_rate": 4.791033877394122e-06, "loss": 0.029221367835998536, "step": 18525 }, { "epoch": 0.1744, "grad_norm": 0.8118106698209472, "learning_rate": 4.790387410081686e-06, "loss": 0.024501140415668487, "step": 18530 }, { "epoch": 0.1744470588235294, "grad_norm": 0.7618172815082926, "learning_rate": 4.789741204387458e-06, "loss": 0.030515408515930174, "step": 18535 }, { "epoch": 0.17449411764705883, "grad_norm": 0.6000751396174194, "learning_rate": 4.789095260135031e-06, "loss": 0.030011284351348876, "step": 18540 }, { "epoch": 0.17454117647058823, "grad_norm": 0.5553776698295537, "learning_rate": 4.788449577148163e-06, "loss": 0.028880402445793152, "step": 18545 }, { "epoch": 0.17458823529411766, "grad_norm": 0.7064635065720798, "learning_rate": 4.787804155250778e-06, "loss": 0.02861015796661377, "step": 18550 }, { "epoch": 0.17463529411764706, "grad_norm": 0.6723300576456804, "learning_rate": 4.7871589942669625e-06, "loss": 0.027365824580192565, "step": 18555 }, { "epoch": 0.17468235294117648, "grad_norm": 0.5289632280415443, "learning_rate": 4.786514094020975e-06, "loss": 0.027403348684310914, "step": 18560 }, { "epoch": 0.17472941176470588, "grad_norm": 0.6486907543448166, "learning_rate": 4.785869454337237e-06, "loss": 0.02821420431137085, "step": 18565 }, { "epoch": 0.17477647058823528, "grad_norm": 0.5363176317801435, "learning_rate": 4.785225075040334e-06, "loss": 0.037904354929924014, "step": 18570 }, { "epoch": 0.1748235294117647, "grad_norm": 3.724794094700295, "learning_rate": 4.784580955955018e-06, "loss": 0.031574711203575134, "step": 18575 }, { "epoch": 0.1748705882352941, "grad_norm": 0.6951005027948528, "learning_rate": 4.7839370969062075e-06, "loss": 0.03359992802143097, "step": 18580 }, { "epoch": 0.17491764705882354, "grad_norm": 0.9190320494952741, "learning_rate": 4.783293497718984e-06, "loss": 0.02883494198322296, "step": 18585 }, { "epoch": 0.17496470588235294, "grad_norm": 0.6372193454352796, "learning_rate": 4.782650158218593e-06, "loss": 0.02678671181201935, "step": 18590 }, { "epoch": 0.17501176470588237, "grad_norm": 0.8033299544416435, "learning_rate": 4.782007078230449e-06, "loss": 0.02767922878265381, "step": 18595 }, { "epoch": 0.17505882352941177, "grad_norm": 0.7115497945905599, "learning_rate": 4.781364257580123e-06, "loss": 0.030134883522987366, "step": 18600 }, { "epoch": 0.17510588235294117, "grad_norm": 0.609300309571918, "learning_rate": 4.780721696093356e-06, "loss": 0.028572869300842286, "step": 18605 }, { "epoch": 0.1751529411764706, "grad_norm": 0.7355370884077259, "learning_rate": 4.780079393596055e-06, "loss": 0.029534608125686646, "step": 18610 }, { "epoch": 0.1752, "grad_norm": 0.5368339448289736, "learning_rate": 4.779437349914284e-06, "loss": 0.02902466058731079, "step": 18615 }, { "epoch": 0.17524705882352942, "grad_norm": 0.6082394810541556, "learning_rate": 4.778795564874273e-06, "loss": 0.02629016637802124, "step": 18620 }, { "epoch": 0.17529411764705882, "grad_norm": 0.8374467805854633, "learning_rate": 4.7781540383024185e-06, "loss": 0.02802714705467224, "step": 18625 }, { "epoch": 0.17534117647058822, "grad_norm": 0.5292141797101322, "learning_rate": 4.7775127700252746e-06, "loss": 0.025805586576461793, "step": 18630 }, { "epoch": 0.17538823529411765, "grad_norm": 0.6014007973039623, "learning_rate": 4.7768717598695646e-06, "loss": 0.023410800099372863, "step": 18635 }, { "epoch": 0.17543529411764705, "grad_norm": 1.2874852104353685, "learning_rate": 4.7762310076621685e-06, "loss": 0.0320695161819458, "step": 18640 }, { "epoch": 0.17548235294117648, "grad_norm": 0.5396715382303152, "learning_rate": 4.775590513230133e-06, "loss": 0.02653513550758362, "step": 18645 }, { "epoch": 0.17552941176470588, "grad_norm": 0.7108267047046125, "learning_rate": 4.774950276400663e-06, "loss": 0.024536275863647462, "step": 18650 }, { "epoch": 0.1755764705882353, "grad_norm": 0.6464697331247802, "learning_rate": 4.774310297001132e-06, "loss": 0.03348877429962158, "step": 18655 }, { "epoch": 0.1756235294117647, "grad_norm": 0.598366728968324, "learning_rate": 4.773670574859068e-06, "loss": 0.029433754086494446, "step": 18660 }, { "epoch": 0.1756705882352941, "grad_norm": 0.483126109345416, "learning_rate": 4.773031109802167e-06, "loss": 0.02522035241127014, "step": 18665 }, { "epoch": 0.17571764705882353, "grad_norm": 0.5436510509786365, "learning_rate": 4.772391901658282e-06, "loss": 0.03149473071098328, "step": 18670 }, { "epoch": 0.17576470588235293, "grad_norm": 0.6371252406006537, "learning_rate": 4.77175295025543e-06, "loss": 0.02923213839530945, "step": 18675 }, { "epoch": 0.17581176470588236, "grad_norm": 0.8483797789678558, "learning_rate": 4.771114255421787e-06, "loss": 0.02739756405353546, "step": 18680 }, { "epoch": 0.17585882352941176, "grad_norm": 0.6146478166563634, "learning_rate": 4.770475816985692e-06, "loss": 0.030422860383987428, "step": 18685 }, { "epoch": 0.1759058823529412, "grad_norm": 0.6391049074231127, "learning_rate": 4.769837634775644e-06, "loss": 0.02669089734554291, "step": 18690 }, { "epoch": 0.1759529411764706, "grad_norm": 0.5621469975400546, "learning_rate": 4.769199708620302e-06, "loss": 0.026719093322753906, "step": 18695 }, { "epoch": 0.176, "grad_norm": 0.6520012763796034, "learning_rate": 4.768562038348486e-06, "loss": 0.026250991225242614, "step": 18700 }, { "epoch": 0.17604705882352942, "grad_norm": 0.4612952762954328, "learning_rate": 4.767924623789176e-06, "loss": 0.025311893224716185, "step": 18705 }, { "epoch": 0.17609411764705882, "grad_norm": 0.6482727992283092, "learning_rate": 4.7672874647715135e-06, "loss": 0.02666986882686615, "step": 18710 }, { "epoch": 0.17614117647058825, "grad_norm": 1.2152976578467483, "learning_rate": 4.766650561124795e-06, "loss": 0.023393335938453674, "step": 18715 }, { "epoch": 0.17618823529411765, "grad_norm": 0.5487779897542115, "learning_rate": 4.766013912678483e-06, "loss": 0.026143604516983034, "step": 18720 }, { "epoch": 0.17623529411764705, "grad_norm": 0.6040644524696662, "learning_rate": 4.765377519262195e-06, "loss": 0.024774454534053802, "step": 18725 }, { "epoch": 0.17628235294117647, "grad_norm": 0.6751754196484822, "learning_rate": 4.76474138070571e-06, "loss": 0.030469322204589845, "step": 18730 }, { "epoch": 0.17632941176470587, "grad_norm": 0.6951663181533145, "learning_rate": 4.764105496838962e-06, "loss": 0.030366784334182738, "step": 18735 }, { "epoch": 0.1763764705882353, "grad_norm": 0.560086183673622, "learning_rate": 4.76346986749205e-06, "loss": 0.026389318704605102, "step": 18740 }, { "epoch": 0.1764235294117647, "grad_norm": 0.803478391215047, "learning_rate": 4.762834492495227e-06, "loss": 0.03034104108810425, "step": 18745 }, { "epoch": 0.17647058823529413, "grad_norm": 0.6822525352962866, "learning_rate": 4.7621993716789065e-06, "loss": 0.050726181268692015, "step": 18750 }, { "epoch": 0.17651764705882353, "grad_norm": 0.7438574117050887, "learning_rate": 4.761564504873659e-06, "loss": 0.028660625219345093, "step": 18755 }, { "epoch": 0.17656470588235293, "grad_norm": 1.1864981514535418, "learning_rate": 4.7609298919102145e-06, "loss": 0.03075100779533386, "step": 18760 }, { "epoch": 0.17661176470588236, "grad_norm": 0.5378169617524676, "learning_rate": 4.760295532619461e-06, "loss": 0.02812625765800476, "step": 18765 }, { "epoch": 0.17665882352941176, "grad_norm": 0.6257605553622366, "learning_rate": 4.759661426832439e-06, "loss": 0.02862437963485718, "step": 18770 }, { "epoch": 0.17670588235294118, "grad_norm": 0.702490477786939, "learning_rate": 4.759027574380357e-06, "loss": 0.029412299394607544, "step": 18775 }, { "epoch": 0.17675294117647058, "grad_norm": 0.7020689736027932, "learning_rate": 4.75839397509457e-06, "loss": 0.024349454045295715, "step": 18780 }, { "epoch": 0.1768, "grad_norm": 0.7345368596961469, "learning_rate": 4.757760628806598e-06, "loss": 0.028094255924224855, "step": 18785 }, { "epoch": 0.1768470588235294, "grad_norm": 0.664894552548446, "learning_rate": 4.757127535348111e-06, "loss": 0.02558029294013977, "step": 18790 }, { "epoch": 0.1768941176470588, "grad_norm": 0.6354401826975732, "learning_rate": 4.756494694550943e-06, "loss": 0.03154321908950806, "step": 18795 }, { "epoch": 0.17694117647058824, "grad_norm": 0.7305823181562763, "learning_rate": 4.755862106247079e-06, "loss": 0.027921268343925477, "step": 18800 }, { "epoch": 0.17698823529411764, "grad_norm": 0.52125738974846, "learning_rate": 4.755229770268663e-06, "loss": 0.025349920988082884, "step": 18805 }, { "epoch": 0.17703529411764707, "grad_norm": 0.5405760756561323, "learning_rate": 4.754597686447993e-06, "loss": 0.029660838842391967, "step": 18810 }, { "epoch": 0.17708235294117647, "grad_norm": 0.6873489587191136, "learning_rate": 4.753965854617527e-06, "loss": 0.025434738397598265, "step": 18815 }, { "epoch": 0.17712941176470587, "grad_norm": 0.7525085569927897, "learning_rate": 4.753334274609875e-06, "loss": 0.02953101396560669, "step": 18820 }, { "epoch": 0.1771764705882353, "grad_norm": 0.6949648972163559, "learning_rate": 4.752702946257805e-06, "loss": 0.02626454532146454, "step": 18825 }, { "epoch": 0.1772235294117647, "grad_norm": 0.746413476497638, "learning_rate": 4.752071869394238e-06, "loss": 0.025403901934623718, "step": 18830 }, { "epoch": 0.17727058823529412, "grad_norm": 0.5463044214770387, "learning_rate": 4.751441043852253e-06, "loss": 0.027075794339179993, "step": 18835 }, { "epoch": 0.17731764705882352, "grad_norm": 0.7365150246333145, "learning_rate": 4.750810469465081e-06, "loss": 0.03171863555908203, "step": 18840 }, { "epoch": 0.17736470588235295, "grad_norm": 0.7378204458232989, "learning_rate": 4.750180146066112e-06, "loss": 0.03497408032417297, "step": 18845 }, { "epoch": 0.17741176470588235, "grad_norm": 0.49550630154859243, "learning_rate": 4.749550073488886e-06, "loss": 0.029375213384628295, "step": 18850 }, { "epoch": 0.17745882352941175, "grad_norm": 0.9419026612295964, "learning_rate": 4.7489202515671015e-06, "loss": 0.02746914029121399, "step": 18855 }, { "epoch": 0.17750588235294118, "grad_norm": 0.5047326559238936, "learning_rate": 4.748290680134609e-06, "loss": 0.024956777691841125, "step": 18860 }, { "epoch": 0.17755294117647058, "grad_norm": 0.6881796494688394, "learning_rate": 4.747661359025414e-06, "loss": 0.027338898181915282, "step": 18865 }, { "epoch": 0.1776, "grad_norm": 0.6142208959663021, "learning_rate": 4.747032288073676e-06, "loss": 0.030033195018768312, "step": 18870 }, { "epoch": 0.1776470588235294, "grad_norm": 0.621635885164129, "learning_rate": 4.7464034671137065e-06, "loss": 0.03149566054344177, "step": 18875 }, { "epoch": 0.17769411764705884, "grad_norm": 0.9599892961428234, "learning_rate": 4.745774895979975e-06, "loss": 0.033447229862213136, "step": 18880 }, { "epoch": 0.17774117647058824, "grad_norm": 0.7168592198775496, "learning_rate": 4.7451465745070994e-06, "loss": 0.024623383581638337, "step": 18885 }, { "epoch": 0.17778823529411764, "grad_norm": 0.7077662811562798, "learning_rate": 4.7445185025298535e-06, "loss": 0.027946868538856508, "step": 18890 }, { "epoch": 0.17783529411764706, "grad_norm": 0.5212173323988637, "learning_rate": 4.7438906798831625e-06, "loss": 0.02947869598865509, "step": 18895 }, { "epoch": 0.17788235294117646, "grad_norm": 0.9405426279913885, "learning_rate": 4.743263106402108e-06, "loss": 0.02882511615753174, "step": 18900 }, { "epoch": 0.1779294117647059, "grad_norm": 0.494345057223211, "learning_rate": 4.74263578192192e-06, "loss": 0.030681726336479188, "step": 18905 }, { "epoch": 0.1779764705882353, "grad_norm": 1.0259406525897405, "learning_rate": 4.742008706277985e-06, "loss": 0.031225377321243288, "step": 18910 }, { "epoch": 0.17802352941176472, "grad_norm": 0.42911780354597656, "learning_rate": 4.741381879305837e-06, "loss": 0.02535125017166138, "step": 18915 }, { "epoch": 0.17807058823529412, "grad_norm": 0.6106532289922756, "learning_rate": 4.740755300841167e-06, "loss": 0.029338058829307557, "step": 18920 }, { "epoch": 0.17811764705882352, "grad_norm": 0.5727886155171401, "learning_rate": 4.740128970719814e-06, "loss": 0.030810439586639406, "step": 18925 }, { "epoch": 0.17816470588235295, "grad_norm": 0.5974702194171597, "learning_rate": 4.739502888777773e-06, "loss": 0.0250252902507782, "step": 18930 }, { "epoch": 0.17821176470588235, "grad_norm": 0.5126051122130717, "learning_rate": 4.7388770548511856e-06, "loss": 0.031074666976928712, "step": 18935 }, { "epoch": 0.17825882352941178, "grad_norm": 0.8212512928604898, "learning_rate": 4.738251468776347e-06, "loss": 0.026088374853134155, "step": 18940 }, { "epoch": 0.17830588235294118, "grad_norm": 0.701750522553071, "learning_rate": 4.737626130389708e-06, "loss": 0.033437705039978026, "step": 18945 }, { "epoch": 0.17835294117647058, "grad_norm": 0.49351290029923434, "learning_rate": 4.737001039527862e-06, "loss": 0.030622261762619018, "step": 18950 }, { "epoch": 0.1784, "grad_norm": 0.89241689766598, "learning_rate": 4.7363761960275605e-06, "loss": 0.034037715196609496, "step": 18955 }, { "epoch": 0.1784470588235294, "grad_norm": 0.5722796680939295, "learning_rate": 4.735751599725703e-06, "loss": 0.033879315853118895, "step": 18960 }, { "epoch": 0.17849411764705883, "grad_norm": 0.7396081283070256, "learning_rate": 4.7351272504593385e-06, "loss": 0.026536136865615845, "step": 18965 }, { "epoch": 0.17854117647058823, "grad_norm": 0.9117060153562502, "learning_rate": 4.734503148065666e-06, "loss": 0.03191929757595062, "step": 18970 }, { "epoch": 0.17858823529411766, "grad_norm": 0.6269548484294721, "learning_rate": 4.7338792923820385e-06, "loss": 0.030043286085128785, "step": 18975 }, { "epoch": 0.17863529411764706, "grad_norm": 0.7372911247791167, "learning_rate": 4.733255683245955e-06, "loss": 0.03062742352485657, "step": 18980 }, { "epoch": 0.17868235294117646, "grad_norm": 0.6272892814064164, "learning_rate": 4.732632320495067e-06, "loss": 0.027019786834716796, "step": 18985 }, { "epoch": 0.1787294117647059, "grad_norm": 0.6414149430175976, "learning_rate": 4.732009203967174e-06, "loss": 0.027829355001449584, "step": 18990 }, { "epoch": 0.1787764705882353, "grad_norm": 1.7918005166930324, "learning_rate": 4.731386333500223e-06, "loss": 0.02578674554824829, "step": 18995 }, { "epoch": 0.17882352941176471, "grad_norm": 0.6463770094722155, "learning_rate": 4.730763708932317e-06, "loss": 0.03604620695114136, "step": 19000 }, { "epoch": 0.17887058823529411, "grad_norm": 0.627545830871125, "learning_rate": 4.7301413301016995e-06, "loss": 0.031733617186546326, "step": 19005 }, { "epoch": 0.17891764705882354, "grad_norm": 0.6816669475470628, "learning_rate": 4.729519196846769e-06, "loss": 0.029207229614257812, "step": 19010 }, { "epoch": 0.17896470588235294, "grad_norm": 0.4849559745801289, "learning_rate": 4.72889730900607e-06, "loss": 0.02627899944782257, "step": 19015 }, { "epoch": 0.17901176470588234, "grad_norm": 0.49578772070735266, "learning_rate": 4.7282756664182985e-06, "loss": 0.030431121587753296, "step": 19020 }, { "epoch": 0.17905882352941177, "grad_norm": 0.66865182304636, "learning_rate": 4.727654268922296e-06, "loss": 0.02471870183944702, "step": 19025 }, { "epoch": 0.17910588235294117, "grad_norm": 1.7068032825017594, "learning_rate": 4.727033116357052e-06, "loss": 0.022545620799064636, "step": 19030 }, { "epoch": 0.1791529411764706, "grad_norm": 1.1279591821511716, "learning_rate": 4.726412208561706e-06, "loss": 0.03151218593120575, "step": 19035 }, { "epoch": 0.1792, "grad_norm": 0.7385127088935207, "learning_rate": 4.725791545375545e-06, "loss": 0.025339466333389283, "step": 19040 }, { "epoch": 0.1792470588235294, "grad_norm": 1.126492248048445, "learning_rate": 4.725171126638002e-06, "loss": 0.03147286772727966, "step": 19045 }, { "epoch": 0.17929411764705883, "grad_norm": 0.9444125445479311, "learning_rate": 4.724550952188659e-06, "loss": 0.029308778047561646, "step": 19050 }, { "epoch": 0.17934117647058823, "grad_norm": 0.8379290884331703, "learning_rate": 4.723931021867246e-06, "loss": 0.030907082557678222, "step": 19055 }, { "epoch": 0.17938823529411765, "grad_norm": 0.5896795813302693, "learning_rate": 4.723311335513638e-06, "loss": 0.025474828481674195, "step": 19060 }, { "epoch": 0.17943529411764705, "grad_norm": 0.5036182422596979, "learning_rate": 4.7226918929678575e-06, "loss": 0.025595858693122864, "step": 19065 }, { "epoch": 0.17948235294117648, "grad_norm": 0.816095938962893, "learning_rate": 4.722072694070077e-06, "loss": 0.026338210701942442, "step": 19070 }, { "epoch": 0.17952941176470588, "grad_norm": 0.6389465054280806, "learning_rate": 4.7214537386606104e-06, "loss": 0.02880522608757019, "step": 19075 }, { "epoch": 0.17957647058823528, "grad_norm": 0.857839019874524, "learning_rate": 4.720835026579924e-06, "loss": 0.03934252560138703, "step": 19080 }, { "epoch": 0.1796235294117647, "grad_norm": 0.7642649547155296, "learning_rate": 4.720216557668626e-06, "loss": 0.029527848958969115, "step": 19085 }, { "epoch": 0.1796705882352941, "grad_norm": 1.058614514533546, "learning_rate": 4.719598331767469e-06, "loss": 0.028807374835014343, "step": 19090 }, { "epoch": 0.17971764705882354, "grad_norm": 0.5982893078981575, "learning_rate": 4.718980348717359e-06, "loss": 0.029163816571235658, "step": 19095 }, { "epoch": 0.17976470588235294, "grad_norm": 0.6264795241772466, "learning_rate": 4.7183626083593405e-06, "loss": 0.028523147106170654, "step": 19100 }, { "epoch": 0.17981176470588237, "grad_norm": 0.608087972346409, "learning_rate": 4.7177451105346075e-06, "loss": 0.029742711782455446, "step": 19105 }, { "epoch": 0.17985882352941177, "grad_norm": 0.8667759149516151, "learning_rate": 4.7171278550845e-06, "loss": 0.029766228795051575, "step": 19110 }, { "epoch": 0.17990588235294117, "grad_norm": 0.5393072075367896, "learning_rate": 4.716510841850498e-06, "loss": 0.02385125756263733, "step": 19115 }, { "epoch": 0.1799529411764706, "grad_norm": 0.5930023172382625, "learning_rate": 4.715894070674233e-06, "loss": 0.029958289861679078, "step": 19120 }, { "epoch": 0.18, "grad_norm": 0.6542504285347513, "learning_rate": 4.715277541397478e-06, "loss": 0.03281049728393555, "step": 19125 }, { "epoch": 0.18004705882352942, "grad_norm": 0.8918602962872436, "learning_rate": 4.714661253862152e-06, "loss": 0.032416743040084836, "step": 19130 }, { "epoch": 0.18009411764705882, "grad_norm": 0.6196694876904244, "learning_rate": 4.714045207910318e-06, "loss": 0.02618785500526428, "step": 19135 }, { "epoch": 0.18014117647058825, "grad_norm": 0.5844115345979076, "learning_rate": 4.713429403384181e-06, "loss": 0.02718856930732727, "step": 19140 }, { "epoch": 0.18018823529411765, "grad_norm": 0.4146369010368529, "learning_rate": 4.712813840126096e-06, "loss": 0.022464409470558167, "step": 19145 }, { "epoch": 0.18023529411764705, "grad_norm": 0.6072710920493211, "learning_rate": 4.712198517978558e-06, "loss": 0.025882786512374877, "step": 19150 }, { "epoch": 0.18028235294117648, "grad_norm": 0.8857791711607985, "learning_rate": 4.7115834367842055e-06, "loss": 0.02920742928981781, "step": 19155 }, { "epoch": 0.18032941176470588, "grad_norm": 0.5702762330485572, "learning_rate": 4.710968596385823e-06, "loss": 0.03029974699020386, "step": 19160 }, { "epoch": 0.1803764705882353, "grad_norm": 0.6679956369025516, "learning_rate": 4.710353996626338e-06, "loss": 0.02989904284477234, "step": 19165 }, { "epoch": 0.1804235294117647, "grad_norm": 0.7329027962290335, "learning_rate": 4.70973963734882e-06, "loss": 0.027913972735404968, "step": 19170 }, { "epoch": 0.1804705882352941, "grad_norm": 0.7593597676318967, "learning_rate": 4.7091255183964825e-06, "loss": 0.028574818372726442, "step": 19175 }, { "epoch": 0.18051764705882353, "grad_norm": 0.5749188232727386, "learning_rate": 4.708511639612682e-06, "loss": 0.023380491137504577, "step": 19180 }, { "epoch": 0.18056470588235293, "grad_norm": 0.616071864466767, "learning_rate": 4.7078980008409206e-06, "loss": 0.029570615291595458, "step": 19185 }, { "epoch": 0.18061176470588236, "grad_norm": 0.6926054198995576, "learning_rate": 4.707284601924839e-06, "loss": 0.03431960344314575, "step": 19190 }, { "epoch": 0.18065882352941176, "grad_norm": 0.6191395903519077, "learning_rate": 4.706671442708221e-06, "loss": 0.02430673986673355, "step": 19195 }, { "epoch": 0.1807058823529412, "grad_norm": 0.8362426476554152, "learning_rate": 4.706058523034996e-06, "loss": 0.02949390411376953, "step": 19200 }, { "epoch": 0.1807529411764706, "grad_norm": 0.5158685987052967, "learning_rate": 4.705445842749232e-06, "loss": 0.02698306441307068, "step": 19205 }, { "epoch": 0.1808, "grad_norm": 0.5341683796039897, "learning_rate": 4.704833401695143e-06, "loss": 0.02173864543437958, "step": 19210 }, { "epoch": 0.18084705882352942, "grad_norm": 0.5247746204737855, "learning_rate": 4.7042211997170804e-06, "loss": 0.024519488215446472, "step": 19215 }, { "epoch": 0.18089411764705882, "grad_norm": 0.4957230912292314, "learning_rate": 4.703609236659539e-06, "loss": 0.02823333740234375, "step": 19220 }, { "epoch": 0.18094117647058824, "grad_norm": 0.6519211651456991, "learning_rate": 4.7029975123671574e-06, "loss": 0.02765897214412689, "step": 19225 }, { "epoch": 0.18098823529411764, "grad_norm": 0.5745711782738357, "learning_rate": 4.7023860266847135e-06, "loss": 0.02380506694316864, "step": 19230 }, { "epoch": 0.18103529411764707, "grad_norm": 0.743761043499759, "learning_rate": 4.7017747794571264e-06, "loss": 0.035617098212242126, "step": 19235 }, { "epoch": 0.18108235294117647, "grad_norm": 0.5425211146383284, "learning_rate": 4.701163770529457e-06, "loss": 0.02229733169078827, "step": 19240 }, { "epoch": 0.18112941176470587, "grad_norm": 0.6560142331065336, "learning_rate": 4.700552999746907e-06, "loss": 0.028814703226089478, "step": 19245 }, { "epoch": 0.1811764705882353, "grad_norm": 0.6550406572372788, "learning_rate": 4.699942466954818e-06, "loss": 0.03392897248268127, "step": 19250 }, { "epoch": 0.1812235294117647, "grad_norm": 0.7400073961415168, "learning_rate": 4.699332171998673e-06, "loss": 0.029921814799308777, "step": 19255 }, { "epoch": 0.18127058823529413, "grad_norm": 0.5714736992441583, "learning_rate": 4.698722114724095e-06, "loss": 0.026792797446250915, "step": 19260 }, { "epoch": 0.18131764705882353, "grad_norm": 0.6518912426708589, "learning_rate": 4.698112294976847e-06, "loss": 0.02501595616340637, "step": 19265 }, { "epoch": 0.18136470588235293, "grad_norm": 0.610504114464912, "learning_rate": 4.697502712602836e-06, "loss": 0.025044602155685425, "step": 19270 }, { "epoch": 0.18141176470588236, "grad_norm": 0.7203196017316187, "learning_rate": 4.696893367448101e-06, "loss": 0.02969111204147339, "step": 19275 }, { "epoch": 0.18145882352941176, "grad_norm": 0.7082565924802816, "learning_rate": 4.696284259358828e-06, "loss": 0.022120849788188936, "step": 19280 }, { "epoch": 0.18150588235294118, "grad_norm": 0.5224345554580849, "learning_rate": 4.695675388181338e-06, "loss": 0.028477582335472106, "step": 19285 }, { "epoch": 0.18155294117647058, "grad_norm": 0.9040617529548585, "learning_rate": 4.695066753762094e-06, "loss": 0.03098587989807129, "step": 19290 }, { "epoch": 0.1816, "grad_norm": 1.0001891171343922, "learning_rate": 4.694458355947699e-06, "loss": 0.030419009923934936, "step": 19295 }, { "epoch": 0.1816470588235294, "grad_norm": 0.8133319679936962, "learning_rate": 4.693850194584891e-06, "loss": 0.028972035646438597, "step": 19300 }, { "epoch": 0.1816941176470588, "grad_norm": 0.5591757517595934, "learning_rate": 4.693242269520553e-06, "loss": 0.025508585572242736, "step": 19305 }, { "epoch": 0.18174117647058824, "grad_norm": 0.7149966180113198, "learning_rate": 4.6926345806016994e-06, "loss": 0.027045130729675293, "step": 19310 }, { "epoch": 0.18178823529411764, "grad_norm": 0.5545303827283091, "learning_rate": 4.69202712767549e-06, "loss": 0.019424985349178314, "step": 19315 }, { "epoch": 0.18183529411764707, "grad_norm": 0.6701280606415931, "learning_rate": 4.691419910589219e-06, "loss": 0.02870669960975647, "step": 19320 }, { "epoch": 0.18188235294117647, "grad_norm": 0.6829864624409393, "learning_rate": 4.690812929190321e-06, "loss": 0.033948767185211184, "step": 19325 }, { "epoch": 0.1819294117647059, "grad_norm": 0.6974148212838914, "learning_rate": 4.690206183326367e-06, "loss": 0.03209361135959625, "step": 19330 }, { "epoch": 0.1819764705882353, "grad_norm": 0.6276385353519316, "learning_rate": 4.689599672845069e-06, "loss": 0.023533758521080018, "step": 19335 }, { "epoch": 0.1820235294117647, "grad_norm": 0.5775323989201075, "learning_rate": 4.688993397594271e-06, "loss": 0.022120752930641176, "step": 19340 }, { "epoch": 0.18207058823529412, "grad_norm": 0.764929307973227, "learning_rate": 4.688387357421961e-06, "loss": 0.02753494381904602, "step": 19345 }, { "epoch": 0.18211764705882352, "grad_norm": 0.48821954146236796, "learning_rate": 4.687781552176262e-06, "loss": 0.026854437589645386, "step": 19350 }, { "epoch": 0.18216470588235295, "grad_norm": 0.6198849927915013, "learning_rate": 4.687175981705433e-06, "loss": 0.03303709030151367, "step": 19355 }, { "epoch": 0.18221176470588235, "grad_norm": 0.732322344849333, "learning_rate": 4.686570645857871e-06, "loss": 0.02642376720905304, "step": 19360 }, { "epoch": 0.18225882352941175, "grad_norm": 0.6965887178689515, "learning_rate": 4.685965544482113e-06, "loss": 0.028589248657226562, "step": 19365 }, { "epoch": 0.18230588235294118, "grad_norm": 0.6792188867971237, "learning_rate": 4.685360677426827e-06, "loss": 0.027282524108886718, "step": 19370 }, { "epoch": 0.18235294117647058, "grad_norm": 0.8181188710071802, "learning_rate": 4.684756044540823e-06, "loss": 0.031814408302307126, "step": 19375 }, { "epoch": 0.1824, "grad_norm": 0.7129339318869695, "learning_rate": 4.6841516456730446e-06, "loss": 0.023900794982910156, "step": 19380 }, { "epoch": 0.1824470588235294, "grad_norm": 0.8931321904076406, "learning_rate": 4.683547480672573e-06, "loss": 0.031832185387611386, "step": 19385 }, { "epoch": 0.18249411764705883, "grad_norm": 0.7630034090850044, "learning_rate": 4.6829435493886255e-06, "loss": 0.027426016330718995, "step": 19390 }, { "epoch": 0.18254117647058823, "grad_norm": 0.7018810152039794, "learning_rate": 4.682339851670555e-06, "loss": 0.036029654741287234, "step": 19395 }, { "epoch": 0.18258823529411763, "grad_norm": 0.6557949445098458, "learning_rate": 4.68173638736785e-06, "loss": 0.021950191259384154, "step": 19400 }, { "epoch": 0.18263529411764706, "grad_norm": 0.8028288888052371, "learning_rate": 4.6811331563301365e-06, "loss": 0.02741297483444214, "step": 19405 }, { "epoch": 0.18268235294117646, "grad_norm": 0.6599307649365166, "learning_rate": 4.6805301584071745e-06, "loss": 0.039183810353279114, "step": 19410 }, { "epoch": 0.1827294117647059, "grad_norm": 0.5668535904200097, "learning_rate": 4.6799273934488585e-06, "loss": 0.02609189450740814, "step": 19415 }, { "epoch": 0.1827764705882353, "grad_norm": 0.6681568694657768, "learning_rate": 4.679324861305221e-06, "loss": 0.027856242656707764, "step": 19420 }, { "epoch": 0.18282352941176472, "grad_norm": 0.6151331358377509, "learning_rate": 4.678722561826429e-06, "loss": 0.02193589210510254, "step": 19425 }, { "epoch": 0.18287058823529412, "grad_norm": 0.5360174559197423, "learning_rate": 4.678120494862782e-06, "loss": 0.02561263144016266, "step": 19430 }, { "epoch": 0.18291764705882352, "grad_norm": 0.6538769380037271, "learning_rate": 4.6775186602647165e-06, "loss": 0.026883676648139954, "step": 19435 }, { "epoch": 0.18296470588235295, "grad_norm": 0.5655339219130268, "learning_rate": 4.6769170578828036e-06, "loss": 0.023767824470996856, "step": 19440 }, { "epoch": 0.18301176470588235, "grad_norm": 0.5841855609133347, "learning_rate": 4.676315687567749e-06, "loss": 0.031450283527374265, "step": 19445 }, { "epoch": 0.18305882352941177, "grad_norm": 0.5919297509870529, "learning_rate": 4.675714549170391e-06, "loss": 0.026339256763458253, "step": 19450 }, { "epoch": 0.18310588235294117, "grad_norm": 0.6053180226271971, "learning_rate": 4.6751136425417035e-06, "loss": 0.032273119688034056, "step": 19455 }, { "epoch": 0.1831529411764706, "grad_norm": 0.6007346887060383, "learning_rate": 4.674512967532795e-06, "loss": 0.023216438293457032, "step": 19460 }, { "epoch": 0.1832, "grad_norm": 0.7627105480510629, "learning_rate": 4.673912523994906e-06, "loss": 0.02916530966758728, "step": 19465 }, { "epoch": 0.1832470588235294, "grad_norm": 0.4697538283297227, "learning_rate": 4.6733123117794135e-06, "loss": 0.021419614553451538, "step": 19470 }, { "epoch": 0.18329411764705883, "grad_norm": 0.6580808599507092, "learning_rate": 4.672712330737826e-06, "loss": 0.0331057071685791, "step": 19475 }, { "epoch": 0.18334117647058823, "grad_norm": 0.5037634217454564, "learning_rate": 4.672112580721783e-06, "loss": 0.02676050364971161, "step": 19480 }, { "epoch": 0.18338823529411766, "grad_norm": 0.6410915576863918, "learning_rate": 4.671513061583063e-06, "loss": 0.025173798203468323, "step": 19485 }, { "epoch": 0.18343529411764706, "grad_norm": 0.6839160092541685, "learning_rate": 4.670913773173575e-06, "loss": 0.029412686824798584, "step": 19490 }, { "epoch": 0.18348235294117646, "grad_norm": 0.44355071798364504, "learning_rate": 4.670314715345359e-06, "loss": 0.024215075373649596, "step": 19495 }, { "epoch": 0.18352941176470589, "grad_norm": 0.5358510324472807, "learning_rate": 4.6697158879505904e-06, "loss": 0.026021042466163637, "step": 19500 }, { "epoch": 0.18357647058823529, "grad_norm": 0.46823606814604046, "learning_rate": 4.669117290841575e-06, "loss": 0.025021713972091675, "step": 19505 }, { "epoch": 0.1836235294117647, "grad_norm": 0.6316226612145582, "learning_rate": 4.668518923870754e-06, "loss": 0.02805818021297455, "step": 19510 }, { "epoch": 0.1836705882352941, "grad_norm": 0.5324437226315728, "learning_rate": 4.667920786890699e-06, "loss": 0.027350401878356932, "step": 19515 }, { "epoch": 0.18371764705882354, "grad_norm": 0.7626366374468906, "learning_rate": 4.667322879754113e-06, "loss": 0.026002964377403258, "step": 19520 }, { "epoch": 0.18376470588235294, "grad_norm": 0.5582799514195235, "learning_rate": 4.666725202313833e-06, "loss": 0.026757305860519408, "step": 19525 }, { "epoch": 0.18381176470588234, "grad_norm": 0.5042913025768223, "learning_rate": 4.666127754422828e-06, "loss": 0.025893133878707886, "step": 19530 }, { "epoch": 0.18385882352941177, "grad_norm": 0.7887131922709149, "learning_rate": 4.665530535934195e-06, "loss": 0.028676646947860717, "step": 19535 }, { "epoch": 0.18390588235294117, "grad_norm": 0.5163806246622165, "learning_rate": 4.664933546701167e-06, "loss": 0.022064101696014405, "step": 19540 }, { "epoch": 0.1839529411764706, "grad_norm": 0.602377835092464, "learning_rate": 4.664336786577108e-06, "loss": 0.020960842072963715, "step": 19545 }, { "epoch": 0.184, "grad_norm": 0.5879471650572111, "learning_rate": 4.663740255415509e-06, "loss": 0.028613445162773133, "step": 19550 }, { "epoch": 0.18404705882352942, "grad_norm": 0.5928361116936555, "learning_rate": 4.663143953069999e-06, "loss": 0.028205737471580505, "step": 19555 }, { "epoch": 0.18409411764705882, "grad_norm": 0.6019688706174646, "learning_rate": 4.66254787939433e-06, "loss": 0.033631163835525515, "step": 19560 }, { "epoch": 0.18414117647058822, "grad_norm": 0.6338673910103597, "learning_rate": 4.661952034242392e-06, "loss": 0.027019569277763368, "step": 19565 }, { "epoch": 0.18418823529411765, "grad_norm": 1.0025364140604835, "learning_rate": 4.661356417468202e-06, "loss": 0.026184645295143128, "step": 19570 }, { "epoch": 0.18423529411764705, "grad_norm": 0.5602134811452438, "learning_rate": 4.660761028925906e-06, "loss": 0.022135809063911438, "step": 19575 }, { "epoch": 0.18428235294117648, "grad_norm": 0.4440208150738177, "learning_rate": 4.660165868469785e-06, "loss": 0.025332939624786378, "step": 19580 }, { "epoch": 0.18432941176470588, "grad_norm": 0.5161323870702144, "learning_rate": 4.659570935954248e-06, "loss": 0.0227832168340683, "step": 19585 }, { "epoch": 0.18437647058823528, "grad_norm": 0.5475964632987856, "learning_rate": 4.658976231233832e-06, "loss": 0.029485273361206054, "step": 19590 }, { "epoch": 0.1844235294117647, "grad_norm": 0.6237267412425551, "learning_rate": 4.658381754163206e-06, "loss": 0.023302686214447022, "step": 19595 }, { "epoch": 0.1844705882352941, "grad_norm": 0.7462850725745865, "learning_rate": 4.65778750459717e-06, "loss": 0.029418700933456422, "step": 19600 }, { "epoch": 0.18451764705882354, "grad_norm": 0.708297191523765, "learning_rate": 4.6571934823906515e-06, "loss": 0.030865380167961122, "step": 19605 }, { "epoch": 0.18456470588235294, "grad_norm": 0.556866772072519, "learning_rate": 4.656599687398706e-06, "loss": 0.026733604073524476, "step": 19610 }, { "epoch": 0.18461176470588236, "grad_norm": 0.8387052145438273, "learning_rate": 4.656006119476524e-06, "loss": 0.03175460696220398, "step": 19615 }, { "epoch": 0.18465882352941176, "grad_norm": 0.5338924739037475, "learning_rate": 4.655412778479419e-06, "loss": 0.03029518723487854, "step": 19620 }, { "epoch": 0.18470588235294116, "grad_norm": 0.7452251212350576, "learning_rate": 4.654819664262837e-06, "loss": 0.025577104091644286, "step": 19625 }, { "epoch": 0.1847529411764706, "grad_norm": 0.6456238568922823, "learning_rate": 4.654226776682352e-06, "loss": 0.02589026391506195, "step": 19630 }, { "epoch": 0.1848, "grad_norm": 0.5087243998514039, "learning_rate": 4.6536341155936684e-06, "loss": 0.02720799446105957, "step": 19635 }, { "epoch": 0.18484705882352942, "grad_norm": 0.49739354718283973, "learning_rate": 4.653041680852615e-06, "loss": 0.024101412296295165, "step": 19640 }, { "epoch": 0.18489411764705882, "grad_norm": 0.7888091795987453, "learning_rate": 4.652449472315153e-06, "loss": 0.02953956127166748, "step": 19645 }, { "epoch": 0.18494117647058825, "grad_norm": 1.0143778498223575, "learning_rate": 4.651857489837369e-06, "loss": 0.023647844791412354, "step": 19650 }, { "epoch": 0.18498823529411765, "grad_norm": 0.6877124953114067, "learning_rate": 4.6512657332754805e-06, "loss": 0.024178072810173035, "step": 19655 }, { "epoch": 0.18503529411764705, "grad_norm": 0.5782348866231474, "learning_rate": 4.650674202485832e-06, "loss": 0.02488637566566467, "step": 19660 }, { "epoch": 0.18508235294117648, "grad_norm": 0.7254383513313708, "learning_rate": 4.6500828973248954e-06, "loss": 0.03101654350757599, "step": 19665 }, { "epoch": 0.18512941176470588, "grad_norm": 0.6623082083578626, "learning_rate": 4.649491817649271e-06, "loss": 0.025567659735679628, "step": 19670 }, { "epoch": 0.1851764705882353, "grad_norm": 0.686210700930593, "learning_rate": 4.648900963315685e-06, "loss": 0.026049920916557313, "step": 19675 }, { "epoch": 0.1852235294117647, "grad_norm": 0.6049094010633711, "learning_rate": 4.648310334180992e-06, "loss": 0.025091689825057984, "step": 19680 }, { "epoch": 0.18527058823529413, "grad_norm": 0.7881978126671391, "learning_rate": 4.6477199301021755e-06, "loss": 0.026303619146347046, "step": 19685 }, { "epoch": 0.18531764705882353, "grad_norm": 0.7903429971278495, "learning_rate": 4.647129750936345e-06, "loss": 0.02707219123840332, "step": 19690 }, { "epoch": 0.18536470588235293, "grad_norm": 0.4058715244756158, "learning_rate": 4.646539796540734e-06, "loss": 0.04077116549015045, "step": 19695 }, { "epoch": 0.18541176470588236, "grad_norm": 0.5895005564970096, "learning_rate": 4.645950066772708e-06, "loss": 0.022329702973365784, "step": 19700 }, { "epoch": 0.18545882352941176, "grad_norm": 0.6858814857341411, "learning_rate": 4.645360561489755e-06, "loss": 0.022840225696563722, "step": 19705 }, { "epoch": 0.1855058823529412, "grad_norm": 0.5542087659289228, "learning_rate": 4.644771280549494e-06, "loss": 0.020241045951843263, "step": 19710 }, { "epoch": 0.1855529411764706, "grad_norm": 0.647019645503262, "learning_rate": 4.644182223809665e-06, "loss": 0.026652979850769042, "step": 19715 }, { "epoch": 0.1856, "grad_norm": 0.559819834293242, "learning_rate": 4.643593391128138e-06, "loss": 0.02471175789833069, "step": 19720 }, { "epoch": 0.18564705882352942, "grad_norm": 0.6516175255585106, "learning_rate": 4.643004782362907e-06, "loss": 0.026143598556518554, "step": 19725 }, { "epoch": 0.18569411764705882, "grad_norm": 1.052957748804981, "learning_rate": 4.642416397372093e-06, "loss": 0.026376190781593322, "step": 19730 }, { "epoch": 0.18574117647058824, "grad_norm": 0.5586614189633008, "learning_rate": 4.641828236013945e-06, "loss": 0.03354946970939636, "step": 19735 }, { "epoch": 0.18578823529411764, "grad_norm": 0.764165894450596, "learning_rate": 4.641240298146834e-06, "loss": 0.02654818892478943, "step": 19740 }, { "epoch": 0.18583529411764707, "grad_norm": 0.8307081226941951, "learning_rate": 4.640652583629257e-06, "loss": 0.02524517774581909, "step": 19745 }, { "epoch": 0.18588235294117647, "grad_norm": 0.5478285396956268, "learning_rate": 4.640065092319839e-06, "loss": 0.030261823534965517, "step": 19750 }, { "epoch": 0.18592941176470587, "grad_norm": 0.567710004121026, "learning_rate": 4.6394778240773276e-06, "loss": 0.029697003960609435, "step": 19755 }, { "epoch": 0.1859764705882353, "grad_norm": 0.747419799711807, "learning_rate": 4.6388907787605974e-06, "loss": 0.024088865518569945, "step": 19760 }, { "epoch": 0.1860235294117647, "grad_norm": 0.6861842424742267, "learning_rate": 4.638303956228648e-06, "loss": 0.028310787677764893, "step": 19765 }, { "epoch": 0.18607058823529413, "grad_norm": 0.6469875755150469, "learning_rate": 4.6377173563406e-06, "loss": 0.03314407765865326, "step": 19770 }, { "epoch": 0.18611764705882353, "grad_norm": 0.691146924221893, "learning_rate": 4.637130978955705e-06, "loss": 0.02747848629951477, "step": 19775 }, { "epoch": 0.18616470588235295, "grad_norm": 0.7307616785998946, "learning_rate": 4.636544823933335e-06, "loss": 0.028131076693534852, "step": 19780 }, { "epoch": 0.18621176470588235, "grad_norm": 0.5023504954076041, "learning_rate": 4.635958891132985e-06, "loss": 0.02765982747077942, "step": 19785 }, { "epoch": 0.18625882352941175, "grad_norm": 1.4264841804341564, "learning_rate": 4.635373180414279e-06, "loss": 0.032712697982788086, "step": 19790 }, { "epoch": 0.18630588235294118, "grad_norm": 0.5496855268181657, "learning_rate": 4.634787691636961e-06, "loss": 0.026753970980644227, "step": 19795 }, { "epoch": 0.18635294117647058, "grad_norm": 0.5855424580872263, "learning_rate": 4.6342024246609e-06, "loss": 0.030511221289634703, "step": 19800 }, { "epoch": 0.1864, "grad_norm": 0.4012879626118351, "learning_rate": 4.633617379346092e-06, "loss": 0.035206520557403566, "step": 19805 }, { "epoch": 0.1864470588235294, "grad_norm": 0.7096369089700824, "learning_rate": 4.6330325555526526e-06, "loss": 0.025398826599121092, "step": 19810 }, { "epoch": 0.1864941176470588, "grad_norm": 0.8107163672679074, "learning_rate": 4.632447953140823e-06, "loss": 0.03343799710273743, "step": 19815 }, { "epoch": 0.18654117647058824, "grad_norm": 0.517825230371276, "learning_rate": 4.631863571970966e-06, "loss": 0.033677467703819276, "step": 19820 }, { "epoch": 0.18658823529411764, "grad_norm": 0.6942985144208451, "learning_rate": 4.63127941190357e-06, "loss": 0.03384128212928772, "step": 19825 }, { "epoch": 0.18663529411764707, "grad_norm": 0.7817729494219823, "learning_rate": 4.630695472799245e-06, "loss": 0.036601966619491576, "step": 19830 }, { "epoch": 0.18668235294117647, "grad_norm": 0.5027447431317739, "learning_rate": 4.630111754518724e-06, "loss": 0.028187388181686403, "step": 19835 }, { "epoch": 0.1867294117647059, "grad_norm": 0.6761619251880033, "learning_rate": 4.629528256922865e-06, "loss": 0.0349825918674469, "step": 19840 }, { "epoch": 0.1867764705882353, "grad_norm": 0.7631591922803054, "learning_rate": 4.628944979872646e-06, "loss": 0.030668091773986817, "step": 19845 }, { "epoch": 0.1868235294117647, "grad_norm": 0.7715820244431179, "learning_rate": 4.628361923229168e-06, "loss": 0.027201509475708006, "step": 19850 }, { "epoch": 0.18687058823529412, "grad_norm": 0.5955471243200559, "learning_rate": 4.627779086853656e-06, "loss": 0.024999575316905977, "step": 19855 }, { "epoch": 0.18691764705882352, "grad_norm": 0.6094425397986071, "learning_rate": 4.627196470607456e-06, "loss": 0.02874714732170105, "step": 19860 }, { "epoch": 0.18696470588235295, "grad_norm": 0.7280593361405813, "learning_rate": 4.626614074352037e-06, "loss": 0.029899579286575318, "step": 19865 }, { "epoch": 0.18701176470588235, "grad_norm": 0.9955609088992448, "learning_rate": 4.626031897948988e-06, "loss": 0.027584248781204225, "step": 19870 }, { "epoch": 0.18705882352941178, "grad_norm": 0.5880335881337692, "learning_rate": 4.625449941260023e-06, "loss": 0.025185829401016234, "step": 19875 }, { "epoch": 0.18710588235294118, "grad_norm": 0.8311393059977156, "learning_rate": 4.624868204146975e-06, "loss": 0.02489778846502304, "step": 19880 }, { "epoch": 0.18715294117647058, "grad_norm": 0.5164799824720525, "learning_rate": 4.624286686471801e-06, "loss": 0.021382042765617372, "step": 19885 }, { "epoch": 0.1872, "grad_norm": 0.624857643506697, "learning_rate": 4.623705388096578e-06, "loss": 0.02967793643474579, "step": 19890 }, { "epoch": 0.1872470588235294, "grad_norm": 0.6172235730687158, "learning_rate": 4.623124308883503e-06, "loss": 0.027486026287078857, "step": 19895 }, { "epoch": 0.18729411764705883, "grad_norm": 0.5838839229180438, "learning_rate": 4.622543448694898e-06, "loss": 0.019219648838043214, "step": 19900 }, { "epoch": 0.18734117647058823, "grad_norm": 0.6274909029920153, "learning_rate": 4.621962807393202e-06, "loss": 0.02705046534538269, "step": 19905 }, { "epoch": 0.18738823529411763, "grad_norm": 0.7313662632449328, "learning_rate": 4.6213823848409775e-06, "loss": 0.03218128979206085, "step": 19910 }, { "epoch": 0.18743529411764706, "grad_norm": 0.7247736152969305, "learning_rate": 4.620802180900908e-06, "loss": 0.029109930992126463, "step": 19915 }, { "epoch": 0.18748235294117646, "grad_norm": 0.733837642984108, "learning_rate": 4.620222195435794e-06, "loss": 0.022272664308547973, "step": 19920 }, { "epoch": 0.1875294117647059, "grad_norm": 0.6371717612136388, "learning_rate": 4.619642428308562e-06, "loss": 0.02216646820306778, "step": 19925 }, { "epoch": 0.1875764705882353, "grad_norm": 0.8202547618070243, "learning_rate": 4.619062879382256e-06, "loss": 0.03020934760570526, "step": 19930 }, { "epoch": 0.18762352941176472, "grad_norm": 0.815614317977644, "learning_rate": 4.6184835485200375e-06, "loss": 0.02868248224258423, "step": 19935 }, { "epoch": 0.18767058823529412, "grad_norm": 0.7452205850284758, "learning_rate": 4.6179044355851934e-06, "loss": 0.03565991818904877, "step": 19940 }, { "epoch": 0.18771764705882352, "grad_norm": 0.5561057568836647, "learning_rate": 4.617325540441128e-06, "loss": 0.02805587649345398, "step": 19945 }, { "epoch": 0.18776470588235294, "grad_norm": 0.649165849074153, "learning_rate": 4.616746862951364e-06, "loss": 0.03323500752449036, "step": 19950 }, { "epoch": 0.18781176470588234, "grad_norm": 0.5441485887752695, "learning_rate": 4.6161684029795465e-06, "loss": 0.029969751834869385, "step": 19955 }, { "epoch": 0.18785882352941177, "grad_norm": 0.7448783109027298, "learning_rate": 4.61559016038944e-06, "loss": 0.029837703704833983, "step": 19960 }, { "epoch": 0.18790588235294117, "grad_norm": 0.7406190335873021, "learning_rate": 4.615012135044924e-06, "loss": 0.02590269148349762, "step": 19965 }, { "epoch": 0.1879529411764706, "grad_norm": 0.46546461754161556, "learning_rate": 4.614434326810003e-06, "loss": 0.02739374339580536, "step": 19970 }, { "epoch": 0.188, "grad_norm": 0.5924560125552097, "learning_rate": 4.6138567355488e-06, "loss": 0.032019370794296266, "step": 19975 }, { "epoch": 0.1880470588235294, "grad_norm": 0.6733096411251662, "learning_rate": 4.613279361125552e-06, "loss": 0.024742472171783447, "step": 19980 }, { "epoch": 0.18809411764705883, "grad_norm": 0.567229944089785, "learning_rate": 4.61270220340462e-06, "loss": 0.026046442985534667, "step": 19985 }, { "epoch": 0.18814117647058823, "grad_norm": 0.553645584196547, "learning_rate": 4.612125262250483e-06, "loss": 0.030941882729530336, "step": 19990 }, { "epoch": 0.18818823529411766, "grad_norm": 0.665338036789333, "learning_rate": 4.611548537527735e-06, "loss": 0.026299929618835448, "step": 19995 }, { "epoch": 0.18823529411764706, "grad_norm": 0.7596360594036807, "learning_rate": 4.610972029101094e-06, "loss": 0.03234056234359741, "step": 20000 }, { "epoch": 0.18828235294117648, "grad_norm": 0.5212194277269779, "learning_rate": 4.610395736835392e-06, "loss": 0.028373318910598754, "step": 20005 }, { "epoch": 0.18832941176470588, "grad_norm": 0.7129187293845546, "learning_rate": 4.60981966059558e-06, "loss": 0.02781800627708435, "step": 20010 }, { "epoch": 0.18837647058823528, "grad_norm": 0.6853651080601663, "learning_rate": 4.609243800246731e-06, "loss": 0.03362595736980438, "step": 20015 }, { "epoch": 0.1884235294117647, "grad_norm": 0.6021767828439764, "learning_rate": 4.608668155654029e-06, "loss": 0.025568822026252748, "step": 20020 }, { "epoch": 0.1884705882352941, "grad_norm": 0.6401468902257297, "learning_rate": 4.6080927266827815e-06, "loss": 0.028339743614196777, "step": 20025 }, { "epoch": 0.18851764705882354, "grad_norm": 0.8985753858023312, "learning_rate": 4.607517513198411e-06, "loss": 0.026455873250961305, "step": 20030 }, { "epoch": 0.18856470588235294, "grad_norm": 1.3222763425126587, "learning_rate": 4.606942515066461e-06, "loss": 0.027270549535751344, "step": 20035 }, { "epoch": 0.18861176470588234, "grad_norm": 0.5537325124026965, "learning_rate": 4.606367732152586e-06, "loss": 0.024141651391983033, "step": 20040 }, { "epoch": 0.18865882352941177, "grad_norm": 0.7150748942883347, "learning_rate": 4.605793164322564e-06, "loss": 0.0281724214553833, "step": 20045 }, { "epoch": 0.18870588235294117, "grad_norm": 0.6618942450617797, "learning_rate": 4.605218811442287e-06, "loss": 0.031010127067565917, "step": 20050 }, { "epoch": 0.1887529411764706, "grad_norm": 2.272406020488527, "learning_rate": 4.604644673377765e-06, "loss": 0.031268072128295896, "step": 20055 }, { "epoch": 0.1888, "grad_norm": 0.46407409688281726, "learning_rate": 4.604070749995125e-06, "loss": 0.030162593722343443, "step": 20060 }, { "epoch": 0.18884705882352942, "grad_norm": 0.5448492402696604, "learning_rate": 4.603497041160609e-06, "loss": 0.025279748439788818, "step": 20065 }, { "epoch": 0.18889411764705882, "grad_norm": 0.5263303712079934, "learning_rate": 4.60292354674058e-06, "loss": 0.026736849546432497, "step": 20070 }, { "epoch": 0.18894117647058822, "grad_norm": 0.5490797321740744, "learning_rate": 4.602350266601512e-06, "loss": 0.025584402680397033, "step": 20075 }, { "epoch": 0.18898823529411765, "grad_norm": 0.46439628242716136, "learning_rate": 4.601777200609998e-06, "loss": 0.023063495755195618, "step": 20080 }, { "epoch": 0.18903529411764705, "grad_norm": 0.5723351464330343, "learning_rate": 4.601204348632749e-06, "loss": 0.026319846510887146, "step": 20085 }, { "epoch": 0.18908235294117648, "grad_norm": 0.7175419359077375, "learning_rate": 4.600631710536589e-06, "loss": 0.030493712425231932, "step": 20090 }, { "epoch": 0.18912941176470588, "grad_norm": 0.480023346609629, "learning_rate": 4.6000592861884616e-06, "loss": 0.026413172483444214, "step": 20095 }, { "epoch": 0.1891764705882353, "grad_norm": 0.7023684895060353, "learning_rate": 4.599487075455421e-06, "loss": 0.027350053191184998, "step": 20100 }, { "epoch": 0.1892235294117647, "grad_norm": 0.5562186944991316, "learning_rate": 4.5989150782046414e-06, "loss": 0.030153483152389526, "step": 20105 }, { "epoch": 0.1892705882352941, "grad_norm": 0.776821357111721, "learning_rate": 4.5983432943034125e-06, "loss": 0.03056051433086395, "step": 20110 }, { "epoch": 0.18931764705882353, "grad_norm": 0.6957320820153476, "learning_rate": 4.5977717236191365e-06, "loss": 0.032467931509017944, "step": 20115 }, { "epoch": 0.18936470588235293, "grad_norm": 0.49391099475193445, "learning_rate": 4.597200366019334e-06, "loss": 0.02770521938800812, "step": 20120 }, { "epoch": 0.18941176470588236, "grad_norm": 0.5168243989129182, "learning_rate": 4.596629221371639e-06, "loss": 0.024297182261943818, "step": 20125 }, { "epoch": 0.18945882352941176, "grad_norm": 0.5467156069018362, "learning_rate": 4.5960582895438025e-06, "loss": 0.026485764980316163, "step": 20130 }, { "epoch": 0.18950588235294116, "grad_norm": 0.5712250317074764, "learning_rate": 4.5954875704036865e-06, "loss": 0.027267146110534667, "step": 20135 }, { "epoch": 0.1895529411764706, "grad_norm": 0.5631451230562635, "learning_rate": 4.5949170638192736e-06, "loss": 0.03071534037590027, "step": 20140 }, { "epoch": 0.1896, "grad_norm": 0.7276518208829118, "learning_rate": 4.594346769658656e-06, "loss": 0.02924792766571045, "step": 20145 }, { "epoch": 0.18964705882352942, "grad_norm": 0.49195005837152694, "learning_rate": 4.593776687790041e-06, "loss": 0.026200222969055175, "step": 20150 }, { "epoch": 0.18969411764705882, "grad_norm": 0.5594241872304662, "learning_rate": 4.593206818081756e-06, "loss": 0.02689424157142639, "step": 20155 }, { "epoch": 0.18974117647058825, "grad_norm": 0.5458392391464909, "learning_rate": 4.592637160402235e-06, "loss": 0.024863721430301668, "step": 20160 }, { "epoch": 0.18978823529411765, "grad_norm": 1.0363400852311124, "learning_rate": 4.59206771462003e-06, "loss": 0.02999701499938965, "step": 20165 }, { "epoch": 0.18983529411764705, "grad_norm": 2.0560361440809714, "learning_rate": 4.591498480603808e-06, "loss": 0.032598429918289186, "step": 20170 }, { "epoch": 0.18988235294117647, "grad_norm": 0.4819188589947282, "learning_rate": 4.590929458222349e-06, "loss": 0.02708268165588379, "step": 20175 }, { "epoch": 0.18992941176470587, "grad_norm": 0.6368110674100786, "learning_rate": 4.590360647344545e-06, "loss": 0.024411311745643614, "step": 20180 }, { "epoch": 0.1899764705882353, "grad_norm": 0.5359537575103822, "learning_rate": 4.589792047839404e-06, "loss": 0.023923198878765106, "step": 20185 }, { "epoch": 0.1900235294117647, "grad_norm": 0.4576030628981039, "learning_rate": 4.589223659576047e-06, "loss": 0.0328514963388443, "step": 20190 }, { "epoch": 0.19007058823529413, "grad_norm": 1.2380906742409252, "learning_rate": 4.588655482423707e-06, "loss": 0.031353998184204104, "step": 20195 }, { "epoch": 0.19011764705882353, "grad_norm": 0.7017146344365394, "learning_rate": 4.5880875162517325e-06, "loss": 0.0340071827173233, "step": 20200 }, { "epoch": 0.19016470588235293, "grad_norm": 0.6223463546770398, "learning_rate": 4.587519760929583e-06, "loss": 0.028404009342193604, "step": 20205 }, { "epoch": 0.19021176470588236, "grad_norm": 0.35281413203987305, "learning_rate": 4.586952216326834e-06, "loss": 0.022366249561309816, "step": 20210 }, { "epoch": 0.19025882352941176, "grad_norm": 0.586790011814126, "learning_rate": 4.586384882313171e-06, "loss": 0.022734850645065308, "step": 20215 }, { "epoch": 0.19030588235294119, "grad_norm": 0.5974884288195521, "learning_rate": 4.585817758758393e-06, "loss": 0.03022617697715759, "step": 20220 }, { "epoch": 0.19035294117647059, "grad_norm": 0.7945129993908858, "learning_rate": 4.585250845532412e-06, "loss": 0.02907278835773468, "step": 20225 }, { "epoch": 0.1904, "grad_norm": 0.5754853348508584, "learning_rate": 4.584684142505253e-06, "loss": 0.026673382520675658, "step": 20230 }, { "epoch": 0.1904470588235294, "grad_norm": 0.5748588480314551, "learning_rate": 4.5841176495470535e-06, "loss": 0.025887435674667357, "step": 20235 }, { "epoch": 0.1904941176470588, "grad_norm": 0.8109868197707573, "learning_rate": 4.583551366528062e-06, "loss": 0.02547425925731659, "step": 20240 }, { "epoch": 0.19054117647058824, "grad_norm": 0.5208143659901836, "learning_rate": 4.582985293318641e-06, "loss": 0.026366913318634035, "step": 20245 }, { "epoch": 0.19058823529411764, "grad_norm": 0.6565835318301784, "learning_rate": 4.582419429789262e-06, "loss": 0.024160562455654143, "step": 20250 }, { "epoch": 0.19063529411764707, "grad_norm": 0.603636843814998, "learning_rate": 4.581853775810513e-06, "loss": 0.026288175582885744, "step": 20255 }, { "epoch": 0.19068235294117647, "grad_norm": 0.6070245652279088, "learning_rate": 4.58128833125309e-06, "loss": 0.03132374286651611, "step": 20260 }, { "epoch": 0.19072941176470587, "grad_norm": 0.4570656688493529, "learning_rate": 4.580723095987801e-06, "loss": 0.025708186626434325, "step": 20265 }, { "epoch": 0.1907764705882353, "grad_norm": 0.6937006022614864, "learning_rate": 4.580158069885569e-06, "loss": 0.02701610028743744, "step": 20270 }, { "epoch": 0.1908235294117647, "grad_norm": 0.8631180232622617, "learning_rate": 4.579593252817423e-06, "loss": 0.03546724021434784, "step": 20275 }, { "epoch": 0.19087058823529413, "grad_norm": 0.7092267226557193, "learning_rate": 4.579028644654508e-06, "loss": 0.032151424884796144, "step": 20280 }, { "epoch": 0.19091764705882353, "grad_norm": 0.5184899990216872, "learning_rate": 4.5784642452680775e-06, "loss": 0.02220630943775177, "step": 20285 }, { "epoch": 0.19096470588235295, "grad_norm": 0.5109565927844402, "learning_rate": 4.577900054529498e-06, "loss": 0.028803709149360656, "step": 20290 }, { "epoch": 0.19101176470588235, "grad_norm": 0.956452842250825, "learning_rate": 4.577336072310244e-06, "loss": 0.027084988355636597, "step": 20295 }, { "epoch": 0.19105882352941175, "grad_norm": 0.5644495972018717, "learning_rate": 4.576772298481906e-06, "loss": 0.027287238836288454, "step": 20300 }, { "epoch": 0.19110588235294118, "grad_norm": 0.7236201034027993, "learning_rate": 4.576208732916177e-06, "loss": 0.03136751651763916, "step": 20305 }, { "epoch": 0.19115294117647058, "grad_norm": 0.5829161392758802, "learning_rate": 4.57564537548487e-06, "loss": 0.025429767370224, "step": 20310 }, { "epoch": 0.1912, "grad_norm": 0.5591792788084139, "learning_rate": 4.575082226059901e-06, "loss": 0.02529638409614563, "step": 20315 }, { "epoch": 0.1912470588235294, "grad_norm": 0.5780428669247084, "learning_rate": 4.5745192845133e-06, "loss": 0.02202315330505371, "step": 20320 }, { "epoch": 0.19129411764705884, "grad_norm": 0.6741127984164251, "learning_rate": 4.573956550717207e-06, "loss": 0.029319411516189574, "step": 20325 }, { "epoch": 0.19134117647058824, "grad_norm": 1.3761352404675924, "learning_rate": 4.573394024543871e-06, "loss": 0.027084314823150636, "step": 20330 }, { "epoch": 0.19138823529411764, "grad_norm": 0.7045183643362933, "learning_rate": 4.5728317058656505e-06, "loss": 0.02784053087234497, "step": 20335 }, { "epoch": 0.19143529411764706, "grad_norm": 0.4323395596515925, "learning_rate": 4.5722695945550165e-06, "loss": 0.025277763605117798, "step": 20340 }, { "epoch": 0.19148235294117646, "grad_norm": 0.5774207126311295, "learning_rate": 4.571707690484547e-06, "loss": 0.026914292573928834, "step": 20345 }, { "epoch": 0.1915294117647059, "grad_norm": 0.5526152704940536, "learning_rate": 4.5711459935269295e-06, "loss": 0.029181081056594848, "step": 20350 }, { "epoch": 0.1915764705882353, "grad_norm": 0.5382876488913239, "learning_rate": 4.570584503554964e-06, "loss": 0.027170100808143617, "step": 20355 }, { "epoch": 0.1916235294117647, "grad_norm": 0.7200375453854182, "learning_rate": 4.570023220441558e-06, "loss": 0.029491376876831055, "step": 20360 }, { "epoch": 0.19167058823529412, "grad_norm": 0.7393210002066732, "learning_rate": 4.569462144059725e-06, "loss": 0.02460949420928955, "step": 20365 }, { "epoch": 0.19171764705882352, "grad_norm": 0.5273609948559208, "learning_rate": 4.568901274282593e-06, "loss": 0.029580098390579224, "step": 20370 }, { "epoch": 0.19176470588235295, "grad_norm": 0.8912982006496452, "learning_rate": 4.5683406109833964e-06, "loss": 0.03565784096717835, "step": 20375 }, { "epoch": 0.19181176470588235, "grad_norm": 0.5369726626325191, "learning_rate": 4.567780154035478e-06, "loss": 0.023719751834869386, "step": 20380 }, { "epoch": 0.19185882352941178, "grad_norm": 0.5435215380488295, "learning_rate": 4.567219903312292e-06, "loss": 0.024169832468032837, "step": 20385 }, { "epoch": 0.19190588235294118, "grad_norm": 0.6475588165412379, "learning_rate": 4.566659858687396e-06, "loss": 0.028150290250778198, "step": 20390 }, { "epoch": 0.19195294117647058, "grad_norm": 0.49274744678701865, "learning_rate": 4.566100020034462e-06, "loss": 0.025862908363342284, "step": 20395 }, { "epoch": 0.192, "grad_norm": 0.6008888567147029, "learning_rate": 4.565540387227267e-06, "loss": 0.022642701864242554, "step": 20400 }, { "epoch": 0.1920470588235294, "grad_norm": 0.5883226780496303, "learning_rate": 4.564980960139697e-06, "loss": 0.030661377310752868, "step": 20405 }, { "epoch": 0.19209411764705883, "grad_norm": 0.5290154740528423, "learning_rate": 4.564421738645747e-06, "loss": 0.025374898314476015, "step": 20410 }, { "epoch": 0.19214117647058823, "grad_norm": 0.5237838155095246, "learning_rate": 4.563862722619518e-06, "loss": 0.02615034580230713, "step": 20415 }, { "epoch": 0.19218823529411766, "grad_norm": 0.6549463273970557, "learning_rate": 4.56330391193522e-06, "loss": 0.028782463073730467, "step": 20420 }, { "epoch": 0.19223529411764706, "grad_norm": 0.5700407906569289, "learning_rate": 4.562745306467173e-06, "loss": 0.0245738610625267, "step": 20425 }, { "epoch": 0.19228235294117646, "grad_norm": 0.784367762262725, "learning_rate": 4.5621869060898e-06, "loss": 0.025631588697433472, "step": 20430 }, { "epoch": 0.1923294117647059, "grad_norm": 0.8581992030656821, "learning_rate": 4.561628710677634e-06, "loss": 0.02199649065732956, "step": 20435 }, { "epoch": 0.1923764705882353, "grad_norm": 0.9740612711982298, "learning_rate": 4.561070720105319e-06, "loss": 0.027732044458389282, "step": 20440 }, { "epoch": 0.19242352941176472, "grad_norm": 0.6893479873601034, "learning_rate": 4.560512934247599e-06, "loss": 0.024215897917747496, "step": 20445 }, { "epoch": 0.19247058823529412, "grad_norm": 0.5843628677305391, "learning_rate": 4.559955352979331e-06, "loss": 0.02463410943746567, "step": 20450 }, { "epoch": 0.19251764705882352, "grad_norm": 0.5945214447621355, "learning_rate": 4.559397976175477e-06, "loss": 0.0248775839805603, "step": 20455 }, { "epoch": 0.19256470588235294, "grad_norm": 0.5464147149931172, "learning_rate": 4.558840803711105e-06, "loss": 0.028032207489013673, "step": 20460 }, { "epoch": 0.19261176470588234, "grad_norm": 0.5034515849971564, "learning_rate": 4.5582838354613915e-06, "loss": 0.027905601263046264, "step": 20465 }, { "epoch": 0.19265882352941177, "grad_norm": 0.5452403957971341, "learning_rate": 4.557727071301621e-06, "loss": 0.02921591103076935, "step": 20470 }, { "epoch": 0.19270588235294117, "grad_norm": 0.8702271734122227, "learning_rate": 4.55717051110718e-06, "loss": 0.03373079299926758, "step": 20475 }, { "epoch": 0.1927529411764706, "grad_norm": 0.5767334420296854, "learning_rate": 4.556614154753566e-06, "loss": 0.027829399704933165, "step": 20480 }, { "epoch": 0.1928, "grad_norm": 0.45819695989671716, "learning_rate": 4.55605800211638e-06, "loss": 0.02350752204656601, "step": 20485 }, { "epoch": 0.1928470588235294, "grad_norm": 0.6660850474734222, "learning_rate": 4.555502053071332e-06, "loss": 0.028326046466827393, "step": 20490 }, { "epoch": 0.19289411764705883, "grad_norm": 0.5736901402051264, "learning_rate": 4.554946307494235e-06, "loss": 0.028852435946464538, "step": 20495 }, { "epoch": 0.19294117647058823, "grad_norm": 0.6288129942105498, "learning_rate": 4.55439076526101e-06, "loss": 0.031161856651306153, "step": 20500 }, { "epoch": 0.19298823529411765, "grad_norm": 0.5103119921621877, "learning_rate": 4.553835426247684e-06, "loss": 0.02973533272743225, "step": 20505 }, { "epoch": 0.19303529411764705, "grad_norm": 0.7107614961027432, "learning_rate": 4.553280290330388e-06, "loss": 0.02961873710155487, "step": 20510 }, { "epoch": 0.19308235294117648, "grad_norm": 0.8115951990424964, "learning_rate": 4.552725357385361e-06, "loss": 0.030568802356719972, "step": 20515 }, { "epoch": 0.19312941176470588, "grad_norm": 0.6059096739448147, "learning_rate": 4.552170627288946e-06, "loss": 0.027095341682434083, "step": 20520 }, { "epoch": 0.19317647058823528, "grad_norm": 0.6976879536999635, "learning_rate": 4.551616099917593e-06, "loss": 0.03224882483482361, "step": 20525 }, { "epoch": 0.1932235294117647, "grad_norm": 0.5540980122208649, "learning_rate": 4.551061775147855e-06, "loss": 0.029129907488822937, "step": 20530 }, { "epoch": 0.1932705882352941, "grad_norm": 0.6200372969114702, "learning_rate": 4.550507652856392e-06, "loss": 0.022396069765090943, "step": 20535 }, { "epoch": 0.19331764705882354, "grad_norm": 0.48761626445142336, "learning_rate": 4.549953732919969e-06, "loss": 0.023836666345596315, "step": 20540 }, { "epoch": 0.19336470588235294, "grad_norm": 1.0113268348159006, "learning_rate": 4.5494000152154544e-06, "loss": 0.02800326347351074, "step": 20545 }, { "epoch": 0.19341176470588237, "grad_norm": 1.0641394092719072, "learning_rate": 4.548846499619823e-06, "loss": 0.031556087732315066, "step": 20550 }, { "epoch": 0.19345882352941177, "grad_norm": 0.8566797481003643, "learning_rate": 4.548293186010154e-06, "loss": 0.027741333842277525, "step": 20555 }, { "epoch": 0.19350588235294117, "grad_norm": 0.639271670059036, "learning_rate": 4.547740074263632e-06, "loss": 0.028367874026298524, "step": 20560 }, { "epoch": 0.1935529411764706, "grad_norm": 0.6257430616337353, "learning_rate": 4.5471871642575445e-06, "loss": 0.02594282627105713, "step": 20565 }, { "epoch": 0.1936, "grad_norm": 0.6779305264159036, "learning_rate": 4.546634455869284e-06, "loss": 0.02553880214691162, "step": 20570 }, { "epoch": 0.19364705882352942, "grad_norm": 0.6391637734349588, "learning_rate": 4.546081948976348e-06, "loss": 0.02541956901550293, "step": 20575 }, { "epoch": 0.19369411764705882, "grad_norm": 0.692366758205936, "learning_rate": 4.545529643456339e-06, "loss": 0.02534353733062744, "step": 20580 }, { "epoch": 0.19374117647058822, "grad_norm": 2.216976590631678, "learning_rate": 4.54497753918696e-06, "loss": 0.02550649642944336, "step": 20585 }, { "epoch": 0.19378823529411765, "grad_norm": 0.5406689189762495, "learning_rate": 4.544425636046021e-06, "loss": 0.02482195496559143, "step": 20590 }, { "epoch": 0.19383529411764705, "grad_norm": 1.235532950837452, "learning_rate": 4.543873933911437e-06, "loss": 0.028553709387779236, "step": 20595 }, { "epoch": 0.19388235294117648, "grad_norm": 0.9075784765842758, "learning_rate": 4.543322432661222e-06, "loss": 0.029490482807159425, "step": 20600 }, { "epoch": 0.19392941176470588, "grad_norm": 0.7724724955275049, "learning_rate": 4.542771132173497e-06, "loss": 0.021450527012348175, "step": 20605 }, { "epoch": 0.1939764705882353, "grad_norm": 0.4625196301733423, "learning_rate": 4.542220032326487e-06, "loss": 0.03058076500892639, "step": 20610 }, { "epoch": 0.1940235294117647, "grad_norm": 0.7939325728944188, "learning_rate": 4.541669132998519e-06, "loss": 0.028143715858459473, "step": 20615 }, { "epoch": 0.1940705882352941, "grad_norm": 0.6106640276794496, "learning_rate": 4.541118434068023e-06, "loss": 0.027338007092475893, "step": 20620 }, { "epoch": 0.19411764705882353, "grad_norm": 0.6079387104779361, "learning_rate": 4.540567935413532e-06, "loss": 0.025189006328582765, "step": 20625 }, { "epoch": 0.19416470588235293, "grad_norm": 0.7115375468444323, "learning_rate": 4.540017636913686e-06, "loss": 0.03174129724502563, "step": 20630 }, { "epoch": 0.19421176470588236, "grad_norm": 0.5947991161976862, "learning_rate": 4.539467538447222e-06, "loss": 0.030556762218475343, "step": 20635 }, { "epoch": 0.19425882352941176, "grad_norm": 0.5194368893105853, "learning_rate": 4.538917639892982e-06, "loss": 0.031095939874649047, "step": 20640 }, { "epoch": 0.1943058823529412, "grad_norm": 0.5844946028485969, "learning_rate": 4.538367941129913e-06, "loss": 0.030271396040916443, "step": 20645 }, { "epoch": 0.1943529411764706, "grad_norm": 0.8366357037226684, "learning_rate": 4.537818442037062e-06, "loss": 0.03233969509601593, "step": 20650 }, { "epoch": 0.1944, "grad_norm": 0.6751852852049095, "learning_rate": 4.53726914249358e-06, "loss": 0.024359676241874694, "step": 20655 }, { "epoch": 0.19444705882352942, "grad_norm": 0.5588129139142095, "learning_rate": 4.536720042378718e-06, "loss": 0.030283504724502565, "step": 20660 }, { "epoch": 0.19449411764705882, "grad_norm": 0.8443699785558184, "learning_rate": 4.536171141571834e-06, "loss": 0.024803729355335237, "step": 20665 }, { "epoch": 0.19454117647058825, "grad_norm": 0.6692590983275601, "learning_rate": 4.535622439952383e-06, "loss": 0.028182730078697205, "step": 20670 }, { "epoch": 0.19458823529411765, "grad_norm": 0.6082041132292697, "learning_rate": 4.535073937399924e-06, "loss": 0.028167185187339783, "step": 20675 }, { "epoch": 0.19463529411764705, "grad_norm": 0.6243056491100957, "learning_rate": 4.534525633794119e-06, "loss": 0.0229397252202034, "step": 20680 }, { "epoch": 0.19468235294117647, "grad_norm": 0.6136274554315614, "learning_rate": 4.533977529014731e-06, "loss": 0.029755178093910217, "step": 20685 }, { "epoch": 0.19472941176470587, "grad_norm": 0.810537538914043, "learning_rate": 4.533429622941623e-06, "loss": 0.027269479632377625, "step": 20690 }, { "epoch": 0.1947764705882353, "grad_norm": 0.8958397986567751, "learning_rate": 4.532881915454764e-06, "loss": 0.029625308513641358, "step": 20695 }, { "epoch": 0.1948235294117647, "grad_norm": 0.5697880412272953, "learning_rate": 4.53233440643422e-06, "loss": 0.027305397391319274, "step": 20700 }, { "epoch": 0.19487058823529413, "grad_norm": 0.7355616865567461, "learning_rate": 4.531787095760159e-06, "loss": 0.03269065022468567, "step": 20705 }, { "epoch": 0.19491764705882353, "grad_norm": 0.4581454664951536, "learning_rate": 4.531239983312854e-06, "loss": 0.022342589497566224, "step": 20710 }, { "epoch": 0.19496470588235293, "grad_norm": 0.7440062897849962, "learning_rate": 4.530693068972675e-06, "loss": 0.028747397661209106, "step": 20715 }, { "epoch": 0.19501176470588236, "grad_norm": 0.7705183689590656, "learning_rate": 4.530146352620095e-06, "loss": 0.026341047883033753, "step": 20720 }, { "epoch": 0.19505882352941176, "grad_norm": 0.6250057882233008, "learning_rate": 4.529599834135688e-06, "loss": 0.050488603115081784, "step": 20725 }, { "epoch": 0.19510588235294118, "grad_norm": 0.6281713358785646, "learning_rate": 4.529053513400127e-06, "loss": 0.026353180408477783, "step": 20730 }, { "epoch": 0.19515294117647058, "grad_norm": 0.49991293184287255, "learning_rate": 4.528507390294187e-06, "loss": 0.023737382888793946, "step": 20735 }, { "epoch": 0.1952, "grad_norm": 0.540477835184857, "learning_rate": 4.527961464698746e-06, "loss": 0.024104177951812744, "step": 20740 }, { "epoch": 0.1952470588235294, "grad_norm": 0.7584611503699242, "learning_rate": 4.527415736494778e-06, "loss": 0.027278774976730348, "step": 20745 }, { "epoch": 0.1952941176470588, "grad_norm": 0.49778992543718176, "learning_rate": 4.52687020556336e-06, "loss": 0.024430552124977113, "step": 20750 }, { "epoch": 0.19534117647058824, "grad_norm": 0.5473940866643255, "learning_rate": 4.526324871785669e-06, "loss": 0.023962149024009706, "step": 20755 }, { "epoch": 0.19538823529411764, "grad_norm": 0.7887202333245892, "learning_rate": 4.5257797350429825e-06, "loss": 0.0323013573884964, "step": 20760 }, { "epoch": 0.19543529411764707, "grad_norm": 0.466954200786265, "learning_rate": 4.525234795216677e-06, "loss": 0.028701168298721314, "step": 20765 }, { "epoch": 0.19548235294117647, "grad_norm": 0.6714820059229198, "learning_rate": 4.52469005218823e-06, "loss": 0.031148573756217955, "step": 20770 }, { "epoch": 0.1955294117647059, "grad_norm": 0.66522241401307, "learning_rate": 4.524145505839219e-06, "loss": 0.02573336660861969, "step": 20775 }, { "epoch": 0.1955764705882353, "grad_norm": 0.9812519852421705, "learning_rate": 4.523601156051319e-06, "loss": 0.025444728136062623, "step": 20780 }, { "epoch": 0.1956235294117647, "grad_norm": 0.5938065616821614, "learning_rate": 4.523057002706307e-06, "loss": 0.026302027702331542, "step": 20785 }, { "epoch": 0.19567058823529412, "grad_norm": 0.6340679231373222, "learning_rate": 4.52251304568606e-06, "loss": 0.024133625626564025, "step": 20790 }, { "epoch": 0.19571764705882352, "grad_norm": 0.7285410063012338, "learning_rate": 4.521969284872551e-06, "loss": 0.026129907369613646, "step": 20795 }, { "epoch": 0.19576470588235295, "grad_norm": 0.6611769348134944, "learning_rate": 4.521425720147856e-06, "loss": 0.02762371003627777, "step": 20800 }, { "epoch": 0.19581176470588235, "grad_norm": 0.6546148109518717, "learning_rate": 4.520882351394149e-06, "loss": 0.025206518173217774, "step": 20805 }, { "epoch": 0.19585882352941175, "grad_norm": 0.6300543628877828, "learning_rate": 4.520339178493703e-06, "loss": 0.023287561535835267, "step": 20810 }, { "epoch": 0.19590588235294118, "grad_norm": 0.4912308146629884, "learning_rate": 4.519796201328886e-06, "loss": 0.027457302808761595, "step": 20815 }, { "epoch": 0.19595294117647058, "grad_norm": 0.6946924065370258, "learning_rate": 4.5192534197821746e-06, "loss": 0.02562817931175232, "step": 20820 }, { "epoch": 0.196, "grad_norm": 0.6698957311644801, "learning_rate": 4.518710833736135e-06, "loss": 0.03047395944595337, "step": 20825 }, { "epoch": 0.1960470588235294, "grad_norm": 0.5574999215745751, "learning_rate": 4.518168443073436e-06, "loss": 0.02931104600429535, "step": 20830 }, { "epoch": 0.19609411764705884, "grad_norm": 0.6563073878771489, "learning_rate": 4.517626247676844e-06, "loss": 0.03157381415367126, "step": 20835 }, { "epoch": 0.19614117647058824, "grad_norm": 0.8721526890318582, "learning_rate": 4.517084247429224e-06, "loss": 0.028815793991088866, "step": 20840 }, { "epoch": 0.19618823529411764, "grad_norm": 0.6316137059562489, "learning_rate": 4.516542442213541e-06, "loss": 0.024857425689697267, "step": 20845 }, { "epoch": 0.19623529411764706, "grad_norm": 0.619097299045035, "learning_rate": 4.516000831912855e-06, "loss": 0.03377910852432251, "step": 20850 }, { "epoch": 0.19628235294117646, "grad_norm": 0.5002206861008781, "learning_rate": 4.515459416410327e-06, "loss": 0.025317591428756715, "step": 20855 }, { "epoch": 0.1963294117647059, "grad_norm": 0.6379355435639412, "learning_rate": 4.514918195589214e-06, "loss": 0.02624525725841522, "step": 20860 }, { "epoch": 0.1963764705882353, "grad_norm": 0.6460477732857673, "learning_rate": 4.514377169332872e-06, "loss": 0.027788078784942626, "step": 20865 }, { "epoch": 0.19642352941176472, "grad_norm": 0.67879269403453, "learning_rate": 4.5138363375247546e-06, "loss": 0.023527050018310548, "step": 20870 }, { "epoch": 0.19647058823529412, "grad_norm": 0.7295398227689607, "learning_rate": 4.513295700048414e-06, "loss": 0.029606515169143678, "step": 20875 }, { "epoch": 0.19651764705882352, "grad_norm": 0.5379980210577721, "learning_rate": 4.512755256787499e-06, "loss": 0.022231468558311464, "step": 20880 }, { "epoch": 0.19656470588235295, "grad_norm": 0.4269746599494258, "learning_rate": 4.5122150076257554e-06, "loss": 0.030719009041786195, "step": 20885 }, { "epoch": 0.19661176470588235, "grad_norm": 0.8355787237351207, "learning_rate": 4.5116749524470285e-06, "loss": 0.02594478726387024, "step": 20890 }, { "epoch": 0.19665882352941177, "grad_norm": 0.6701546031219102, "learning_rate": 4.511135091135258e-06, "loss": 0.026529651880264283, "step": 20895 }, { "epoch": 0.19670588235294117, "grad_norm": 0.736124304070339, "learning_rate": 4.510595423574483e-06, "loss": 0.02670886814594269, "step": 20900 }, { "epoch": 0.19675294117647057, "grad_norm": 0.7548034283750985, "learning_rate": 4.510055949648838e-06, "loss": 0.02845162749290466, "step": 20905 }, { "epoch": 0.1968, "grad_norm": 0.6866744739387941, "learning_rate": 4.509516669242557e-06, "loss": 0.029140955209732054, "step": 20910 }, { "epoch": 0.1968470588235294, "grad_norm": 0.6038598978643197, "learning_rate": 4.508977582239968e-06, "loss": 0.02935936450958252, "step": 20915 }, { "epoch": 0.19689411764705883, "grad_norm": 0.5185126188195076, "learning_rate": 4.508438688525498e-06, "loss": 0.02664937973022461, "step": 20920 }, { "epoch": 0.19694117647058823, "grad_norm": 1.1291460391584756, "learning_rate": 4.507899987983669e-06, "loss": 0.03154151737689972, "step": 20925 }, { "epoch": 0.19698823529411766, "grad_norm": 0.5163174800303869, "learning_rate": 4.507361480499101e-06, "loss": 0.025706297159194945, "step": 20930 }, { "epoch": 0.19703529411764706, "grad_norm": 0.9009553773446702, "learning_rate": 4.50682316595651e-06, "loss": 0.026996806263923645, "step": 20935 }, { "epoch": 0.19708235294117646, "grad_norm": 0.7771571598997303, "learning_rate": 4.506285044240707e-06, "loss": 0.027541327476501464, "step": 20940 }, { "epoch": 0.1971294117647059, "grad_norm": 0.5853279826345421, "learning_rate": 4.5057471152366015e-06, "loss": 0.025620979070663453, "step": 20945 }, { "epoch": 0.1971764705882353, "grad_norm": 0.38499368917200344, "learning_rate": 4.5052093788291966e-06, "loss": 0.027201342582702636, "step": 20950 }, { "epoch": 0.19722352941176471, "grad_norm": 1.1110676465279985, "learning_rate": 4.504671834903595e-06, "loss": 0.0365979015827179, "step": 20955 }, { "epoch": 0.19727058823529411, "grad_norm": 0.592727148286851, "learning_rate": 4.504134483344991e-06, "loss": 0.03465582132339477, "step": 20960 }, { "epoch": 0.19731764705882354, "grad_norm": 0.5811539790861451, "learning_rate": 4.5035973240386785e-06, "loss": 0.031116220355033874, "step": 20965 }, { "epoch": 0.19736470588235294, "grad_norm": 0.5676423467091409, "learning_rate": 4.503060356870045e-06, "loss": 0.024886813759803773, "step": 20970 }, { "epoch": 0.19741176470588234, "grad_norm": 0.49904329368552525, "learning_rate": 4.502523581724574e-06, "loss": 0.023719611763954162, "step": 20975 }, { "epoch": 0.19745882352941177, "grad_norm": 0.6472753802163307, "learning_rate": 4.501986998487846e-06, "loss": 0.02629680037498474, "step": 20980 }, { "epoch": 0.19750588235294117, "grad_norm": 0.3203910997719214, "learning_rate": 4.501450607045534e-06, "loss": 0.022387944161891937, "step": 20985 }, { "epoch": 0.1975529411764706, "grad_norm": 0.43334622969932324, "learning_rate": 4.5009144072834086e-06, "loss": 0.026650288701057435, "step": 20990 }, { "epoch": 0.1976, "grad_norm": 1.8535697090643801, "learning_rate": 4.500378399087336e-06, "loss": 0.04037404656410217, "step": 20995 }, { "epoch": 0.1976470588235294, "grad_norm": 0.898932535223756, "learning_rate": 4.499842582343276e-06, "loss": 0.028438520431518555, "step": 21000 }, { "epoch": 0.19769411764705883, "grad_norm": 0.871804301219977, "learning_rate": 4.499306956937284e-06, "loss": 0.028670191764831543, "step": 21005 }, { "epoch": 0.19774117647058823, "grad_norm": 0.6441342779213676, "learning_rate": 4.49877152275551e-06, "loss": 0.028539153933525085, "step": 21010 }, { "epoch": 0.19778823529411765, "grad_norm": 0.49497032384817935, "learning_rate": 4.498236279684199e-06, "loss": 0.0224318727850914, "step": 21015 }, { "epoch": 0.19783529411764705, "grad_norm": 0.6538794940445178, "learning_rate": 4.4977012276096906e-06, "loss": 0.023165199160575866, "step": 21020 }, { "epoch": 0.19788235294117648, "grad_norm": 0.9184614143152902, "learning_rate": 4.49716636641842e-06, "loss": 0.026485365629196168, "step": 21025 }, { "epoch": 0.19792941176470588, "grad_norm": 0.6696880083048793, "learning_rate": 4.496631695996916e-06, "loss": 0.02876960039138794, "step": 21030 }, { "epoch": 0.19797647058823528, "grad_norm": 0.6384481793059285, "learning_rate": 4.4960972162318025e-06, "loss": 0.021406987309455873, "step": 21035 }, { "epoch": 0.1980235294117647, "grad_norm": 0.5475262017645497, "learning_rate": 4.4955629270097964e-06, "loss": 0.02629958391189575, "step": 21040 }, { "epoch": 0.1980705882352941, "grad_norm": 0.7674917085137716, "learning_rate": 4.495028828217709e-06, "loss": 0.032662692666053775, "step": 21045 }, { "epoch": 0.19811764705882354, "grad_norm": 1.2088043511199684, "learning_rate": 4.494494919742448e-06, "loss": 0.027686488628387452, "step": 21050 }, { "epoch": 0.19816470588235294, "grad_norm": 0.6002757458407307, "learning_rate": 4.493961201471012e-06, "loss": 0.02809408903121948, "step": 21055 }, { "epoch": 0.19821176470588237, "grad_norm": 0.6322354698030169, "learning_rate": 4.493427673290496e-06, "loss": 0.02550225257873535, "step": 21060 }, { "epoch": 0.19825882352941177, "grad_norm": 0.6190977643238981, "learning_rate": 4.4928943350880864e-06, "loss": 0.024295973777770995, "step": 21065 }, { "epoch": 0.19830588235294117, "grad_norm": 0.525556349420378, "learning_rate": 4.492361186751066e-06, "loss": 0.02742645740509033, "step": 21070 }, { "epoch": 0.1983529411764706, "grad_norm": 0.5526922730691647, "learning_rate": 4.491828228166809e-06, "loss": 0.027602764964103698, "step": 21075 }, { "epoch": 0.1984, "grad_norm": 0.5582375038602355, "learning_rate": 4.491295459222785e-06, "loss": 0.022905156016349792, "step": 21080 }, { "epoch": 0.19844705882352942, "grad_norm": 0.4822089713302145, "learning_rate": 4.490762879806554e-06, "loss": 0.027189576625823976, "step": 21085 }, { "epoch": 0.19849411764705882, "grad_norm": 0.6587960004070219, "learning_rate": 4.490230489805773e-06, "loss": 0.02291240692138672, "step": 21090 }, { "epoch": 0.19854117647058825, "grad_norm": 0.7185872156354361, "learning_rate": 4.489698289108191e-06, "loss": 0.0255096971988678, "step": 21095 }, { "epoch": 0.19858823529411765, "grad_norm": 0.4939920563132709, "learning_rate": 4.489166277601647e-06, "loss": 0.02851376235485077, "step": 21100 }, { "epoch": 0.19863529411764705, "grad_norm": 0.5320032533655338, "learning_rate": 4.488634455174078e-06, "loss": 0.02289184033870697, "step": 21105 }, { "epoch": 0.19868235294117648, "grad_norm": 0.8077994107473111, "learning_rate": 4.488102821713511e-06, "loss": 0.028253802657127382, "step": 21110 }, { "epoch": 0.19872941176470588, "grad_norm": 0.6018174085183571, "learning_rate": 4.487571377108066e-06, "loss": 0.026443374156951905, "step": 21115 }, { "epoch": 0.1987764705882353, "grad_norm": 0.6705950040575555, "learning_rate": 4.487040121245955e-06, "loss": 0.029495733976364135, "step": 21120 }, { "epoch": 0.1988235294117647, "grad_norm": 0.6542453395083354, "learning_rate": 4.486509054015486e-06, "loss": 0.024645574390888214, "step": 21125 }, { "epoch": 0.1988705882352941, "grad_norm": 0.6018555329036726, "learning_rate": 4.4859781753050556e-06, "loss": 0.02403417378664017, "step": 21130 }, { "epoch": 0.19891764705882353, "grad_norm": 0.5629323795758469, "learning_rate": 4.485447485003154e-06, "loss": 0.024683068692684173, "step": 21135 }, { "epoch": 0.19896470588235293, "grad_norm": 0.6571441101903469, "learning_rate": 4.4849169829983656e-06, "loss": 0.023187386989593505, "step": 21140 }, { "epoch": 0.19901176470588236, "grad_norm": 0.7274714267954392, "learning_rate": 4.484386669179363e-06, "loss": 0.032761764526367185, "step": 21145 }, { "epoch": 0.19905882352941176, "grad_norm": 0.6578601164786279, "learning_rate": 4.483856543434917e-06, "loss": 0.025053706765174866, "step": 21150 }, { "epoch": 0.1991058823529412, "grad_norm": 1.0213826002929478, "learning_rate": 4.483326605653885e-06, "loss": 0.03131826519966126, "step": 21155 }, { "epoch": 0.1991529411764706, "grad_norm": 0.619596449838254, "learning_rate": 4.4827968557252176e-06, "loss": 0.02937648296356201, "step": 21160 }, { "epoch": 0.1992, "grad_norm": 0.6223785554481681, "learning_rate": 4.482267293537958e-06, "loss": 0.02372618317604065, "step": 21165 }, { "epoch": 0.19924705882352942, "grad_norm": 0.5632904413624218, "learning_rate": 4.481737918981242e-06, "loss": 0.028015753626823424, "step": 21170 }, { "epoch": 0.19929411764705882, "grad_norm": 0.6422833406746987, "learning_rate": 4.481208731944297e-06, "loss": 0.030284440517425536, "step": 21175 }, { "epoch": 0.19934117647058824, "grad_norm": 0.5889316994692875, "learning_rate": 4.480679732316437e-06, "loss": 0.025599583983421326, "step": 21180 }, { "epoch": 0.19938823529411764, "grad_norm": 0.7183156632688812, "learning_rate": 4.480150919987075e-06, "loss": 0.027176940441131593, "step": 21185 }, { "epoch": 0.19943529411764707, "grad_norm": 0.6089319800380413, "learning_rate": 4.47962229484571e-06, "loss": 0.02182178646326065, "step": 21190 }, { "epoch": 0.19948235294117647, "grad_norm": 0.6828696160577551, "learning_rate": 4.479093856781935e-06, "loss": 0.023508578538894653, "step": 21195 }, { "epoch": 0.19952941176470587, "grad_norm": 0.6581381142265434, "learning_rate": 4.478565605685431e-06, "loss": 0.027971488237380982, "step": 21200 }, { "epoch": 0.1995764705882353, "grad_norm": 0.6309233983756992, "learning_rate": 4.478037541445975e-06, "loss": 0.028514602780342103, "step": 21205 }, { "epoch": 0.1996235294117647, "grad_norm": 0.5741935974036534, "learning_rate": 4.47750966395343e-06, "loss": 0.0336182564496994, "step": 21210 }, { "epoch": 0.19967058823529413, "grad_norm": 0.5660230379089144, "learning_rate": 4.476981973097755e-06, "loss": 0.029553759098052978, "step": 21215 }, { "epoch": 0.19971764705882353, "grad_norm": 0.39248915756941466, "learning_rate": 4.476454468768992e-06, "loss": 0.02507094144821167, "step": 21220 }, { "epoch": 0.19976470588235293, "grad_norm": 0.5087041725392383, "learning_rate": 4.4759271508572814e-06, "loss": 0.026211643218994142, "step": 21225 }, { "epoch": 0.19981176470588236, "grad_norm": 0.510095455342406, "learning_rate": 4.475400019252851e-06, "loss": 0.022738076746463776, "step": 21230 }, { "epoch": 0.19985882352941176, "grad_norm": 0.5729931322595284, "learning_rate": 4.474873073846019e-06, "loss": 0.022737161815166475, "step": 21235 }, { "epoch": 0.19990588235294118, "grad_norm": 1.0605017732756268, "learning_rate": 4.474346314527194e-06, "loss": 0.02608751952648163, "step": 21240 }, { "epoch": 0.19995294117647058, "grad_norm": 0.790690595753887, "learning_rate": 4.4738197411868756e-06, "loss": 0.025946179032325746, "step": 21245 }, { "epoch": 0.2, "grad_norm": 0.7066508940295588, "learning_rate": 4.473293353715653e-06, "loss": 0.03201587498188019, "step": 21250 }, { "epoch": 0.2000470588235294, "grad_norm": 0.8266160584976623, "learning_rate": 4.472767152004205e-06, "loss": 0.026999878883361816, "step": 21255 }, { "epoch": 0.2000941176470588, "grad_norm": 0.6326478716327875, "learning_rate": 4.472241135943301e-06, "loss": 0.02482413947582245, "step": 21260 }, { "epoch": 0.20014117647058824, "grad_norm": 0.49396069672040727, "learning_rate": 4.471715305423801e-06, "loss": 0.020820340514183043, "step": 21265 }, { "epoch": 0.20018823529411764, "grad_norm": 0.6082998818032762, "learning_rate": 4.471189660336653e-06, "loss": 0.022189909219741823, "step": 21270 }, { "epoch": 0.20023529411764707, "grad_norm": 0.6800217267478966, "learning_rate": 4.4706642005728975e-06, "loss": 0.02830217182636261, "step": 21275 }, { "epoch": 0.20028235294117647, "grad_norm": 0.654562488891467, "learning_rate": 4.470138926023662e-06, "loss": 0.02741277515888214, "step": 21280 }, { "epoch": 0.2003294117647059, "grad_norm": 0.6196290884060041, "learning_rate": 4.4696138365801635e-06, "loss": 0.024046897888183594, "step": 21285 }, { "epoch": 0.2003764705882353, "grad_norm": 0.5817481473017289, "learning_rate": 4.46908893213371e-06, "loss": 0.029911279678344727, "step": 21290 }, { "epoch": 0.2004235294117647, "grad_norm": 0.7074571329840195, "learning_rate": 4.4685642125756996e-06, "loss": 0.026125574111938478, "step": 21295 }, { "epoch": 0.20047058823529412, "grad_norm": 0.5014672660617312, "learning_rate": 4.468039677797616e-06, "loss": 0.023114793002605438, "step": 21300 }, { "epoch": 0.20051764705882352, "grad_norm": 0.6182593802069972, "learning_rate": 4.467515327691035e-06, "loss": 0.020285038650035857, "step": 21305 }, { "epoch": 0.20056470588235295, "grad_norm": 0.5673542098921207, "learning_rate": 4.466991162147622e-06, "loss": 0.026328104734420776, "step": 21310 }, { "epoch": 0.20061176470588235, "grad_norm": 0.5093947755106853, "learning_rate": 4.466467181059129e-06, "loss": 0.023050850629806517, "step": 21315 }, { "epoch": 0.20065882352941178, "grad_norm": 0.8382679912119864, "learning_rate": 4.465943384317397e-06, "loss": 0.02477583885192871, "step": 21320 }, { "epoch": 0.20070588235294118, "grad_norm": 0.46947345717014805, "learning_rate": 4.465419771814359e-06, "loss": 0.022284573316574095, "step": 21325 }, { "epoch": 0.20075294117647058, "grad_norm": 0.37036767552812505, "learning_rate": 4.464896343442033e-06, "loss": 0.02461118996143341, "step": 21330 }, { "epoch": 0.2008, "grad_norm": 0.8633155591351426, "learning_rate": 4.464373099092526e-06, "loss": 0.028143054246902464, "step": 21335 }, { "epoch": 0.2008470588235294, "grad_norm": 0.7380535744261144, "learning_rate": 4.463850038658036e-06, "loss": 0.03055635392665863, "step": 21340 }, { "epoch": 0.20089411764705883, "grad_norm": 0.7145409451413657, "learning_rate": 4.463327162030847e-06, "loss": 0.03887307643890381, "step": 21345 }, { "epoch": 0.20094117647058823, "grad_norm": 0.443468062036989, "learning_rate": 4.462804469103334e-06, "loss": 0.023834289610385896, "step": 21350 }, { "epoch": 0.20098823529411763, "grad_norm": 0.6039899845283283, "learning_rate": 4.4622819597679556e-06, "loss": 0.027022415399551393, "step": 21355 }, { "epoch": 0.20103529411764706, "grad_norm": 0.49431156250286346, "learning_rate": 4.461759633917263e-06, "loss": 0.026473116874694825, "step": 21360 }, { "epoch": 0.20108235294117646, "grad_norm": 0.45455635427216146, "learning_rate": 4.461237491443893e-06, "loss": 0.02448495030403137, "step": 21365 }, { "epoch": 0.2011294117647059, "grad_norm": 0.5435780306259235, "learning_rate": 4.460715532240571e-06, "loss": 0.022930648922920228, "step": 21370 }, { "epoch": 0.2011764705882353, "grad_norm": 0.5879409050772819, "learning_rate": 4.4601937562001106e-06, "loss": 0.029863423109054564, "step": 21375 }, { "epoch": 0.20122352941176472, "grad_norm": 0.4557457895380328, "learning_rate": 4.459672163215413e-06, "loss": 0.023241136968135834, "step": 21380 }, { "epoch": 0.20127058823529412, "grad_norm": 0.6009633425459479, "learning_rate": 4.459150753179466e-06, "loss": 0.03969727754592896, "step": 21385 }, { "epoch": 0.20131764705882352, "grad_norm": 0.606071613336487, "learning_rate": 4.458629525985346e-06, "loss": 0.023739068210124968, "step": 21390 }, { "epoch": 0.20136470588235295, "grad_norm": 0.7413839203048526, "learning_rate": 4.458108481526218e-06, "loss": 0.036324766278266904, "step": 21395 }, { "epoch": 0.20141176470588235, "grad_norm": 0.5960509442270607, "learning_rate": 4.4575876196953314e-06, "loss": 0.02265215516090393, "step": 21400 }, { "epoch": 0.20145882352941177, "grad_norm": 0.6886310984635539, "learning_rate": 4.457066940386026e-06, "loss": 0.029269301891326906, "step": 21405 }, { "epoch": 0.20150588235294117, "grad_norm": 0.6046958992830103, "learning_rate": 4.456546443491725e-06, "loss": 0.030926287174224854, "step": 21410 }, { "epoch": 0.2015529411764706, "grad_norm": 0.6171427011002265, "learning_rate": 4.456026128905943e-06, "loss": 0.031404119729995725, "step": 21415 }, { "epoch": 0.2016, "grad_norm": 0.5311679312417522, "learning_rate": 4.45550599652228e-06, "loss": 0.02477797269821167, "step": 21420 }, { "epoch": 0.2016470588235294, "grad_norm": 0.4656602874854126, "learning_rate": 4.45498604623442e-06, "loss": 0.025015318393707277, "step": 21425 }, { "epoch": 0.20169411764705883, "grad_norm": 0.708938129014206, "learning_rate": 4.454466277936139e-06, "loss": 0.027337300777435302, "step": 21430 }, { "epoch": 0.20174117647058823, "grad_norm": 0.6905408672195875, "learning_rate": 4.453946691521296e-06, "loss": 0.029707491397857666, "step": 21435 }, { "epoch": 0.20178823529411766, "grad_norm": 0.7632844805190141, "learning_rate": 4.453427286883839e-06, "loss": 0.021065530180931092, "step": 21440 }, { "epoch": 0.20183529411764706, "grad_norm": 0.6558472788111798, "learning_rate": 4.4529080639177995e-06, "loss": 0.02667764127254486, "step": 21445 }, { "epoch": 0.20188235294117646, "grad_norm": 0.4892895224346532, "learning_rate": 4.4523890225172975e-06, "loss": 0.027757585048675537, "step": 21450 }, { "epoch": 0.20192941176470589, "grad_norm": 0.8052211217012133, "learning_rate": 4.451870162576539e-06, "loss": 0.02355828881263733, "step": 21455 }, { "epoch": 0.20197647058823529, "grad_norm": 0.6319301811828477, "learning_rate": 4.4513514839898196e-06, "loss": 0.021394182741642, "step": 21460 }, { "epoch": 0.2020235294117647, "grad_norm": 0.8005201072474429, "learning_rate": 4.450832986651513e-06, "loss": 0.025340288877487183, "step": 21465 }, { "epoch": 0.2020705882352941, "grad_norm": 0.6421030835471322, "learning_rate": 4.450314670456087e-06, "loss": 0.029188096523284912, "step": 21470 }, { "epoch": 0.20211764705882354, "grad_norm": 0.6636420058562417, "learning_rate": 4.449796535298092e-06, "loss": 0.023963458836078644, "step": 21475 }, { "epoch": 0.20216470588235294, "grad_norm": 0.8961771713111406, "learning_rate": 4.449278581072163e-06, "loss": 0.03287954926490784, "step": 21480 }, { "epoch": 0.20221176470588234, "grad_norm": 0.5612717400036366, "learning_rate": 4.448760807673025e-06, "loss": 0.02683660387992859, "step": 21485 }, { "epoch": 0.20225882352941177, "grad_norm": 0.9939967168251646, "learning_rate": 4.448243214995484e-06, "loss": 0.023220974206924438, "step": 21490 }, { "epoch": 0.20230588235294117, "grad_norm": 0.6684797721519282, "learning_rate": 4.447725802934436e-06, "loss": 0.030555397272109985, "step": 21495 }, { "epoch": 0.2023529411764706, "grad_norm": 0.7567599823285817, "learning_rate": 4.447208571384858e-06, "loss": 0.029205167293548585, "step": 21500 }, { "epoch": 0.2024, "grad_norm": 0.8871034409620214, "learning_rate": 4.446691520241817e-06, "loss": 0.026242068409919737, "step": 21505 }, { "epoch": 0.20244705882352942, "grad_norm": 0.6840953419825451, "learning_rate": 4.446174649400462e-06, "loss": 0.026959887146949767, "step": 21510 }, { "epoch": 0.20249411764705882, "grad_norm": 0.7087660776168639, "learning_rate": 4.445657958756028e-06, "loss": 0.026356831192970276, "step": 21515 }, { "epoch": 0.20254117647058822, "grad_norm": 0.7400639317348919, "learning_rate": 4.445141448203837e-06, "loss": 0.02757796049118042, "step": 21520 }, { "epoch": 0.20258823529411765, "grad_norm": 0.7283524595062605, "learning_rate": 4.444625117639296e-06, "loss": 0.027158674597740174, "step": 21525 }, { "epoch": 0.20263529411764705, "grad_norm": 0.6379784523746261, "learning_rate": 4.4441089669578935e-06, "loss": 0.025410491228103637, "step": 21530 }, { "epoch": 0.20268235294117648, "grad_norm": 0.6444561764225307, "learning_rate": 4.443592996055206e-06, "loss": 0.02729859948158264, "step": 21535 }, { "epoch": 0.20272941176470588, "grad_norm": 0.8658870900340369, "learning_rate": 4.443077204826895e-06, "loss": 0.027996498346328735, "step": 21540 }, { "epoch": 0.20277647058823528, "grad_norm": 0.47045488051728324, "learning_rate": 4.442561593168706e-06, "loss": 0.02172818034887314, "step": 21545 }, { "epoch": 0.2028235294117647, "grad_norm": 0.5662742258748622, "learning_rate": 4.442046160976466e-06, "loss": 0.024917729198932648, "step": 21550 }, { "epoch": 0.2028705882352941, "grad_norm": 0.7520644096095167, "learning_rate": 4.441530908146095e-06, "loss": 0.027799376845359804, "step": 21555 }, { "epoch": 0.20291764705882354, "grad_norm": 0.4829692978621879, "learning_rate": 4.441015834573589e-06, "loss": 0.029491743445396422, "step": 21560 }, { "epoch": 0.20296470588235294, "grad_norm": 0.5637029945907133, "learning_rate": 4.440500940155031e-06, "loss": 0.023079711198806762, "step": 21565 }, { "epoch": 0.20301176470588236, "grad_norm": 0.5536329064347637, "learning_rate": 4.439986224786592e-06, "loss": 0.026079204678535462, "step": 21570 }, { "epoch": 0.20305882352941176, "grad_norm": 1.039036449400825, "learning_rate": 4.43947168836452e-06, "loss": 0.033609253168106076, "step": 21575 }, { "epoch": 0.20310588235294116, "grad_norm": 0.6430672722612175, "learning_rate": 4.438957330785155e-06, "loss": 0.025559276342391968, "step": 21580 }, { "epoch": 0.2031529411764706, "grad_norm": 0.654081011601188, "learning_rate": 4.4384431519449145e-06, "loss": 0.028020411729812622, "step": 21585 }, { "epoch": 0.2032, "grad_norm": 0.48402158977931936, "learning_rate": 4.437929151740304e-06, "loss": 0.019292131066322327, "step": 21590 }, { "epoch": 0.20324705882352942, "grad_norm": 0.7843415778885142, "learning_rate": 4.437415330067913e-06, "loss": 0.0271173894405365, "step": 21595 }, { "epoch": 0.20329411764705882, "grad_norm": 0.583790786485119, "learning_rate": 4.436901686824411e-06, "loss": 0.02557438611984253, "step": 21600 }, { "epoch": 0.20334117647058825, "grad_norm": 0.5676337703484695, "learning_rate": 4.436388221906556e-06, "loss": 0.032841643691062926, "step": 21605 }, { "epoch": 0.20338823529411765, "grad_norm": 0.74295892335848, "learning_rate": 4.435874935211186e-06, "loss": 0.02911847233772278, "step": 21610 }, { "epoch": 0.20343529411764705, "grad_norm": 0.6531671709329187, "learning_rate": 4.435361826635224e-06, "loss": 0.023105049133300783, "step": 21615 }, { "epoch": 0.20348235294117648, "grad_norm": 0.5736006591351541, "learning_rate": 4.434848896075675e-06, "loss": 0.02474707067012787, "step": 21620 }, { "epoch": 0.20352941176470588, "grad_norm": 0.5116003478330597, "learning_rate": 4.4343361434296324e-06, "loss": 0.023405596613883972, "step": 21625 }, { "epoch": 0.2035764705882353, "grad_norm": 0.5196525063672272, "learning_rate": 4.4338235685942645e-06, "loss": 0.023954012989997865, "step": 21630 }, { "epoch": 0.2036235294117647, "grad_norm": 0.43013906801969903, "learning_rate": 4.433311171466831e-06, "loss": 0.02762432098388672, "step": 21635 }, { "epoch": 0.20367058823529413, "grad_norm": 0.5888715506897324, "learning_rate": 4.432798951944669e-06, "loss": 0.029786962270736694, "step": 21640 }, { "epoch": 0.20371764705882353, "grad_norm": 0.7982930647849797, "learning_rate": 4.432286909925201e-06, "loss": 0.02879374623298645, "step": 21645 }, { "epoch": 0.20376470588235293, "grad_norm": 0.6714796279907778, "learning_rate": 4.4317750453059336e-06, "loss": 0.02945687472820282, "step": 21650 }, { "epoch": 0.20381176470588236, "grad_norm": 0.5300716791191388, "learning_rate": 4.431263357984452e-06, "loss": 0.02210685908794403, "step": 21655 }, { "epoch": 0.20385882352941176, "grad_norm": 0.8400750136154984, "learning_rate": 4.43075184785843e-06, "loss": 0.025736334919929504, "step": 21660 }, { "epoch": 0.2039058823529412, "grad_norm": 0.6603662399703597, "learning_rate": 4.43024051482562e-06, "loss": 0.028552943468093873, "step": 21665 }, { "epoch": 0.2039529411764706, "grad_norm": 0.5564613785453183, "learning_rate": 4.429729358783855e-06, "loss": 0.027071940898895263, "step": 21670 }, { "epoch": 0.204, "grad_norm": 0.9437525393172377, "learning_rate": 4.429218379631057e-06, "loss": 0.02932274341583252, "step": 21675 }, { "epoch": 0.20404705882352941, "grad_norm": 0.5381426922189614, "learning_rate": 4.428707577265226e-06, "loss": 0.024676814675331116, "step": 21680 }, { "epoch": 0.20409411764705881, "grad_norm": 0.568201460301439, "learning_rate": 4.428196951584445e-06, "loss": 0.026218846440315247, "step": 21685 }, { "epoch": 0.20414117647058824, "grad_norm": 0.5044466394168376, "learning_rate": 4.427686502486878e-06, "loss": 0.023926252126693727, "step": 21690 }, { "epoch": 0.20418823529411764, "grad_norm": 0.7164235768691981, "learning_rate": 4.427176229870776e-06, "loss": 0.031222113966941835, "step": 21695 }, { "epoch": 0.20423529411764707, "grad_norm": 0.565235604355109, "learning_rate": 4.426666133634465e-06, "loss": 0.026174911856651308, "step": 21700 }, { "epoch": 0.20428235294117647, "grad_norm": 0.5624919984673524, "learning_rate": 4.426156213676358e-06, "loss": 0.023444384336471558, "step": 21705 }, { "epoch": 0.20432941176470587, "grad_norm": 0.6366039283711207, "learning_rate": 4.425646469894949e-06, "loss": 0.02157982438802719, "step": 21710 }, { "epoch": 0.2043764705882353, "grad_norm": 0.6002226482724221, "learning_rate": 4.4251369021888126e-06, "loss": 0.02317272573709488, "step": 21715 }, { "epoch": 0.2044235294117647, "grad_norm": 0.5720496027957325, "learning_rate": 4.424627510456607e-06, "loss": 0.028201797604560853, "step": 21720 }, { "epoch": 0.20447058823529413, "grad_norm": 0.6502764008214528, "learning_rate": 4.424118294597069e-06, "loss": 0.028225046396255494, "step": 21725 }, { "epoch": 0.20451764705882353, "grad_norm": 0.513396175819958, "learning_rate": 4.4236092545090196e-06, "loss": 0.02555570602416992, "step": 21730 }, { "epoch": 0.20456470588235295, "grad_norm": 0.5842357685073319, "learning_rate": 4.423100390091362e-06, "loss": 0.022504732012748718, "step": 21735 }, { "epoch": 0.20461176470588235, "grad_norm": 0.514613237051035, "learning_rate": 4.422591701243077e-06, "loss": 0.02554783821105957, "step": 21740 }, { "epoch": 0.20465882352941175, "grad_norm": 0.5486693173297508, "learning_rate": 4.422083187863231e-06, "loss": 0.02367311865091324, "step": 21745 }, { "epoch": 0.20470588235294118, "grad_norm": 0.5814620823364031, "learning_rate": 4.421574849850968e-06, "loss": 0.02494193911552429, "step": 21750 }, { "epoch": 0.20475294117647058, "grad_norm": 0.8905579938531739, "learning_rate": 4.421066687105517e-06, "loss": 0.023297020792961122, "step": 21755 }, { "epoch": 0.2048, "grad_norm": 0.3927784572866874, "learning_rate": 4.4205586995261835e-06, "loss": 0.024684256315231322, "step": 21760 }, { "epoch": 0.2048470588235294, "grad_norm": 0.4255870427964411, "learning_rate": 4.420050887012356e-06, "loss": 0.027058571577072144, "step": 21765 }, { "epoch": 0.2048941176470588, "grad_norm": 0.6165222926722174, "learning_rate": 4.419543249463507e-06, "loss": 0.03793401718139648, "step": 21770 }, { "epoch": 0.20494117647058824, "grad_norm": 0.5745993575101557, "learning_rate": 4.419035786779185e-06, "loss": 0.025394135713577272, "step": 21775 }, { "epoch": 0.20498823529411764, "grad_norm": 0.6390631470403275, "learning_rate": 4.418528498859021e-06, "loss": 0.028445306420326232, "step": 21780 }, { "epoch": 0.20503529411764707, "grad_norm": 0.6196123019309687, "learning_rate": 4.418021385602727e-06, "loss": 0.02720843255519867, "step": 21785 }, { "epoch": 0.20508235294117647, "grad_norm": 0.4260383607723973, "learning_rate": 4.417514446910097e-06, "loss": 0.023354727029800414, "step": 21790 }, { "epoch": 0.2051294117647059, "grad_norm": 0.6770218739063301, "learning_rate": 4.417007682681001e-06, "loss": 0.027212125062942506, "step": 21795 }, { "epoch": 0.2051764705882353, "grad_norm": 0.6406927717763735, "learning_rate": 4.416501092815395e-06, "loss": 0.024011024832725526, "step": 21800 }, { "epoch": 0.2052235294117647, "grad_norm": 0.48540359991975734, "learning_rate": 4.4159946772133115e-06, "loss": 0.025596916675567627, "step": 21805 }, { "epoch": 0.20527058823529412, "grad_norm": 0.5279998272712253, "learning_rate": 4.415488435774864e-06, "loss": 0.025085338950157167, "step": 21810 }, { "epoch": 0.20531764705882352, "grad_norm": 0.5336253057106285, "learning_rate": 4.4149823684002464e-06, "loss": 0.02681472897529602, "step": 21815 }, { "epoch": 0.20536470588235295, "grad_norm": 0.8061608846408718, "learning_rate": 4.414476474989733e-06, "loss": 0.02649174928665161, "step": 21820 }, { "epoch": 0.20541176470588235, "grad_norm": 0.5252557953700087, "learning_rate": 4.413970755443677e-06, "loss": 0.02353396713733673, "step": 21825 }, { "epoch": 0.20545882352941178, "grad_norm": 0.5660427065860272, "learning_rate": 4.413465209662513e-06, "loss": 0.022964251041412354, "step": 21830 }, { "epoch": 0.20550588235294118, "grad_norm": 0.6424265570838769, "learning_rate": 4.412959837546755e-06, "loss": 0.0266552597284317, "step": 21835 }, { "epoch": 0.20555294117647058, "grad_norm": 0.7299665203855524, "learning_rate": 4.4124546389969956e-06, "loss": 0.025765770673751832, "step": 21840 }, { "epoch": 0.2056, "grad_norm": 0.7356863152920663, "learning_rate": 4.4119496139139066e-06, "loss": 0.03073829412460327, "step": 21845 }, { "epoch": 0.2056470588235294, "grad_norm": 0.572286490643696, "learning_rate": 4.411444762198243e-06, "loss": 0.02618076801300049, "step": 21850 }, { "epoch": 0.20569411764705883, "grad_norm": 0.6712981921705827, "learning_rate": 4.4109400837508365e-06, "loss": 0.02585026025772095, "step": 21855 }, { "epoch": 0.20574117647058823, "grad_norm": 0.7084829743021283, "learning_rate": 4.410435578472596e-06, "loss": 0.029403302073478698, "step": 21860 }, { "epoch": 0.20578823529411766, "grad_norm": 0.6283967231650348, "learning_rate": 4.409931246264515e-06, "loss": 0.025040721893310545, "step": 21865 }, { "epoch": 0.20583529411764706, "grad_norm": 0.7138221033534651, "learning_rate": 4.409427087027663e-06, "loss": 0.020602121949195862, "step": 21870 }, { "epoch": 0.20588235294117646, "grad_norm": 0.8778107837471364, "learning_rate": 4.408923100663187e-06, "loss": 0.026806330680847167, "step": 21875 }, { "epoch": 0.2059294117647059, "grad_norm": 0.5147790106818921, "learning_rate": 4.408419287072317e-06, "loss": 0.02516084015369415, "step": 21880 }, { "epoch": 0.2059764705882353, "grad_norm": 0.51300751215725, "learning_rate": 4.407915646156361e-06, "loss": 0.02246149182319641, "step": 21885 }, { "epoch": 0.20602352941176472, "grad_norm": 0.7417091516829735, "learning_rate": 4.407412177816704e-06, "loss": 0.02897353172302246, "step": 21890 }, { "epoch": 0.20607058823529412, "grad_norm": 0.7733167018689204, "learning_rate": 4.406908881954811e-06, "loss": 0.03221967816352844, "step": 21895 }, { "epoch": 0.20611764705882352, "grad_norm": 0.7187560720106462, "learning_rate": 4.406405758472225e-06, "loss": 0.024961283802986144, "step": 21900 }, { "epoch": 0.20616470588235294, "grad_norm": 0.6305085711085745, "learning_rate": 4.40590280727057e-06, "loss": 0.027417796850204467, "step": 21905 }, { "epoch": 0.20621176470588234, "grad_norm": 0.4930043698189823, "learning_rate": 4.405400028251545e-06, "loss": 0.025803405046463012, "step": 21910 }, { "epoch": 0.20625882352941177, "grad_norm": 1.0868916740006678, "learning_rate": 4.404897421316931e-06, "loss": 0.022465780377388, "step": 21915 }, { "epoch": 0.20630588235294117, "grad_norm": 0.574329535999574, "learning_rate": 4.404394986368584e-06, "loss": 0.030675628781318666, "step": 21920 }, { "epoch": 0.2063529411764706, "grad_norm": 0.4801767124308936, "learning_rate": 4.403892723308443e-06, "loss": 0.02862520217895508, "step": 21925 }, { "epoch": 0.2064, "grad_norm": 0.5704824946156436, "learning_rate": 4.4033906320385205e-06, "loss": 0.024623572826385498, "step": 21930 }, { "epoch": 0.2064470588235294, "grad_norm": 0.8942615978561077, "learning_rate": 4.402888712460909e-06, "loss": 0.021679842472076417, "step": 21935 }, { "epoch": 0.20649411764705883, "grad_norm": 0.6104842060464903, "learning_rate": 4.402386964477779e-06, "loss": 0.03039799928665161, "step": 21940 }, { "epoch": 0.20654117647058823, "grad_norm": 0.6524694124795529, "learning_rate": 4.4018853879913815e-06, "loss": 0.025210440158843994, "step": 21945 }, { "epoch": 0.20658823529411766, "grad_norm": 1.8837765308047614, "learning_rate": 4.401383982904041e-06, "loss": 0.02552245855331421, "step": 21950 }, { "epoch": 0.20663529411764706, "grad_norm": 0.508298240244913, "learning_rate": 4.400882749118163e-06, "loss": 0.026499345898628235, "step": 21955 }, { "epoch": 0.20668235294117648, "grad_norm": 0.6267348548617441, "learning_rate": 4.4003816865362296e-06, "loss": 0.025430023670196533, "step": 21960 }, { "epoch": 0.20672941176470588, "grad_norm": 0.5291274205578497, "learning_rate": 4.3998807950608e-06, "loss": 0.031075358390808105, "step": 21965 }, { "epoch": 0.20677647058823528, "grad_norm": 0.36269757712259204, "learning_rate": 4.399380074594514e-06, "loss": 0.02232416868209839, "step": 21970 }, { "epoch": 0.2068235294117647, "grad_norm": 0.731671315301, "learning_rate": 4.398879525040084e-06, "loss": 0.027039998769760133, "step": 21975 }, { "epoch": 0.2068705882352941, "grad_norm": 0.5265215133546144, "learning_rate": 4.398379146300306e-06, "loss": 0.030995309352874756, "step": 21980 }, { "epoch": 0.20691764705882354, "grad_norm": 0.8328746259911607, "learning_rate": 4.3978789382780465e-06, "loss": 0.02637707591056824, "step": 21985 }, { "epoch": 0.20696470588235294, "grad_norm": 0.5778842578053205, "learning_rate": 4.397378900876255e-06, "loss": 0.02510164976119995, "step": 21990 }, { "epoch": 0.20701176470588234, "grad_norm": 0.6138871988241726, "learning_rate": 4.396879033997957e-06, "loss": 0.03713657855987549, "step": 21995 }, { "epoch": 0.20705882352941177, "grad_norm": 0.6237160815164272, "learning_rate": 4.396379337546252e-06, "loss": 0.0260989248752594, "step": 22000 }, { "epoch": 0.20710588235294117, "grad_norm": 0.49007958382514055, "learning_rate": 4.39587981142432e-06, "loss": 0.025979006290435792, "step": 22005 }, { "epoch": 0.2071529411764706, "grad_norm": 0.4911338036229784, "learning_rate": 4.3953804555354165e-06, "loss": 0.021611711382865904, "step": 22010 }, { "epoch": 0.2072, "grad_norm": 0.6230531328138955, "learning_rate": 4.394881269782874e-06, "loss": 0.02040097564458847, "step": 22015 }, { "epoch": 0.20724705882352942, "grad_norm": 0.6243300577691125, "learning_rate": 4.394382254070102e-06, "loss": 0.02536349892616272, "step": 22020 }, { "epoch": 0.20729411764705882, "grad_norm": 0.41154648950798994, "learning_rate": 4.393883408300589e-06, "loss": 0.022161757946014403, "step": 22025 }, { "epoch": 0.20734117647058822, "grad_norm": 0.7621619988926193, "learning_rate": 4.393384732377895e-06, "loss": 0.029007571935653686, "step": 22030 }, { "epoch": 0.20738823529411765, "grad_norm": 0.4342792676888838, "learning_rate": 4.39288622620566e-06, "loss": 0.021418526768684387, "step": 22035 }, { "epoch": 0.20743529411764705, "grad_norm": 0.848722609444538, "learning_rate": 4.392387889687602e-06, "loss": 0.03565249741077423, "step": 22040 }, { "epoch": 0.20748235294117648, "grad_norm": 0.6554027055733186, "learning_rate": 4.391889722727511e-06, "loss": 0.026811397075653075, "step": 22045 }, { "epoch": 0.20752941176470588, "grad_norm": 0.5770609086853585, "learning_rate": 4.3913917252292575e-06, "loss": 0.02192641794681549, "step": 22050 }, { "epoch": 0.2075764705882353, "grad_norm": 0.4594266322609588, "learning_rate": 4.390893897096786e-06, "loss": 0.03291564285755157, "step": 22055 }, { "epoch": 0.2076235294117647, "grad_norm": 0.5171508947422198, "learning_rate": 4.3903962382341195e-06, "loss": 0.023617975413799286, "step": 22060 }, { "epoch": 0.2076705882352941, "grad_norm": 0.7181945820116589, "learning_rate": 4.389898748545354e-06, "loss": 0.024378547072410585, "step": 22065 }, { "epoch": 0.20771764705882353, "grad_norm": 0.48712816315238683, "learning_rate": 4.389401427934663e-06, "loss": 0.02854410409927368, "step": 22070 }, { "epoch": 0.20776470588235293, "grad_norm": 0.35581034955870466, "learning_rate": 4.388904276306296e-06, "loss": 0.020897941291332246, "step": 22075 }, { "epoch": 0.20781176470588236, "grad_norm": 0.5960896278720851, "learning_rate": 4.388407293564579e-06, "loss": 0.022349968552589417, "step": 22080 }, { "epoch": 0.20785882352941176, "grad_norm": 0.7524451748900529, "learning_rate": 4.387910479613913e-06, "loss": 0.026169902086257933, "step": 22085 }, { "epoch": 0.20790588235294116, "grad_norm": 0.6004975783451197, "learning_rate": 4.387413834358775e-06, "loss": 0.025632885098457337, "step": 22090 }, { "epoch": 0.2079529411764706, "grad_norm": 0.6084293253376168, "learning_rate": 4.386917357703718e-06, "loss": 0.022499047219753265, "step": 22095 }, { "epoch": 0.208, "grad_norm": 0.7371630723792103, "learning_rate": 4.3864210495533705e-06, "loss": 0.023833946883678438, "step": 22100 }, { "epoch": 0.20804705882352942, "grad_norm": 0.6284872243617708, "learning_rate": 4.385924909812435e-06, "loss": 0.022634930908679962, "step": 22105 }, { "epoch": 0.20809411764705882, "grad_norm": 0.711189161760758, "learning_rate": 4.3854289383856915e-06, "loss": 0.027755826711654663, "step": 22110 }, { "epoch": 0.20814117647058825, "grad_norm": 0.579712342140908, "learning_rate": 4.384933135177994e-06, "loss": 0.026386833190917967, "step": 22115 }, { "epoch": 0.20818823529411765, "grad_norm": 0.5766644701357281, "learning_rate": 4.384437500094274e-06, "loss": 0.025945791602134706, "step": 22120 }, { "epoch": 0.20823529411764705, "grad_norm": 0.8146710371318971, "learning_rate": 4.383942033039535e-06, "loss": 0.02661944031715393, "step": 22125 }, { "epoch": 0.20828235294117647, "grad_norm": 0.5587423144945733, "learning_rate": 4.3834467339188565e-06, "loss": 0.025603145360946655, "step": 22130 }, { "epoch": 0.20832941176470587, "grad_norm": 0.5446284364713259, "learning_rate": 4.3829516026373955e-06, "loss": 0.026923584938049316, "step": 22135 }, { "epoch": 0.2083764705882353, "grad_norm": 0.573865964695956, "learning_rate": 4.382456639100382e-06, "loss": 0.024014365673065186, "step": 22140 }, { "epoch": 0.2084235294117647, "grad_norm": 0.9150138731102626, "learning_rate": 4.38196184321312e-06, "loss": 0.023897144198417663, "step": 22145 }, { "epoch": 0.20847058823529413, "grad_norm": 0.5768089328652817, "learning_rate": 4.38146721488099e-06, "loss": 0.026374387741088866, "step": 22150 }, { "epoch": 0.20851764705882353, "grad_norm": 0.6678879526391449, "learning_rate": 4.380972754009446e-06, "loss": 0.026811560988426207, "step": 22155 }, { "epoch": 0.20856470588235293, "grad_norm": 0.6581818503912701, "learning_rate": 4.380478460504019e-06, "loss": 0.02811487913131714, "step": 22160 }, { "epoch": 0.20861176470588236, "grad_norm": 0.39762540258119516, "learning_rate": 4.3799843342703125e-06, "loss": 0.02133687138557434, "step": 22165 }, { "epoch": 0.20865882352941176, "grad_norm": 0.5788216671470123, "learning_rate": 4.379490375214003e-06, "loss": 0.023631688952445985, "step": 22170 }, { "epoch": 0.20870588235294119, "grad_norm": 0.697420413975885, "learning_rate": 4.378996583240845e-06, "loss": 0.02198196351528168, "step": 22175 }, { "epoch": 0.20875294117647059, "grad_norm": 0.7055265251318779, "learning_rate": 4.378502958256667e-06, "loss": 0.032406982779502866, "step": 22180 }, { "epoch": 0.2088, "grad_norm": 0.4929644218199163, "learning_rate": 4.378009500167367e-06, "loss": 0.02274909019470215, "step": 22185 }, { "epoch": 0.2088470588235294, "grad_norm": 0.7827400861984384, "learning_rate": 4.377516208878924e-06, "loss": 0.026716679334640503, "step": 22190 }, { "epoch": 0.2088941176470588, "grad_norm": 0.9854219589407932, "learning_rate": 4.377023084297386e-06, "loss": 0.025792595744132996, "step": 22195 }, { "epoch": 0.20894117647058824, "grad_norm": 0.5466896741283244, "learning_rate": 4.376530126328878e-06, "loss": 0.02523861825466156, "step": 22200 }, { "epoch": 0.20898823529411764, "grad_norm": 0.5343197493548774, "learning_rate": 4.376037334879598e-06, "loss": 0.025511583685874938, "step": 22205 }, { "epoch": 0.20903529411764707, "grad_norm": 0.5054013298642928, "learning_rate": 4.375544709855818e-06, "loss": 0.02321496307849884, "step": 22210 }, { "epoch": 0.20908235294117647, "grad_norm": 0.734088761785434, "learning_rate": 4.375052251163882e-06, "loss": 0.02329680621623993, "step": 22215 }, { "epoch": 0.20912941176470587, "grad_norm": 0.7791487364986345, "learning_rate": 4.374559958710212e-06, "loss": 0.023209452629089355, "step": 22220 }, { "epoch": 0.2091764705882353, "grad_norm": 0.6258480955086437, "learning_rate": 4.3740678324013e-06, "loss": 0.02770172357559204, "step": 22225 }, { "epoch": 0.2092235294117647, "grad_norm": 0.4748789298879195, "learning_rate": 4.373575872143712e-06, "loss": 0.02316313236951828, "step": 22230 }, { "epoch": 0.20927058823529412, "grad_norm": 0.6324314817861778, "learning_rate": 4.373084077844091e-06, "loss": 0.023399131000041963, "step": 22235 }, { "epoch": 0.20931764705882352, "grad_norm": 0.84968461315261, "learning_rate": 4.372592449409147e-06, "loss": 0.03275604844093323, "step": 22240 }, { "epoch": 0.20936470588235295, "grad_norm": 0.5192984044032014, "learning_rate": 4.372100986745671e-06, "loss": 0.026884055137634276, "step": 22245 }, { "epoch": 0.20941176470588235, "grad_norm": 0.6321855814747304, "learning_rate": 4.371609689760521e-06, "loss": 0.03165543079376221, "step": 22250 }, { "epoch": 0.20945882352941175, "grad_norm": 0.6028053166830718, "learning_rate": 4.371118558360631e-06, "loss": 0.023956547677516937, "step": 22255 }, { "epoch": 0.20950588235294118, "grad_norm": 0.478545483521444, "learning_rate": 4.37062759245301e-06, "loss": 0.027256864309310912, "step": 22260 }, { "epoch": 0.20955294117647058, "grad_norm": 0.463767206703272, "learning_rate": 4.370136791944735e-06, "loss": 0.026264137029647826, "step": 22265 }, { "epoch": 0.2096, "grad_norm": 0.6345868805177867, "learning_rate": 4.369646156742961e-06, "loss": 0.026895198225975036, "step": 22270 }, { "epoch": 0.2096470588235294, "grad_norm": 0.5660970810356775, "learning_rate": 4.369155686754915e-06, "loss": 0.020928087830543517, "step": 22275 }, { "epoch": 0.20969411764705884, "grad_norm": 0.5759094109523056, "learning_rate": 4.3686653818878945e-06, "loss": 0.02294084131717682, "step": 22280 }, { "epoch": 0.20974117647058824, "grad_norm": 0.7314516746532258, "learning_rate": 4.368175242049271e-06, "loss": 0.024428796768188477, "step": 22285 }, { "epoch": 0.20978823529411764, "grad_norm": 0.6912441509714043, "learning_rate": 4.367685267146488e-06, "loss": 0.025879257917404176, "step": 22290 }, { "epoch": 0.20983529411764706, "grad_norm": 0.4692189054490656, "learning_rate": 4.367195457087065e-06, "loss": 0.026147696375846862, "step": 22295 }, { "epoch": 0.20988235294117646, "grad_norm": 0.5437489484681148, "learning_rate": 4.366705811778592e-06, "loss": 0.02967279851436615, "step": 22300 }, { "epoch": 0.2099294117647059, "grad_norm": 0.698279937215578, "learning_rate": 4.3662163311287285e-06, "loss": 0.026554012298583986, "step": 22305 }, { "epoch": 0.2099764705882353, "grad_norm": 0.7453968909386319, "learning_rate": 4.365727015045209e-06, "loss": 0.02139293998479843, "step": 22310 }, { "epoch": 0.2100235294117647, "grad_norm": 0.5103891020956269, "learning_rate": 4.365237863435845e-06, "loss": 0.028171798586845397, "step": 22315 }, { "epoch": 0.21007058823529412, "grad_norm": 0.635287401683947, "learning_rate": 4.364748876208511e-06, "loss": 0.025600379705429076, "step": 22320 }, { "epoch": 0.21011764705882352, "grad_norm": 0.7098688559606378, "learning_rate": 4.36426005327116e-06, "loss": 0.03277009725570679, "step": 22325 }, { "epoch": 0.21016470588235295, "grad_norm": 0.6038420927662537, "learning_rate": 4.363771394531818e-06, "loss": 0.026752129197120667, "step": 22330 }, { "epoch": 0.21021176470588235, "grad_norm": 0.42025796254140907, "learning_rate": 4.363282899898577e-06, "loss": 0.020527443289756774, "step": 22335 }, { "epoch": 0.21025882352941178, "grad_norm": 0.6703394207141167, "learning_rate": 4.3627945692796065e-06, "loss": 0.02194296717643738, "step": 22340 }, { "epoch": 0.21030588235294118, "grad_norm": 0.5426965596869169, "learning_rate": 4.362306402583146e-06, "loss": 0.02494785338640213, "step": 22345 }, { "epoch": 0.21035294117647058, "grad_norm": 0.9080873034657733, "learning_rate": 4.361818399717507e-06, "loss": 0.02541574239730835, "step": 22350 }, { "epoch": 0.2104, "grad_norm": 0.6657085768129786, "learning_rate": 4.361330560591073e-06, "loss": 0.027167007327079773, "step": 22355 }, { "epoch": 0.2104470588235294, "grad_norm": 0.6402723571751396, "learning_rate": 4.360842885112298e-06, "loss": 0.027488639950752257, "step": 22360 }, { "epoch": 0.21049411764705883, "grad_norm": 0.4599110595058871, "learning_rate": 4.36035537318971e-06, "loss": 0.025456839799880983, "step": 22365 }, { "epoch": 0.21054117647058823, "grad_norm": 1.0303625715359535, "learning_rate": 4.359868024731909e-06, "loss": 0.028241783380508423, "step": 22370 }, { "epoch": 0.21058823529411766, "grad_norm": 0.631689880253786, "learning_rate": 4.35938083964756e-06, "loss": 0.026535260677337646, "step": 22375 }, { "epoch": 0.21063529411764706, "grad_norm": 0.9208453188205158, "learning_rate": 4.3588938178454064e-06, "loss": 0.02821296453475952, "step": 22380 }, { "epoch": 0.21068235294117646, "grad_norm": 0.5464450421925866, "learning_rate": 4.358406959234262e-06, "loss": 0.024494822323322295, "step": 22385 }, { "epoch": 0.2107294117647059, "grad_norm": 0.466056581776276, "learning_rate": 4.357920263723009e-06, "loss": 0.027947205305099487, "step": 22390 }, { "epoch": 0.2107764705882353, "grad_norm": 0.7381382947748761, "learning_rate": 4.357433731220603e-06, "loss": 0.03092098832130432, "step": 22395 }, { "epoch": 0.21082352941176472, "grad_norm": 0.6961867056270095, "learning_rate": 4.356947361636069e-06, "loss": 0.030962097644805908, "step": 22400 }, { "epoch": 0.21087058823529412, "grad_norm": 1.3527545363934184, "learning_rate": 4.356461154878505e-06, "loss": 0.03325237631797791, "step": 22405 }, { "epoch": 0.21091764705882354, "grad_norm": 0.5099361800264817, "learning_rate": 4.3559751108570805e-06, "loss": 0.02463478147983551, "step": 22410 }, { "epoch": 0.21096470588235294, "grad_norm": 0.5016206336103619, "learning_rate": 4.355489229481033e-06, "loss": 0.021014225482940675, "step": 22415 }, { "epoch": 0.21101176470588234, "grad_norm": 0.5759487585924118, "learning_rate": 4.355003510659672e-06, "loss": 0.023210960626602172, "step": 22420 }, { "epoch": 0.21105882352941177, "grad_norm": 0.6678976148267911, "learning_rate": 4.35451795430238e-06, "loss": 0.02815253138542175, "step": 22425 }, { "epoch": 0.21110588235294117, "grad_norm": 0.4370356071340923, "learning_rate": 4.354032560318606e-06, "loss": 0.02175343334674835, "step": 22430 }, { "epoch": 0.2111529411764706, "grad_norm": 0.8008947500384569, "learning_rate": 4.353547328617875e-06, "loss": 0.034018343687057494, "step": 22435 }, { "epoch": 0.2112, "grad_norm": 0.5720855821816819, "learning_rate": 4.353062259109777e-06, "loss": 0.025223159790039064, "step": 22440 }, { "epoch": 0.2112470588235294, "grad_norm": 0.6974335187331254, "learning_rate": 4.352577351703975e-06, "loss": 0.02587924599647522, "step": 22445 }, { "epoch": 0.21129411764705883, "grad_norm": 0.7218113486542957, "learning_rate": 4.3520926063102045e-06, "loss": 0.02554929256439209, "step": 22450 }, { "epoch": 0.21134117647058823, "grad_norm": 0.6539036614388225, "learning_rate": 4.35160802283827e-06, "loss": 0.022437195479869842, "step": 22455 }, { "epoch": 0.21138823529411765, "grad_norm": 0.6901864026247992, "learning_rate": 4.351123601198043e-06, "loss": 0.025417423248291014, "step": 22460 }, { "epoch": 0.21143529411764705, "grad_norm": 0.5083593195942482, "learning_rate": 4.350639341299469e-06, "loss": 0.029027986526489257, "step": 22465 }, { "epoch": 0.21148235294117648, "grad_norm": 0.7675977998382436, "learning_rate": 4.350155243052563e-06, "loss": 0.02522560954093933, "step": 22470 }, { "epoch": 0.21152941176470588, "grad_norm": 0.9179061537769029, "learning_rate": 4.34967130636741e-06, "loss": 0.03359803557395935, "step": 22475 }, { "epoch": 0.21157647058823528, "grad_norm": 0.516513229230736, "learning_rate": 4.349187531154164e-06, "loss": 0.027665966749191286, "step": 22480 }, { "epoch": 0.2116235294117647, "grad_norm": 0.46040094426572237, "learning_rate": 4.348703917323049e-06, "loss": 0.02765776515007019, "step": 22485 }, { "epoch": 0.2116705882352941, "grad_norm": 0.5373692115786831, "learning_rate": 4.34822046478436e-06, "loss": 0.0250049889087677, "step": 22490 }, { "epoch": 0.21171764705882354, "grad_norm": 0.8008990334055978, "learning_rate": 4.3477371734484615e-06, "loss": 0.02622387409210205, "step": 22495 }, { "epoch": 0.21176470588235294, "grad_norm": 0.6888012517268485, "learning_rate": 4.347254043225786e-06, "loss": 0.027914398908615114, "step": 22500 }, { "epoch": 0.21181176470588237, "grad_norm": 0.710413557351339, "learning_rate": 4.34677107402684e-06, "loss": 0.03038742542266846, "step": 22505 }, { "epoch": 0.21185882352941177, "grad_norm": 0.5667223579303723, "learning_rate": 4.3462882657621944e-06, "loss": 0.02928307056427002, "step": 22510 }, { "epoch": 0.21190588235294117, "grad_norm": 0.4478633520874723, "learning_rate": 4.345805618342492e-06, "loss": 0.02408858835697174, "step": 22515 }, { "epoch": 0.2119529411764706, "grad_norm": 0.5821894833431357, "learning_rate": 4.345323131678445e-06, "loss": 0.02687017321586609, "step": 22520 }, { "epoch": 0.212, "grad_norm": 0.9131258770468559, "learning_rate": 4.344840805680836e-06, "loss": 0.028927934169769288, "step": 22525 }, { "epoch": 0.21204705882352942, "grad_norm": 0.5804869578721163, "learning_rate": 4.344358640260514e-06, "loss": 0.02706325054168701, "step": 22530 }, { "epoch": 0.21209411764705882, "grad_norm": 0.8858872667970492, "learning_rate": 4.3438766353284e-06, "loss": 0.025294399261474608, "step": 22535 }, { "epoch": 0.21214117647058822, "grad_norm": 0.4813022334373257, "learning_rate": 4.343394790795484e-06, "loss": 0.023732207715511322, "step": 22540 }, { "epoch": 0.21218823529411765, "grad_norm": 0.6190257292078698, "learning_rate": 4.342913106572824e-06, "loss": 0.025146853923797608, "step": 22545 }, { "epoch": 0.21223529411764705, "grad_norm": 0.7159400257272226, "learning_rate": 4.342431582571546e-06, "loss": 0.023972755670547484, "step": 22550 }, { "epoch": 0.21228235294117648, "grad_norm": 0.5607550325084487, "learning_rate": 4.341950218702849e-06, "loss": 0.021684756875038146, "step": 22555 }, { "epoch": 0.21232941176470588, "grad_norm": 0.6131699006406359, "learning_rate": 4.341469014877996e-06, "loss": 0.031746435165405276, "step": 22560 }, { "epoch": 0.2123764705882353, "grad_norm": 0.5194181948194759, "learning_rate": 4.3409879710083205e-06, "loss": 0.018988688290119172, "step": 22565 }, { "epoch": 0.2124235294117647, "grad_norm": 0.6904686984040496, "learning_rate": 4.340507087005226e-06, "loss": 0.024325215816497804, "step": 22570 }, { "epoch": 0.2124705882352941, "grad_norm": 0.6462267659051335, "learning_rate": 4.340026362780186e-06, "loss": 0.023760992288589477, "step": 22575 }, { "epoch": 0.21251764705882353, "grad_norm": 0.6721545647841908, "learning_rate": 4.3395457982447375e-06, "loss": 0.029628843069076538, "step": 22580 }, { "epoch": 0.21256470588235293, "grad_norm": 0.568800790210487, "learning_rate": 4.339065393310492e-06, "loss": 0.026522552967071532, "step": 22585 }, { "epoch": 0.21261176470588236, "grad_norm": 0.5124082786300415, "learning_rate": 4.3385851478891236e-06, "loss": 0.03132057785987854, "step": 22590 }, { "epoch": 0.21265882352941176, "grad_norm": 0.7421887672100429, "learning_rate": 4.33810506189238e-06, "loss": 0.02666463255882263, "step": 22595 }, { "epoch": 0.2127058823529412, "grad_norm": 0.6238395523888188, "learning_rate": 4.337625135232074e-06, "loss": 0.024434950947761536, "step": 22600 }, { "epoch": 0.2127529411764706, "grad_norm": 0.5982749165443765, "learning_rate": 4.33714536782009e-06, "loss": 0.026803648471832274, "step": 22605 }, { "epoch": 0.2128, "grad_norm": 0.5977498840407762, "learning_rate": 4.336665759568375e-06, "loss": 0.027047595381736754, "step": 22610 }, { "epoch": 0.21284705882352942, "grad_norm": 0.6591181961561179, "learning_rate": 4.33618631038895e-06, "loss": 0.02348143607378006, "step": 22615 }, { "epoch": 0.21289411764705882, "grad_norm": 0.5922719457537243, "learning_rate": 4.3357070201939e-06, "loss": 0.027016296982765198, "step": 22620 }, { "epoch": 0.21294117647058824, "grad_norm": 0.6193510403445653, "learning_rate": 4.33522788889538e-06, "loss": 0.02700066566467285, "step": 22625 }, { "epoch": 0.21298823529411764, "grad_norm": 0.7133495713952903, "learning_rate": 4.334748916405614e-06, "loss": 0.02604042589664459, "step": 22630 }, { "epoch": 0.21303529411764707, "grad_norm": 0.6206785906990134, "learning_rate": 4.33427010263689e-06, "loss": 0.024529910087585448, "step": 22635 }, { "epoch": 0.21308235294117647, "grad_norm": 1.4493256527847564, "learning_rate": 4.333791447501569e-06, "loss": 0.024223321676254274, "step": 22640 }, { "epoch": 0.21312941176470587, "grad_norm": 0.5092880956207997, "learning_rate": 4.333312950912074e-06, "loss": 0.028517144918441772, "step": 22645 }, { "epoch": 0.2131764705882353, "grad_norm": 0.629038632095962, "learning_rate": 4.332834612780901e-06, "loss": 0.025859248638153077, "step": 22650 }, { "epoch": 0.2132235294117647, "grad_norm": 0.5512010566307448, "learning_rate": 4.33235643302061e-06, "loss": 0.023251652717590332, "step": 22655 }, { "epoch": 0.21327058823529413, "grad_norm": 0.4622211862075248, "learning_rate": 4.33187841154383e-06, "loss": 0.02464371472597122, "step": 22660 }, { "epoch": 0.21331764705882353, "grad_norm": 0.7495983894506486, "learning_rate": 4.331400548263257e-06, "loss": 0.025998234748840332, "step": 22665 }, { "epoch": 0.21336470588235293, "grad_norm": 0.8686696429235192, "learning_rate": 4.3309228430916535e-06, "loss": 0.02648288607597351, "step": 22670 }, { "epoch": 0.21341176470588236, "grad_norm": 0.6142089604208418, "learning_rate": 4.330445295941854e-06, "loss": 0.030221444368362427, "step": 22675 }, { "epoch": 0.21345882352941176, "grad_norm": 0.5507963249335714, "learning_rate": 4.329967906726753e-06, "loss": 0.019518022239208222, "step": 22680 }, { "epoch": 0.21350588235294118, "grad_norm": 0.8074349254409886, "learning_rate": 4.329490675359317e-06, "loss": 0.022438201308250427, "step": 22685 }, { "epoch": 0.21355294117647058, "grad_norm": 0.5792820700462075, "learning_rate": 4.32901360175258e-06, "loss": 0.022846123576164244, "step": 22690 }, { "epoch": 0.2136, "grad_norm": 0.7502648837454172, "learning_rate": 4.32853668581964e-06, "loss": 0.02486935257911682, "step": 22695 }, { "epoch": 0.2136470588235294, "grad_norm": 0.5804514147621351, "learning_rate": 4.328059927473664e-06, "loss": 0.026940366625785826, "step": 22700 }, { "epoch": 0.2136941176470588, "grad_norm": 0.674981153384592, "learning_rate": 4.327583326627887e-06, "loss": 0.02680816650390625, "step": 22705 }, { "epoch": 0.21374117647058824, "grad_norm": 0.6099477489452325, "learning_rate": 4.327106883195606e-06, "loss": 0.027624380588531495, "step": 22710 }, { "epoch": 0.21378823529411764, "grad_norm": 0.5397361179653788, "learning_rate": 4.326630597090191e-06, "loss": 0.02601754367351532, "step": 22715 }, { "epoch": 0.21383529411764707, "grad_norm": 0.6169659549127131, "learning_rate": 4.326154468225077e-06, "loss": 0.027829641103744508, "step": 22720 }, { "epoch": 0.21388235294117647, "grad_norm": 0.3579786218154506, "learning_rate": 4.325678496513761e-06, "loss": 0.0272769570350647, "step": 22725 }, { "epoch": 0.2139294117647059, "grad_norm": 0.4817164895012225, "learning_rate": 4.325202681869813e-06, "loss": 0.027007097005844118, "step": 22730 }, { "epoch": 0.2139764705882353, "grad_norm": 0.548699799591405, "learning_rate": 4.324727024206866e-06, "loss": 0.023871973156929016, "step": 22735 }, { "epoch": 0.2140235294117647, "grad_norm": 0.5681543031290553, "learning_rate": 4.324251523438618e-06, "loss": 0.030722761154174806, "step": 22740 }, { "epoch": 0.21407058823529412, "grad_norm": 0.5031032869473291, "learning_rate": 4.3237761794788405e-06, "loss": 0.027228760719299316, "step": 22745 }, { "epoch": 0.21411764705882352, "grad_norm": 0.6349597440382014, "learning_rate": 4.323300992241363e-06, "loss": 0.02756332457065582, "step": 22750 }, { "epoch": 0.21416470588235295, "grad_norm": 0.6778103306763009, "learning_rate": 4.322825961640084e-06, "loss": 0.021766912937164307, "step": 22755 }, { "epoch": 0.21421176470588235, "grad_norm": 0.5518214982330182, "learning_rate": 4.322351087588972e-06, "loss": 0.017622224986553192, "step": 22760 }, { "epoch": 0.21425882352941175, "grad_norm": 0.6314513113633163, "learning_rate": 4.321876370002056e-06, "loss": 0.020533180236816405, "step": 22765 }, { "epoch": 0.21430588235294118, "grad_norm": 0.3763244577715789, "learning_rate": 4.3214018087934365e-06, "loss": 0.023666462302207945, "step": 22770 }, { "epoch": 0.21435294117647058, "grad_norm": 0.5594023490734837, "learning_rate": 4.320927403877273e-06, "loss": 0.023940259218215944, "step": 22775 }, { "epoch": 0.2144, "grad_norm": 0.580217667689733, "learning_rate": 4.320453155167799e-06, "loss": 0.023619973659515382, "step": 22780 }, { "epoch": 0.2144470588235294, "grad_norm": 0.6846255097618008, "learning_rate": 4.3199790625793085e-06, "loss": 0.019948574900627136, "step": 22785 }, { "epoch": 0.21449411764705884, "grad_norm": 0.6755463227974489, "learning_rate": 4.319505126026162e-06, "loss": 0.02644197940826416, "step": 22790 }, { "epoch": 0.21454117647058824, "grad_norm": 0.5481950889347782, "learning_rate": 4.319031345422789e-06, "loss": 0.026025152206420897, "step": 22795 }, { "epoch": 0.21458823529411764, "grad_norm": 0.7184120994726605, "learning_rate": 4.31855772068368e-06, "loss": 0.021849969029426576, "step": 22800 }, { "epoch": 0.21463529411764706, "grad_norm": 0.6765682816760218, "learning_rate": 4.318084251723395e-06, "loss": 0.02272777259349823, "step": 22805 }, { "epoch": 0.21468235294117646, "grad_norm": 0.7382764241123332, "learning_rate": 4.3176109384565555e-06, "loss": 0.028295719623565675, "step": 22810 }, { "epoch": 0.2147294117647059, "grad_norm": 0.6251198293476802, "learning_rate": 4.317137780797853e-06, "loss": 0.027094632387161255, "step": 22815 }, { "epoch": 0.2147764705882353, "grad_norm": 0.5460414722938518, "learning_rate": 4.316664778662044e-06, "loss": 0.025464171171188356, "step": 22820 }, { "epoch": 0.21482352941176472, "grad_norm": 0.553677423287788, "learning_rate": 4.3161919319639454e-06, "loss": 0.02270423024892807, "step": 22825 }, { "epoch": 0.21487058823529412, "grad_norm": 0.4640430512114109, "learning_rate": 4.315719240618444e-06, "loss": 0.023986227810382843, "step": 22830 }, { "epoch": 0.21491764705882352, "grad_norm": 0.6551026489458759, "learning_rate": 4.315246704540491e-06, "loss": 0.027794322371482848, "step": 22835 }, { "epoch": 0.21496470588235295, "grad_norm": 0.5657894119865373, "learning_rate": 4.314774323645102e-06, "loss": 0.02343565970659256, "step": 22840 }, { "epoch": 0.21501176470588235, "grad_norm": 0.898746062101033, "learning_rate": 4.314302097847358e-06, "loss": 0.022578738629817963, "step": 22845 }, { "epoch": 0.21505882352941177, "grad_norm": 0.6582191905627552, "learning_rate": 4.313830027062405e-06, "loss": 0.02440590113401413, "step": 22850 }, { "epoch": 0.21510588235294117, "grad_norm": 0.47299137456023393, "learning_rate": 4.313358111205455e-06, "loss": 0.02143782824277878, "step": 22855 }, { "epoch": 0.21515294117647057, "grad_norm": 0.6049728175279218, "learning_rate": 4.312886350191782e-06, "loss": 0.022725436091423034, "step": 22860 }, { "epoch": 0.2152, "grad_norm": 0.5758466037122407, "learning_rate": 4.312414743936729e-06, "loss": 0.02180686891078949, "step": 22865 }, { "epoch": 0.2152470588235294, "grad_norm": 0.6834017000496214, "learning_rate": 4.3119432923557006e-06, "loss": 0.030136579275131227, "step": 22870 }, { "epoch": 0.21529411764705883, "grad_norm": 0.8887562915446914, "learning_rate": 4.311471995364168e-06, "loss": 0.024795615673065187, "step": 22875 }, { "epoch": 0.21534117647058823, "grad_norm": 0.5531178272909699, "learning_rate": 4.311000852877665e-06, "loss": 0.02649247646331787, "step": 22880 }, { "epoch": 0.21538823529411766, "grad_norm": 0.5809610922433517, "learning_rate": 4.3105298648117925e-06, "loss": 0.027296695113182067, "step": 22885 }, { "epoch": 0.21543529411764706, "grad_norm": 0.5664827759999299, "learning_rate": 4.310059031082213e-06, "loss": 0.025864392518997192, "step": 22890 }, { "epoch": 0.21548235294117646, "grad_norm": 0.6070296744487511, "learning_rate": 4.309588351604657e-06, "loss": 0.025107544660568238, "step": 22895 }, { "epoch": 0.21552941176470589, "grad_norm": 0.6661263970752695, "learning_rate": 4.309117826294916e-06, "loss": 0.02393001914024353, "step": 22900 }, { "epoch": 0.21557647058823529, "grad_norm": 0.5799125022390064, "learning_rate": 4.308647455068848e-06, "loss": 0.02933879792690277, "step": 22905 }, { "epoch": 0.21562352941176471, "grad_norm": 0.5508262146180709, "learning_rate": 4.3081772378423734e-06, "loss": 0.027200216054916383, "step": 22910 }, { "epoch": 0.21567058823529411, "grad_norm": 0.5377576364960969, "learning_rate": 4.30770717453148e-06, "loss": 0.02464187443256378, "step": 22915 }, { "epoch": 0.21571764705882354, "grad_norm": 0.5448145066141128, "learning_rate": 4.307237265052219e-06, "loss": 0.022358906269073487, "step": 22920 }, { "epoch": 0.21576470588235294, "grad_norm": 0.7282396371518856, "learning_rate": 4.306767509320701e-06, "loss": 0.024292725324630737, "step": 22925 }, { "epoch": 0.21581176470588234, "grad_norm": 0.6814818862581193, "learning_rate": 4.306297907253106e-06, "loss": 0.028441524505615233, "step": 22930 }, { "epoch": 0.21585882352941177, "grad_norm": 0.5053388813106556, "learning_rate": 4.3058284587656755e-06, "loss": 0.023212838172912597, "step": 22935 }, { "epoch": 0.21590588235294117, "grad_norm": 0.6038748267260526, "learning_rate": 4.3053591637747155e-06, "loss": 0.02792992293834686, "step": 22940 }, { "epoch": 0.2159529411764706, "grad_norm": 0.4545516033299954, "learning_rate": 4.304890022196596e-06, "loss": 0.026399120688438416, "step": 22945 }, { "epoch": 0.216, "grad_norm": 0.7234220560909492, "learning_rate": 4.304421033947751e-06, "loss": 0.023801931738853456, "step": 22950 }, { "epoch": 0.21604705882352943, "grad_norm": 0.6716144231542409, "learning_rate": 4.303952198944678e-06, "loss": 0.02560136318206787, "step": 22955 }, { "epoch": 0.21609411764705883, "grad_norm": 0.6081925001305306, "learning_rate": 4.303483517103938e-06, "loss": 0.024949654936790466, "step": 22960 }, { "epoch": 0.21614117647058823, "grad_norm": 0.5985912920466533, "learning_rate": 4.303014988342154e-06, "loss": 0.019781431555747984, "step": 22965 }, { "epoch": 0.21618823529411765, "grad_norm": 0.7517717878111246, "learning_rate": 4.302546612576017e-06, "loss": 0.02300325930118561, "step": 22970 }, { "epoch": 0.21623529411764705, "grad_norm": 0.6680673061265021, "learning_rate": 4.302078389722275e-06, "loss": 0.028208857774734496, "step": 22975 }, { "epoch": 0.21628235294117648, "grad_norm": 0.6346787727032699, "learning_rate": 4.3016103196977445e-06, "loss": 0.024707141518592834, "step": 22980 }, { "epoch": 0.21632941176470588, "grad_norm": 0.6215770794932355, "learning_rate": 4.301142402419306e-06, "loss": 0.03192384243011474, "step": 22985 }, { "epoch": 0.21637647058823528, "grad_norm": 0.6668658666580811, "learning_rate": 4.300674637803897e-06, "loss": 0.025489822030067444, "step": 22990 }, { "epoch": 0.2164235294117647, "grad_norm": 0.6398597012266566, "learning_rate": 4.300207025768526e-06, "loss": 0.01991533041000366, "step": 22995 }, { "epoch": 0.2164705882352941, "grad_norm": 0.5291017623358812, "learning_rate": 4.299739566230259e-06, "loss": 0.023689547181129457, "step": 23000 }, { "epoch": 0.21651764705882354, "grad_norm": 0.5485329591366301, "learning_rate": 4.299272259106227e-06, "loss": 0.022146303951740266, "step": 23005 }, { "epoch": 0.21656470588235294, "grad_norm": 0.6673823870800203, "learning_rate": 4.298805104313625e-06, "loss": 0.024995745718479158, "step": 23010 }, { "epoch": 0.21661176470588236, "grad_norm": 0.546014895828123, "learning_rate": 4.29833810176971e-06, "loss": 0.023824334144592285, "step": 23015 }, { "epoch": 0.21665882352941176, "grad_norm": 0.8489223012532731, "learning_rate": 4.2978712513918e-06, "loss": 0.03020077645778656, "step": 23020 }, { "epoch": 0.21670588235294116, "grad_norm": 0.6629336211627839, "learning_rate": 4.2974045530972805e-06, "loss": 0.024182236194610594, "step": 23025 }, { "epoch": 0.2167529411764706, "grad_norm": 0.6066446820914096, "learning_rate": 4.2969380068035966e-06, "loss": 0.02624327540397644, "step": 23030 }, { "epoch": 0.2168, "grad_norm": 0.6412818954517172, "learning_rate": 4.296471612428254e-06, "loss": 0.020261710882186888, "step": 23035 }, { "epoch": 0.21684705882352942, "grad_norm": 0.5306241383334734, "learning_rate": 4.296005369888827e-06, "loss": 0.025885483622550963, "step": 23040 }, { "epoch": 0.21689411764705882, "grad_norm": 0.6910646237796989, "learning_rate": 4.295539279102947e-06, "loss": 0.027988046407699585, "step": 23045 }, { "epoch": 0.21694117647058825, "grad_norm": 0.45436984467495944, "learning_rate": 4.295073339988311e-06, "loss": 0.021845331788063048, "step": 23050 }, { "epoch": 0.21698823529411765, "grad_norm": 0.50720900041169, "learning_rate": 4.294607552462679e-06, "loss": 0.02709382176399231, "step": 23055 }, { "epoch": 0.21703529411764705, "grad_norm": 0.6762511829825487, "learning_rate": 4.294141916443868e-06, "loss": 0.029355883598327637, "step": 23060 }, { "epoch": 0.21708235294117648, "grad_norm": 0.634380308061807, "learning_rate": 4.293676431849765e-06, "loss": 0.020970943570137023, "step": 23065 }, { "epoch": 0.21712941176470588, "grad_norm": 0.6027160202108672, "learning_rate": 4.293211098598315e-06, "loss": 0.022754880785942077, "step": 23070 }, { "epoch": 0.2171764705882353, "grad_norm": 0.41868221364576674, "learning_rate": 4.292745916607525e-06, "loss": 0.020310382544994354, "step": 23075 }, { "epoch": 0.2172235294117647, "grad_norm": 0.6275958937165124, "learning_rate": 4.292280885795466e-06, "loss": 0.025275957584381104, "step": 23080 }, { "epoch": 0.2172705882352941, "grad_norm": 0.577939274986969, "learning_rate": 4.291816006080269e-06, "loss": 0.019934108853340148, "step": 23085 }, { "epoch": 0.21731764705882353, "grad_norm": 0.8235911520606445, "learning_rate": 4.291351277380128e-06, "loss": 0.02173459529876709, "step": 23090 }, { "epoch": 0.21736470588235293, "grad_norm": 0.7091289112387599, "learning_rate": 4.290886699613303e-06, "loss": 0.023944994807243346, "step": 23095 }, { "epoch": 0.21741176470588236, "grad_norm": 0.6779488189760299, "learning_rate": 4.290422272698107e-06, "loss": 0.028427118062973024, "step": 23100 }, { "epoch": 0.21745882352941176, "grad_norm": 0.8227784180130958, "learning_rate": 4.289957996552923e-06, "loss": 0.03105643093585968, "step": 23105 }, { "epoch": 0.2175058823529412, "grad_norm": 0.5069771070900779, "learning_rate": 4.289493871096192e-06, "loss": 0.031678307056427005, "step": 23110 }, { "epoch": 0.2175529411764706, "grad_norm": 0.5643620824437365, "learning_rate": 4.289029896246418e-06, "loss": 0.023143643140792848, "step": 23115 }, { "epoch": 0.2176, "grad_norm": 0.5755672071745386, "learning_rate": 4.2885660719221675e-06, "loss": 0.022395311295986174, "step": 23120 }, { "epoch": 0.21764705882352942, "grad_norm": 0.4573587122652667, "learning_rate": 4.288102398042065e-06, "loss": 0.026671859622001647, "step": 23125 }, { "epoch": 0.21769411764705882, "grad_norm": 0.7888324581024407, "learning_rate": 4.2876388745248e-06, "loss": 0.0296136736869812, "step": 23130 }, { "epoch": 0.21774117647058824, "grad_norm": 0.5614508208808814, "learning_rate": 4.287175501289122e-06, "loss": 0.023332220315933228, "step": 23135 }, { "epoch": 0.21778823529411764, "grad_norm": 0.5236599056489082, "learning_rate": 4.286712278253845e-06, "loss": 0.02549009323120117, "step": 23140 }, { "epoch": 0.21783529411764707, "grad_norm": 0.4319219310870657, "learning_rate": 4.2862492053378385e-06, "loss": 0.02192261666059494, "step": 23145 }, { "epoch": 0.21788235294117647, "grad_norm": 0.7606740586997969, "learning_rate": 4.2857862824600385e-06, "loss": 0.027093911170959474, "step": 23150 }, { "epoch": 0.21792941176470587, "grad_norm": 0.5699662750318236, "learning_rate": 4.28532350953944e-06, "loss": 0.03797236680984497, "step": 23155 }, { "epoch": 0.2179764705882353, "grad_norm": 0.816401792547294, "learning_rate": 4.284860886495099e-06, "loss": 0.03003612458705902, "step": 23160 }, { "epoch": 0.2180235294117647, "grad_norm": 0.4394352177155652, "learning_rate": 4.284398413246136e-06, "loss": 0.028214994072914123, "step": 23165 }, { "epoch": 0.21807058823529413, "grad_norm": 0.6473130133093657, "learning_rate": 4.283936089711727e-06, "loss": 0.025993236899375917, "step": 23170 }, { "epoch": 0.21811764705882353, "grad_norm": 0.43729770489546127, "learning_rate": 4.283473915811114e-06, "loss": 0.024073895812034608, "step": 23175 }, { "epoch": 0.21816470588235296, "grad_norm": 0.7689535976814478, "learning_rate": 4.283011891463597e-06, "loss": 0.02509196400642395, "step": 23180 }, { "epoch": 0.21821176470588236, "grad_norm": 0.6704867174794397, "learning_rate": 4.2825500165885365e-06, "loss": 0.03236188888549805, "step": 23185 }, { "epoch": 0.21825882352941176, "grad_norm": 0.6144191556545388, "learning_rate": 4.2820882911053565e-06, "loss": 0.024879136681556703, "step": 23190 }, { "epoch": 0.21830588235294118, "grad_norm": 0.7129993353432043, "learning_rate": 4.281626714933543e-06, "loss": 0.026944437623023988, "step": 23195 }, { "epoch": 0.21835294117647058, "grad_norm": 0.4650806539698008, "learning_rate": 4.281165287992636e-06, "loss": 0.022107040882110594, "step": 23200 }, { "epoch": 0.2184, "grad_norm": 0.6585562954673992, "learning_rate": 4.280704010202242e-06, "loss": 0.03095645010471344, "step": 23205 }, { "epoch": 0.2184470588235294, "grad_norm": 0.6316040530233933, "learning_rate": 4.280242881482029e-06, "loss": 0.028166115283966064, "step": 23210 }, { "epoch": 0.2184941176470588, "grad_norm": 0.48525487969306197, "learning_rate": 4.279781901751719e-06, "loss": 0.025740131735801697, "step": 23215 }, { "epoch": 0.21854117647058824, "grad_norm": 0.6499269380648158, "learning_rate": 4.279321070931101e-06, "loss": 0.02262871265411377, "step": 23220 }, { "epoch": 0.21858823529411764, "grad_norm": 0.4849213557137623, "learning_rate": 4.2788603889400225e-06, "loss": 0.025945064425468446, "step": 23225 }, { "epoch": 0.21863529411764707, "grad_norm": 0.7210532673874689, "learning_rate": 4.27839985569839e-06, "loss": 0.02814747393131256, "step": 23230 }, { "epoch": 0.21868235294117647, "grad_norm": 0.607100897930304, "learning_rate": 4.277939471126171e-06, "loss": 0.03124634623527527, "step": 23235 }, { "epoch": 0.2187294117647059, "grad_norm": 0.7581332059013991, "learning_rate": 4.2774792351433945e-06, "loss": 0.025984537601470948, "step": 23240 }, { "epoch": 0.2187764705882353, "grad_norm": 0.5900708304787923, "learning_rate": 4.27701914767015e-06, "loss": 0.024102360010147095, "step": 23245 }, { "epoch": 0.2188235294117647, "grad_norm": 0.8469111093210441, "learning_rate": 4.276559208626582e-06, "loss": 0.0321811318397522, "step": 23250 }, { "epoch": 0.21887058823529412, "grad_norm": 0.7590542474732065, "learning_rate": 4.276099417932902e-06, "loss": 0.02216072827577591, "step": 23255 }, { "epoch": 0.21891764705882352, "grad_norm": 0.686933288520249, "learning_rate": 4.275639775509378e-06, "loss": 0.025549697875976562, "step": 23260 }, { "epoch": 0.21896470588235295, "grad_norm": 0.6130172212461834, "learning_rate": 4.275180281276338e-06, "loss": 0.023369279503822327, "step": 23265 }, { "epoch": 0.21901176470588235, "grad_norm": 0.6983611708298587, "learning_rate": 4.274720935154172e-06, "loss": 0.02674746811389923, "step": 23270 }, { "epoch": 0.21905882352941178, "grad_norm": 0.5973115693210069, "learning_rate": 4.274261737063327e-06, "loss": 0.024389928579330443, "step": 23275 }, { "epoch": 0.21910588235294118, "grad_norm": 0.46086719920038455, "learning_rate": 4.273802686924311e-06, "loss": 0.02346019446849823, "step": 23280 }, { "epoch": 0.21915294117647058, "grad_norm": 0.6024446732272577, "learning_rate": 4.273343784657692e-06, "loss": 0.027652212977409364, "step": 23285 }, { "epoch": 0.2192, "grad_norm": 0.7387100849612344, "learning_rate": 4.272885030184099e-06, "loss": 0.022895848751068114, "step": 23290 }, { "epoch": 0.2192470588235294, "grad_norm": 0.513699348949572, "learning_rate": 4.272426423424218e-06, "loss": 0.023351901769638063, "step": 23295 }, { "epoch": 0.21929411764705883, "grad_norm": 0.421833403537077, "learning_rate": 4.271967964298795e-06, "loss": 0.023301604390144347, "step": 23300 }, { "epoch": 0.21934117647058823, "grad_norm": 0.4648380945919582, "learning_rate": 4.2715096527286385e-06, "loss": 0.026437243819236754, "step": 23305 }, { "epoch": 0.21938823529411763, "grad_norm": 0.6787642546363145, "learning_rate": 4.271051488634613e-06, "loss": 0.02717956304550171, "step": 23310 }, { "epoch": 0.21943529411764706, "grad_norm": 0.7737313058020194, "learning_rate": 4.2705934719376426e-06, "loss": 0.027700358629226686, "step": 23315 }, { "epoch": 0.21948235294117646, "grad_norm": 0.6824433543805453, "learning_rate": 4.270135602558713e-06, "loss": 0.02906805872917175, "step": 23320 }, { "epoch": 0.2195294117647059, "grad_norm": 0.9078128712606965, "learning_rate": 4.269677880418868e-06, "loss": 0.027329352498054505, "step": 23325 }, { "epoch": 0.2195764705882353, "grad_norm": 0.41236271573661476, "learning_rate": 4.269220305439212e-06, "loss": 0.023696596920490264, "step": 23330 }, { "epoch": 0.21962352941176472, "grad_norm": 0.38542104940601046, "learning_rate": 4.268762877540905e-06, "loss": 0.019239683449268342, "step": 23335 }, { "epoch": 0.21967058823529412, "grad_norm": 0.8171950320576743, "learning_rate": 4.268305596645168e-06, "loss": 0.027942121028900146, "step": 23340 }, { "epoch": 0.21971764705882352, "grad_norm": 0.4934102463652879, "learning_rate": 4.267848462673283e-06, "loss": 0.03270534873008728, "step": 23345 }, { "epoch": 0.21976470588235295, "grad_norm": 0.4763378512923427, "learning_rate": 4.267391475546588e-06, "loss": 0.02452365756034851, "step": 23350 }, { "epoch": 0.21981176470588235, "grad_norm": 0.302315724023171, "learning_rate": 4.266934635186485e-06, "loss": 0.019986677169799804, "step": 23355 }, { "epoch": 0.21985882352941177, "grad_norm": 0.4155164662058062, "learning_rate": 4.266477941514425e-06, "loss": 0.019725768268108367, "step": 23360 }, { "epoch": 0.21990588235294117, "grad_norm": 0.5766857747541143, "learning_rate": 4.26602139445193e-06, "loss": 0.03484467267990112, "step": 23365 }, { "epoch": 0.2199529411764706, "grad_norm": 0.6017108369200599, "learning_rate": 4.265564993920572e-06, "loss": 0.023830294609069824, "step": 23370 }, { "epoch": 0.22, "grad_norm": 0.6151986591129209, "learning_rate": 4.265108739841984e-06, "loss": 0.0243718683719635, "step": 23375 }, { "epoch": 0.2200470588235294, "grad_norm": 0.8017231606723316, "learning_rate": 4.264652632137861e-06, "loss": 0.024203738570213316, "step": 23380 }, { "epoch": 0.22009411764705883, "grad_norm": 0.7083475095623287, "learning_rate": 4.26419667072995e-06, "loss": 0.045266592502593996, "step": 23385 }, { "epoch": 0.22014117647058823, "grad_norm": 0.5707403821473224, "learning_rate": 4.263740855540063e-06, "loss": 0.02199849784374237, "step": 23390 }, { "epoch": 0.22018823529411766, "grad_norm": 0.5628328513448606, "learning_rate": 4.263285186490067e-06, "loss": 0.02757858335971832, "step": 23395 }, { "epoch": 0.22023529411764706, "grad_norm": 0.686165135678982, "learning_rate": 4.262829663501889e-06, "loss": 0.025266033411026, "step": 23400 }, { "epoch": 0.22028235294117646, "grad_norm": 0.6300319839517374, "learning_rate": 4.2623742864975134e-06, "loss": 0.025235474109649658, "step": 23405 }, { "epoch": 0.22032941176470588, "grad_norm": 0.5245557269941491, "learning_rate": 4.261919055398982e-06, "loss": 0.02530595064163208, "step": 23410 }, { "epoch": 0.22037647058823528, "grad_norm": 0.7437533493385627, "learning_rate": 4.261463970128398e-06, "loss": 0.022451329231262206, "step": 23415 }, { "epoch": 0.2204235294117647, "grad_norm": 0.7824695189409475, "learning_rate": 4.26100903060792e-06, "loss": 0.027599358558654787, "step": 23420 }, { "epoch": 0.2204705882352941, "grad_norm": 0.785810439746216, "learning_rate": 4.260554236759766e-06, "loss": 0.02102302461862564, "step": 23425 }, { "epoch": 0.22051764705882354, "grad_norm": 0.9100787969616378, "learning_rate": 4.2600995885062105e-06, "loss": 0.02654775381088257, "step": 23430 }, { "epoch": 0.22056470588235294, "grad_norm": 0.7701922117819762, "learning_rate": 4.2596450857695885e-06, "loss": 0.030445778369903566, "step": 23435 }, { "epoch": 0.22061176470588234, "grad_norm": 0.7923761435132417, "learning_rate": 4.259190728472292e-06, "loss": 0.027347669005393982, "step": 23440 }, { "epoch": 0.22065882352941177, "grad_norm": 0.5531415018062518, "learning_rate": 4.25873651653677e-06, "loss": 0.023001191020011903, "step": 23445 }, { "epoch": 0.22070588235294117, "grad_norm": 0.4072069236483805, "learning_rate": 4.258282449885531e-06, "loss": 0.019870340824127197, "step": 23450 }, { "epoch": 0.2207529411764706, "grad_norm": 0.6642470598004203, "learning_rate": 4.257828528441139e-06, "loss": 0.024785107374191283, "step": 23455 }, { "epoch": 0.2208, "grad_norm": 0.7635872779854646, "learning_rate": 4.257374752126218e-06, "loss": 0.027747824788093567, "step": 23460 }, { "epoch": 0.22084705882352942, "grad_norm": 0.6204693513179573, "learning_rate": 4.2569211208634495e-06, "loss": 0.026294732093811037, "step": 23465 }, { "epoch": 0.22089411764705882, "grad_norm": 0.7277812895427423, "learning_rate": 4.2564676345755715e-06, "loss": 0.02662946581840515, "step": 23470 }, { "epoch": 0.22094117647058822, "grad_norm": 1.151065942999253, "learning_rate": 4.25601429318538e-06, "loss": 0.026668089628219604, "step": 23475 }, { "epoch": 0.22098823529411765, "grad_norm": 0.6390177237392114, "learning_rate": 4.2555610966157284e-06, "loss": 0.023888102173805235, "step": 23480 }, { "epoch": 0.22103529411764705, "grad_norm": 0.47920464584267175, "learning_rate": 4.25510804478953e-06, "loss": 0.020244836807250977, "step": 23485 }, { "epoch": 0.22108235294117648, "grad_norm": 0.6706189422053509, "learning_rate": 4.254655137629751e-06, "loss": 0.0243606299161911, "step": 23490 }, { "epoch": 0.22112941176470588, "grad_norm": 0.8071489419883978, "learning_rate": 4.2542023750594165e-06, "loss": 0.027558088302612305, "step": 23495 }, { "epoch": 0.2211764705882353, "grad_norm": 0.585306566070185, "learning_rate": 4.253749757001613e-06, "loss": 0.025015872716903687, "step": 23500 }, { "epoch": 0.2212235294117647, "grad_norm": 0.7000882424908258, "learning_rate": 4.253297283379481e-06, "loss": 0.026372331380844116, "step": 23505 }, { "epoch": 0.2212705882352941, "grad_norm": 1.1741326053453636, "learning_rate": 4.252844954116215e-06, "loss": 0.032539987564086915, "step": 23510 }, { "epoch": 0.22131764705882354, "grad_norm": 0.704866083285715, "learning_rate": 4.252392769135074e-06, "loss": 0.023957273364067076, "step": 23515 }, { "epoch": 0.22136470588235294, "grad_norm": 0.7552033210503, "learning_rate": 4.2519407283593675e-06, "loss": 0.026893728971481325, "step": 23520 }, { "epoch": 0.22141176470588236, "grad_norm": 0.42391559321286243, "learning_rate": 4.251488831712465e-06, "loss": 0.02381783127784729, "step": 23525 }, { "epoch": 0.22145882352941176, "grad_norm": 0.4797735316278428, "learning_rate": 4.2510370791177935e-06, "loss": 0.03012051582336426, "step": 23530 }, { "epoch": 0.22150588235294116, "grad_norm": 0.5910748410813084, "learning_rate": 4.250585470498836e-06, "loss": 0.02520284950733185, "step": 23535 }, { "epoch": 0.2215529411764706, "grad_norm": 0.6972706296110806, "learning_rate": 4.250134005779132e-06, "loss": 0.03165746927261352, "step": 23540 }, { "epoch": 0.2216, "grad_norm": 0.6692660921812665, "learning_rate": 4.2496826848822784e-06, "loss": 0.02160836160182953, "step": 23545 }, { "epoch": 0.22164705882352942, "grad_norm": 0.5223057545163249, "learning_rate": 4.249231507731929e-06, "loss": 0.030038800835609437, "step": 23550 }, { "epoch": 0.22169411764705882, "grad_norm": 0.4253919275736107, "learning_rate": 4.248780474251793e-06, "loss": 0.03817829489707947, "step": 23555 }, { "epoch": 0.22174117647058825, "grad_norm": 0.5980923254158664, "learning_rate": 4.248329584365639e-06, "loss": 0.02418771982192993, "step": 23560 }, { "epoch": 0.22178823529411765, "grad_norm": 0.807076265096767, "learning_rate": 4.2478788379972905e-06, "loss": 0.022681377828121185, "step": 23565 }, { "epoch": 0.22183529411764705, "grad_norm": 0.7617103609726292, "learning_rate": 4.2474282350706275e-06, "loss": 0.02804373502731323, "step": 23570 }, { "epoch": 0.22188235294117648, "grad_norm": 0.7834547039880233, "learning_rate": 4.246977775509586e-06, "loss": 0.024427735805511476, "step": 23575 }, { "epoch": 0.22192941176470588, "grad_norm": 0.6256747494918868, "learning_rate": 4.24652745923816e-06, "loss": 0.02672919034957886, "step": 23580 }, { "epoch": 0.2219764705882353, "grad_norm": 0.7096007889488267, "learning_rate": 4.246077286180399e-06, "loss": 0.02356138527393341, "step": 23585 }, { "epoch": 0.2220235294117647, "grad_norm": 1.409679648404587, "learning_rate": 4.245627256260409e-06, "loss": 0.025325673818588256, "step": 23590 }, { "epoch": 0.22207058823529413, "grad_norm": 0.6287159379024553, "learning_rate": 4.24517736940235e-06, "loss": 0.02666487991809845, "step": 23595 }, { "epoch": 0.22211764705882353, "grad_norm": 0.7169864170271383, "learning_rate": 4.244727625530443e-06, "loss": 0.022271087765693663, "step": 23600 }, { "epoch": 0.22216470588235293, "grad_norm": 0.8487548894642579, "learning_rate": 4.244278024568962e-06, "loss": 0.026158377528190613, "step": 23605 }, { "epoch": 0.22221176470588236, "grad_norm": 0.5689405228104241, "learning_rate": 4.243828566442239e-06, "loss": 0.01990475058555603, "step": 23610 }, { "epoch": 0.22225882352941176, "grad_norm": 0.640155547977731, "learning_rate": 4.243379251074658e-06, "loss": 0.030839920043945312, "step": 23615 }, { "epoch": 0.2223058823529412, "grad_norm": 0.6870162401433036, "learning_rate": 4.242930078390663e-06, "loss": 0.026534116268157958, "step": 23620 }, { "epoch": 0.2223529411764706, "grad_norm": 0.7542760474328345, "learning_rate": 4.242481048314754e-06, "loss": 0.02568962574005127, "step": 23625 }, { "epoch": 0.2224, "grad_norm": 0.48010357495263756, "learning_rate": 4.242032160771485e-06, "loss": 0.020818516612052917, "step": 23630 }, { "epoch": 0.22244705882352941, "grad_norm": 0.9723355437039071, "learning_rate": 4.241583415685467e-06, "loss": 0.03152768611907959, "step": 23635 }, { "epoch": 0.22249411764705881, "grad_norm": 0.7327395845222984, "learning_rate": 4.241134812981366e-06, "loss": 0.030429840087890625, "step": 23640 }, { "epoch": 0.22254117647058824, "grad_norm": 0.5424711599854911, "learning_rate": 4.240686352583904e-06, "loss": 0.02205093502998352, "step": 23645 }, { "epoch": 0.22258823529411764, "grad_norm": 0.5307171674992035, "learning_rate": 4.240238034417859e-06, "loss": 0.030128955841064453, "step": 23650 }, { "epoch": 0.22263529411764707, "grad_norm": 0.6744496127268998, "learning_rate": 4.239789858408066e-06, "loss": 0.03150979578495026, "step": 23655 }, { "epoch": 0.22268235294117647, "grad_norm": 0.6440854794170703, "learning_rate": 4.239341824479411e-06, "loss": 0.02589028775691986, "step": 23660 }, { "epoch": 0.22272941176470587, "grad_norm": 0.4897581222409839, "learning_rate": 4.238893932556843e-06, "loss": 0.021547780930995943, "step": 23665 }, { "epoch": 0.2227764705882353, "grad_norm": 0.5583529157245014, "learning_rate": 4.238446182565358e-06, "loss": 0.030035218596458434, "step": 23670 }, { "epoch": 0.2228235294117647, "grad_norm": 0.5307289701244996, "learning_rate": 4.237998574430014e-06, "loss": 0.027080178260803223, "step": 23675 }, { "epoch": 0.22287058823529413, "grad_norm": 0.6785628113692027, "learning_rate": 4.237551108075922e-06, "loss": 0.028535398840904235, "step": 23680 }, { "epoch": 0.22291764705882353, "grad_norm": 0.7655855297255553, "learning_rate": 4.237103783428248e-06, "loss": 0.027514445781707763, "step": 23685 }, { "epoch": 0.22296470588235295, "grad_norm": 0.428052076703316, "learning_rate": 4.236656600412213e-06, "loss": 0.02445225268602371, "step": 23690 }, { "epoch": 0.22301176470588235, "grad_norm": 0.5541522972911733, "learning_rate": 4.236209558953096e-06, "loss": 0.02393224686384201, "step": 23695 }, { "epoch": 0.22305882352941175, "grad_norm": 0.693308755917014, "learning_rate": 4.235762658976227e-06, "loss": 0.026172667741775513, "step": 23700 }, { "epoch": 0.22310588235294118, "grad_norm": 0.4821833141783047, "learning_rate": 4.235315900406994e-06, "loss": 0.023292845487594603, "step": 23705 }, { "epoch": 0.22315294117647058, "grad_norm": 0.5395432510640303, "learning_rate": 4.2348692831708385e-06, "loss": 0.02567862570285797, "step": 23710 }, { "epoch": 0.2232, "grad_norm": 0.41816887684566867, "learning_rate": 4.234422807193259e-06, "loss": 0.023939453065395355, "step": 23715 }, { "epoch": 0.2232470588235294, "grad_norm": 0.5172324585886812, "learning_rate": 4.233976472399807e-06, "loss": 0.019869720935821532, "step": 23720 }, { "epoch": 0.22329411764705884, "grad_norm": 0.44487901037633104, "learning_rate": 4.23353027871609e-06, "loss": 0.02357217073440552, "step": 23725 }, { "epoch": 0.22334117647058824, "grad_norm": 0.715835767658459, "learning_rate": 4.2330842260677715e-06, "loss": 0.027304288744926453, "step": 23730 }, { "epoch": 0.22338823529411764, "grad_norm": 0.5128283244947525, "learning_rate": 4.232638314380565e-06, "loss": 0.03431721031665802, "step": 23735 }, { "epoch": 0.22343529411764707, "grad_norm": 0.8858444106082928, "learning_rate": 4.232192543580245e-06, "loss": 0.02635766863822937, "step": 23740 }, { "epoch": 0.22348235294117647, "grad_norm": 0.6149627112595262, "learning_rate": 4.2317469135926375e-06, "loss": 0.028186169266700745, "step": 23745 }, { "epoch": 0.2235294117647059, "grad_norm": 0.7282853391939517, "learning_rate": 4.231301424343622e-06, "loss": 0.025731995701789856, "step": 23750 }, { "epoch": 0.2235764705882353, "grad_norm": 0.6458434623125336, "learning_rate": 4.230856075759136e-06, "loss": 0.0234313890337944, "step": 23755 }, { "epoch": 0.2236235294117647, "grad_norm": 1.687797703101511, "learning_rate": 4.230410867765169e-06, "loss": 0.028300559520721434, "step": 23760 }, { "epoch": 0.22367058823529412, "grad_norm": 0.5548963863096372, "learning_rate": 4.229965800287765e-06, "loss": 0.025300389528274535, "step": 23765 }, { "epoch": 0.22371764705882352, "grad_norm": 0.5853143880805338, "learning_rate": 4.229520873253024e-06, "loss": 0.026983916759490967, "step": 23770 }, { "epoch": 0.22376470588235295, "grad_norm": 0.7516163881910352, "learning_rate": 4.229076086587099e-06, "loss": 0.02487354725599289, "step": 23775 }, { "epoch": 0.22381176470588235, "grad_norm": 0.7944985523469772, "learning_rate": 4.228631440216197e-06, "loss": 0.029986879229545592, "step": 23780 }, { "epoch": 0.22385882352941178, "grad_norm": 1.0101962895741945, "learning_rate": 4.228186934066583e-06, "loss": 0.02460441440343857, "step": 23785 }, { "epoch": 0.22390588235294118, "grad_norm": 0.8444296360059883, "learning_rate": 4.2277425680645715e-06, "loss": 0.02796330153942108, "step": 23790 }, { "epoch": 0.22395294117647058, "grad_norm": 0.6513916834661949, "learning_rate": 4.227298342136533e-06, "loss": 0.025402259826660157, "step": 23795 }, { "epoch": 0.224, "grad_norm": 0.9464092389758232, "learning_rate": 4.226854256208893e-06, "loss": 0.03170944452285766, "step": 23800 }, { "epoch": 0.2240470588235294, "grad_norm": 0.6948363682676915, "learning_rate": 4.22641031020813e-06, "loss": 0.03467991352081299, "step": 23805 }, { "epoch": 0.22409411764705883, "grad_norm": 0.5914436897993867, "learning_rate": 4.225966504060776e-06, "loss": 0.019663548469543456, "step": 23810 }, { "epoch": 0.22414117647058823, "grad_norm": 0.48507884178410565, "learning_rate": 4.2255228376934195e-06, "loss": 0.028574037551879882, "step": 23815 }, { "epoch": 0.22418823529411766, "grad_norm": 0.5590156141964917, "learning_rate": 4.225079311032702e-06, "loss": 0.03340115547180176, "step": 23820 }, { "epoch": 0.22423529411764706, "grad_norm": 0.4124723474953579, "learning_rate": 4.224635924005316e-06, "loss": 0.0311515212059021, "step": 23825 }, { "epoch": 0.22428235294117646, "grad_norm": 0.5045335713584066, "learning_rate": 4.22419267653801e-06, "loss": 0.024477542936801912, "step": 23830 }, { "epoch": 0.2243294117647059, "grad_norm": 0.5854449952704583, "learning_rate": 4.223749568557588e-06, "loss": 0.028143006563186645, "step": 23835 }, { "epoch": 0.2243764705882353, "grad_norm": 0.6458928662757532, "learning_rate": 4.2233065999909055e-06, "loss": 0.025325876474380494, "step": 23840 }, { "epoch": 0.22442352941176472, "grad_norm": 0.6095692746223612, "learning_rate": 4.222863770764872e-06, "loss": 0.025737693905830382, "step": 23845 }, { "epoch": 0.22447058823529412, "grad_norm": 0.6304957567583694, "learning_rate": 4.222421080806452e-06, "loss": 0.024945661425590515, "step": 23850 }, { "epoch": 0.22451764705882352, "grad_norm": 0.44679838463490906, "learning_rate": 4.221978530042662e-06, "loss": 0.0272970974445343, "step": 23855 }, { "epoch": 0.22456470588235294, "grad_norm": 0.5878670058612234, "learning_rate": 4.221536118400571e-06, "loss": 0.023194664716720582, "step": 23860 }, { "epoch": 0.22461176470588234, "grad_norm": 0.6692586217016279, "learning_rate": 4.221093845807305e-06, "loss": 0.025814270973205565, "step": 23865 }, { "epoch": 0.22465882352941177, "grad_norm": 0.5971275615042246, "learning_rate": 4.220651712190041e-06, "loss": 0.02501099109649658, "step": 23870 }, { "epoch": 0.22470588235294117, "grad_norm": 0.810684966415909, "learning_rate": 4.220209717476009e-06, "loss": 0.025136253237724303, "step": 23875 }, { "epoch": 0.2247529411764706, "grad_norm": 0.4500799301498483, "learning_rate": 4.219767861592493e-06, "loss": 0.021077489852905272, "step": 23880 }, { "epoch": 0.2248, "grad_norm": 0.7148298274004335, "learning_rate": 4.2193261444668324e-06, "loss": 0.027055084705352783, "step": 23885 }, { "epoch": 0.2248470588235294, "grad_norm": 1.038292778307766, "learning_rate": 4.218884566026417e-06, "loss": 0.023848208785057067, "step": 23890 }, { "epoch": 0.22489411764705883, "grad_norm": 0.6861470996705464, "learning_rate": 4.218443126198689e-06, "loss": 0.029845064878463744, "step": 23895 }, { "epoch": 0.22494117647058823, "grad_norm": 0.599285077724589, "learning_rate": 4.218001824911147e-06, "loss": 0.02996998429298401, "step": 23900 }, { "epoch": 0.22498823529411766, "grad_norm": 0.5876524429123716, "learning_rate": 4.217560662091341e-06, "loss": 0.027393895387649535, "step": 23905 }, { "epoch": 0.22503529411764706, "grad_norm": 0.7463497952175779, "learning_rate": 4.217119637666874e-06, "loss": 0.026116371154785156, "step": 23910 }, { "epoch": 0.22508235294117648, "grad_norm": 0.6749202814659456, "learning_rate": 4.216678751565402e-06, "loss": 0.0225245401263237, "step": 23915 }, { "epoch": 0.22512941176470588, "grad_norm": 0.6920612287581855, "learning_rate": 4.2162380037146335e-06, "loss": 0.02657661437988281, "step": 23920 }, { "epoch": 0.22517647058823528, "grad_norm": 0.6626373550384919, "learning_rate": 4.215797394042331e-06, "loss": 0.022860164940357208, "step": 23925 }, { "epoch": 0.2252235294117647, "grad_norm": 0.7869490759590023, "learning_rate": 4.21535692247631e-06, "loss": 0.02167007029056549, "step": 23930 }, { "epoch": 0.2252705882352941, "grad_norm": 0.7135565164750903, "learning_rate": 4.214916588944436e-06, "loss": 0.030036038160324095, "step": 23935 }, { "epoch": 0.22531764705882354, "grad_norm": 0.3414700586531267, "learning_rate": 4.214476393374631e-06, "loss": 0.02107730507850647, "step": 23940 }, { "epoch": 0.22536470588235294, "grad_norm": 0.5990030499505354, "learning_rate": 4.2140363356948686e-06, "loss": 0.021422174572944642, "step": 23945 }, { "epoch": 0.22541176470588234, "grad_norm": 0.7275820574305405, "learning_rate": 4.213596415833172e-06, "loss": 0.023993882536888122, "step": 23950 }, { "epoch": 0.22545882352941177, "grad_norm": 0.8238979256055069, "learning_rate": 4.21315663371762e-06, "loss": 0.025227671861648558, "step": 23955 }, { "epoch": 0.22550588235294117, "grad_norm": 0.7552296130812028, "learning_rate": 4.2127169892763445e-06, "loss": 0.02339223921298981, "step": 23960 }, { "epoch": 0.2255529411764706, "grad_norm": 0.38651354924725173, "learning_rate": 4.212277482437529e-06, "loss": 0.02406451404094696, "step": 23965 }, { "epoch": 0.2256, "grad_norm": 0.609548062379294, "learning_rate": 4.211838113129407e-06, "loss": 0.023411962389945983, "step": 23970 }, { "epoch": 0.22564705882352942, "grad_norm": 1.0474309649686793, "learning_rate": 4.211398881280268e-06, "loss": 0.021102502942085266, "step": 23975 }, { "epoch": 0.22569411764705882, "grad_norm": 0.5751815794969578, "learning_rate": 4.210959786818451e-06, "loss": 0.02392442375421524, "step": 23980 }, { "epoch": 0.22574117647058822, "grad_norm": 1.2727752288400518, "learning_rate": 4.21052082967235e-06, "loss": 0.031134000420570372, "step": 23985 }, { "epoch": 0.22578823529411765, "grad_norm": 0.79354811604246, "learning_rate": 4.21008200977041e-06, "loss": 0.03197096288204193, "step": 23990 }, { "epoch": 0.22583529411764705, "grad_norm": 0.5333121699967958, "learning_rate": 4.209643327041128e-06, "loss": 0.024030271172523498, "step": 23995 }, { "epoch": 0.22588235294117648, "grad_norm": 0.6439716280358421, "learning_rate": 4.209204781413052e-06, "loss": 0.02740721106529236, "step": 24000 }, { "epoch": 0.22592941176470588, "grad_norm": 0.5016440382154816, "learning_rate": 4.208766372814784e-06, "loss": 0.02290305495262146, "step": 24005 }, { "epoch": 0.2259764705882353, "grad_norm": 0.7742192776922691, "learning_rate": 4.208328101174978e-06, "loss": 0.027514809370040895, "step": 24010 }, { "epoch": 0.2260235294117647, "grad_norm": 1.000697439792815, "learning_rate": 4.2078899664223385e-06, "loss": 0.02416212558746338, "step": 24015 }, { "epoch": 0.2260705882352941, "grad_norm": 0.7317450164367129, "learning_rate": 4.207451968485622e-06, "loss": 0.0317994624376297, "step": 24020 }, { "epoch": 0.22611764705882353, "grad_norm": 0.46115120221504086, "learning_rate": 4.20701410729364e-06, "loss": 0.02573187053203583, "step": 24025 }, { "epoch": 0.22616470588235293, "grad_norm": 0.7839902199796457, "learning_rate": 4.206576382775252e-06, "loss": 0.026280435919761657, "step": 24030 }, { "epoch": 0.22621176470588236, "grad_norm": 0.535187211316182, "learning_rate": 4.206138794859371e-06, "loss": 0.02411198616027832, "step": 24035 }, { "epoch": 0.22625882352941176, "grad_norm": 0.5918772331066487, "learning_rate": 4.205701343474962e-06, "loss": 0.020785228908061983, "step": 24040 }, { "epoch": 0.2263058823529412, "grad_norm": 0.6287657946471568, "learning_rate": 4.2052640285510396e-06, "loss": 0.02655777335166931, "step": 24045 }, { "epoch": 0.2263529411764706, "grad_norm": 0.9731437027701262, "learning_rate": 4.2048268500166744e-06, "loss": 0.031489604711532594, "step": 24050 }, { "epoch": 0.2264, "grad_norm": 0.3990321244711865, "learning_rate": 4.204389807800985e-06, "loss": 0.025499242544174194, "step": 24055 }, { "epoch": 0.22644705882352942, "grad_norm": 0.4866370967535878, "learning_rate": 4.203952901833142e-06, "loss": 0.02256474494934082, "step": 24060 }, { "epoch": 0.22649411764705882, "grad_norm": 0.6272153430229329, "learning_rate": 4.203516132042368e-06, "loss": 0.026372984051704407, "step": 24065 }, { "epoch": 0.22654117647058825, "grad_norm": 0.6021792276351178, "learning_rate": 4.203079498357938e-06, "loss": 0.02774525582790375, "step": 24070 }, { "epoch": 0.22658823529411765, "grad_norm": 0.4989403049373945, "learning_rate": 4.202643000709176e-06, "loss": 0.0268636554479599, "step": 24075 }, { "epoch": 0.22663529411764705, "grad_norm": 0.4911341561609998, "learning_rate": 4.2022066390254595e-06, "loss": 0.025997793674468993, "step": 24080 }, { "epoch": 0.22668235294117647, "grad_norm": 0.7302131625850127, "learning_rate": 4.201770413236217e-06, "loss": 0.029438328742980958, "step": 24085 }, { "epoch": 0.22672941176470587, "grad_norm": 0.49346203203910805, "learning_rate": 4.201334323270929e-06, "loss": 0.02462112307548523, "step": 24090 }, { "epoch": 0.2267764705882353, "grad_norm": 0.5456209245752229, "learning_rate": 4.200898369059124e-06, "loss": 0.024438247084617615, "step": 24095 }, { "epoch": 0.2268235294117647, "grad_norm": 0.6930932365148735, "learning_rate": 4.200462550530386e-06, "loss": 0.022245480120182036, "step": 24100 }, { "epoch": 0.22687058823529413, "grad_norm": 0.6245334991449473, "learning_rate": 4.200026867614347e-06, "loss": 0.023777127265930176, "step": 24105 }, { "epoch": 0.22691764705882353, "grad_norm": 0.7012554784251517, "learning_rate": 4.19959132024069e-06, "loss": 0.030697667598724367, "step": 24110 }, { "epoch": 0.22696470588235293, "grad_norm": 0.6758621032799993, "learning_rate": 4.199155908339151e-06, "loss": 0.022606474161148072, "step": 24115 }, { "epoch": 0.22701176470588236, "grad_norm": 0.6236105505466865, "learning_rate": 4.198720631839517e-06, "loss": 0.02736375331878662, "step": 24120 }, { "epoch": 0.22705882352941176, "grad_norm": 0.829365921099409, "learning_rate": 4.198285490671625e-06, "loss": 0.022533461451530457, "step": 24125 }, { "epoch": 0.22710588235294119, "grad_norm": 0.6696392144455775, "learning_rate": 4.197850484765361e-06, "loss": 0.022144250571727753, "step": 24130 }, { "epoch": 0.22715294117647059, "grad_norm": 0.6520920238021849, "learning_rate": 4.197415614050666e-06, "loss": 0.026013511419296264, "step": 24135 }, { "epoch": 0.2272, "grad_norm": 0.7931658233521854, "learning_rate": 4.1969808784575275e-06, "loss": 0.02693573236465454, "step": 24140 }, { "epoch": 0.2272470588235294, "grad_norm": 0.6284044361224554, "learning_rate": 4.196546277915987e-06, "loss": 0.027348506450653075, "step": 24145 }, { "epoch": 0.2272941176470588, "grad_norm": 0.5483063814712926, "learning_rate": 4.1961118123561354e-06, "loss": 0.027961799502372743, "step": 24150 }, { "epoch": 0.22734117647058824, "grad_norm": 0.7509494221767639, "learning_rate": 4.195677481708116e-06, "loss": 0.034600216150283816, "step": 24155 }, { "epoch": 0.22738823529411764, "grad_norm": 0.49699245919593477, "learning_rate": 4.195243285902117e-06, "loss": 0.02104736566543579, "step": 24160 }, { "epoch": 0.22743529411764707, "grad_norm": 0.7534497027804614, "learning_rate": 4.194809224868384e-06, "loss": 0.022609177231788635, "step": 24165 }, { "epoch": 0.22748235294117647, "grad_norm": 0.561712705007703, "learning_rate": 4.19437529853721e-06, "loss": 0.024001270532608032, "step": 24170 }, { "epoch": 0.22752941176470587, "grad_norm": 0.5775460831734552, "learning_rate": 4.193941506838938e-06, "loss": 0.026763397455215453, "step": 24175 }, { "epoch": 0.2275764705882353, "grad_norm": 0.6559153317465097, "learning_rate": 4.193507849703965e-06, "loss": 0.029137545824050905, "step": 24180 }, { "epoch": 0.2276235294117647, "grad_norm": 0.5441376773325696, "learning_rate": 4.1930743270627315e-06, "loss": 0.025788259506225587, "step": 24185 }, { "epoch": 0.22767058823529412, "grad_norm": 0.6097864356651357, "learning_rate": 4.192640938845736e-06, "loss": 0.02648831009864807, "step": 24190 }, { "epoch": 0.22771764705882352, "grad_norm": 0.4785252027917601, "learning_rate": 4.19220768498352e-06, "loss": 0.02244311720132828, "step": 24195 }, { "epoch": 0.22776470588235295, "grad_norm": 0.6247486747891515, "learning_rate": 4.191774565406683e-06, "loss": 0.025957608222961427, "step": 24200 }, { "epoch": 0.22781176470588235, "grad_norm": 0.38377296051314974, "learning_rate": 4.191341580045867e-06, "loss": 0.025079917907714844, "step": 24205 }, { "epoch": 0.22785882352941175, "grad_norm": 0.5404783001862756, "learning_rate": 4.1909087288317706e-06, "loss": 0.020506860315799715, "step": 24210 }, { "epoch": 0.22790588235294118, "grad_norm": 0.5617527527756442, "learning_rate": 4.190476011695138e-06, "loss": 0.030151250958442687, "step": 24215 }, { "epoch": 0.22795294117647058, "grad_norm": 0.5061854630597304, "learning_rate": 4.190043428566764e-06, "loss": 0.019661229848861695, "step": 24220 }, { "epoch": 0.228, "grad_norm": 0.8489672166217852, "learning_rate": 4.189610979377498e-06, "loss": 0.03321096003055572, "step": 24225 }, { "epoch": 0.2280470588235294, "grad_norm": 0.6085235745620741, "learning_rate": 4.189178664058233e-06, "loss": 0.02442101836204529, "step": 24230 }, { "epoch": 0.22809411764705884, "grad_norm": 0.7256141284672445, "learning_rate": 4.188746482539916e-06, "loss": 0.027372080087661742, "step": 24235 }, { "epoch": 0.22814117647058824, "grad_norm": 0.47476018621673477, "learning_rate": 4.188314434753541e-06, "loss": 0.02273421585559845, "step": 24240 }, { "epoch": 0.22818823529411764, "grad_norm": 1.1212409783497186, "learning_rate": 4.187882520630155e-06, "loss": 0.02446773648262024, "step": 24245 }, { "epoch": 0.22823529411764706, "grad_norm": 0.7128275044189375, "learning_rate": 4.187450740100852e-06, "loss": 0.02479354739189148, "step": 24250 }, { "epoch": 0.22828235294117646, "grad_norm": 0.6790981816378517, "learning_rate": 4.1870190930967765e-06, "loss": 0.029614317417144775, "step": 24255 }, { "epoch": 0.2283294117647059, "grad_norm": 1.2874530041875338, "learning_rate": 4.186587579549126e-06, "loss": 0.025708526372909546, "step": 24260 }, { "epoch": 0.2283764705882353, "grad_norm": 0.7574378740650208, "learning_rate": 4.186156199389141e-06, "loss": 0.028972160816192628, "step": 24265 }, { "epoch": 0.22842352941176472, "grad_norm": 4.690219826445193, "learning_rate": 4.185724952548118e-06, "loss": 0.025348186492919922, "step": 24270 }, { "epoch": 0.22847058823529412, "grad_norm": 0.495485996464799, "learning_rate": 4.185293838957399e-06, "loss": 0.028125345706939697, "step": 24275 }, { "epoch": 0.22851764705882352, "grad_norm": 0.794626737734442, "learning_rate": 4.184862858548378e-06, "loss": 0.026852059364318847, "step": 24280 }, { "epoch": 0.22856470588235295, "grad_norm": 0.7920046942975412, "learning_rate": 4.184432011252495e-06, "loss": 0.027585113048553468, "step": 24285 }, { "epoch": 0.22861176470588235, "grad_norm": 0.5288810453221074, "learning_rate": 4.1840012970012435e-06, "loss": 0.02865113615989685, "step": 24290 }, { "epoch": 0.22865882352941178, "grad_norm": 0.563617871938037, "learning_rate": 4.183570715726164e-06, "loss": 0.02392212748527527, "step": 24295 }, { "epoch": 0.22870588235294118, "grad_norm": 0.5279724985077944, "learning_rate": 4.183140267358845e-06, "loss": 0.023708760738372803, "step": 24300 }, { "epoch": 0.22875294117647058, "grad_norm": 0.5496887660982321, "learning_rate": 4.182709951830929e-06, "loss": 0.024320843815803527, "step": 24305 }, { "epoch": 0.2288, "grad_norm": 0.4948637949711983, "learning_rate": 4.182279769074104e-06, "loss": 0.025779205560684203, "step": 24310 }, { "epoch": 0.2288470588235294, "grad_norm": 0.7444930286869454, "learning_rate": 4.181849719020106e-06, "loss": 0.028444743156433104, "step": 24315 }, { "epoch": 0.22889411764705883, "grad_norm": 0.47497026687453997, "learning_rate": 4.181419801600724e-06, "loss": 0.024480901658535004, "step": 24320 }, { "epoch": 0.22894117647058823, "grad_norm": 0.5648645562488372, "learning_rate": 4.180990016747793e-06, "loss": 0.026875433325767518, "step": 24325 }, { "epoch": 0.22898823529411766, "grad_norm": 0.7633887075684953, "learning_rate": 4.1805603643931995e-06, "loss": 0.024910739064216612, "step": 24330 }, { "epoch": 0.22903529411764706, "grad_norm": 0.48399933801085904, "learning_rate": 4.180130844468877e-06, "loss": 0.027294325828552245, "step": 24335 }, { "epoch": 0.22908235294117646, "grad_norm": 0.5064769944553901, "learning_rate": 4.179701456906808e-06, "loss": 0.022746717929840087, "step": 24340 }, { "epoch": 0.2291294117647059, "grad_norm": 0.7263568868458077, "learning_rate": 4.179272201639026e-06, "loss": 0.022446243464946745, "step": 24345 }, { "epoch": 0.2291764705882353, "grad_norm": 0.7883370977948929, "learning_rate": 4.178843078597609e-06, "loss": 0.025973063707351685, "step": 24350 }, { "epoch": 0.22922352941176471, "grad_norm": 0.6635573151990918, "learning_rate": 4.1784140877146905e-06, "loss": 0.03246172666549683, "step": 24355 }, { "epoch": 0.22927058823529411, "grad_norm": 0.7401676333046303, "learning_rate": 4.177985228922446e-06, "loss": 0.02366113066673279, "step": 24360 }, { "epoch": 0.22931764705882354, "grad_norm": 0.7162613536152843, "learning_rate": 4.1775565021531025e-06, "loss": 0.028840464353561402, "step": 24365 }, { "epoch": 0.22936470588235294, "grad_norm": 0.5991037406630852, "learning_rate": 4.177127907338938e-06, "loss": 0.02439592480659485, "step": 24370 }, { "epoch": 0.22941176470588234, "grad_norm": 0.6434458948704256, "learning_rate": 4.176699444412275e-06, "loss": 0.024862495064735413, "step": 24375 }, { "epoch": 0.22945882352941177, "grad_norm": 0.8749100290992871, "learning_rate": 4.176271113305488e-06, "loss": 0.027107536792755127, "step": 24380 }, { "epoch": 0.22950588235294117, "grad_norm": 0.6621905189089663, "learning_rate": 4.175842913950996e-06, "loss": 0.02287769615650177, "step": 24385 }, { "epoch": 0.2295529411764706, "grad_norm": 0.5466722147790212, "learning_rate": 4.175414846281274e-06, "loss": 0.029353317618370057, "step": 24390 }, { "epoch": 0.2296, "grad_norm": 0.6370621304767059, "learning_rate": 4.174986910228835e-06, "loss": 0.02417459785938263, "step": 24395 }, { "epoch": 0.2296470588235294, "grad_norm": 0.41649755378155856, "learning_rate": 4.174559105726249e-06, "loss": 0.026344579458236695, "step": 24400 }, { "epoch": 0.22969411764705883, "grad_norm": 0.7544203704489826, "learning_rate": 4.17413143270613e-06, "loss": 0.027169051766395568, "step": 24405 }, { "epoch": 0.22974117647058823, "grad_norm": 0.6447799181835214, "learning_rate": 4.173703891101142e-06, "loss": 0.028590837121009828, "step": 24410 }, { "epoch": 0.22978823529411765, "grad_norm": 0.4140177877367649, "learning_rate": 4.173276480843998e-06, "loss": 0.026443344354629517, "step": 24415 }, { "epoch": 0.22983529411764705, "grad_norm": 0.6415059957106217, "learning_rate": 4.172849201867456e-06, "loss": 0.02678954303264618, "step": 24420 }, { "epoch": 0.22988235294117648, "grad_norm": 0.6756019368785577, "learning_rate": 4.1724220541043255e-06, "loss": 0.020069953799247742, "step": 24425 }, { "epoch": 0.22992941176470588, "grad_norm": 0.9384016730358175, "learning_rate": 4.171995037487462e-06, "loss": 0.026723420619964598, "step": 24430 }, { "epoch": 0.22997647058823528, "grad_norm": 0.6481848272972011, "learning_rate": 4.171568151949771e-06, "loss": 0.027645567059516908, "step": 24435 }, { "epoch": 0.2300235294117647, "grad_norm": 0.6890667298851493, "learning_rate": 4.171141397424205e-06, "loss": 0.02409379780292511, "step": 24440 }, { "epoch": 0.2300705882352941, "grad_norm": 0.7678337010165505, "learning_rate": 4.170714773843763e-06, "loss": 0.023807787895202638, "step": 24445 }, { "epoch": 0.23011764705882354, "grad_norm": 0.4605984217071726, "learning_rate": 4.170288281141496e-06, "loss": 0.025286749005317688, "step": 24450 }, { "epoch": 0.23016470588235294, "grad_norm": 0.5492572972955735, "learning_rate": 4.169861919250497e-06, "loss": 0.027755197882652283, "step": 24455 }, { "epoch": 0.23021176470588237, "grad_norm": 0.5857198892491745, "learning_rate": 4.169435688103914e-06, "loss": 0.026152157783508302, "step": 24460 }, { "epoch": 0.23025882352941177, "grad_norm": 0.5694099595532691, "learning_rate": 4.169009587634936e-06, "loss": 0.028590884804725648, "step": 24465 }, { "epoch": 0.23030588235294117, "grad_norm": 0.5279074057537037, "learning_rate": 4.168583617776806e-06, "loss": 0.02138054370880127, "step": 24470 }, { "epoch": 0.2303529411764706, "grad_norm": 0.5895114496646664, "learning_rate": 4.168157778462807e-06, "loss": 0.02672288715839386, "step": 24475 }, { "epoch": 0.2304, "grad_norm": 0.5643653499735144, "learning_rate": 4.167732069626279e-06, "loss": 0.024282246828079224, "step": 24480 }, { "epoch": 0.23044705882352942, "grad_norm": 0.6622533214810917, "learning_rate": 4.1673064912006035e-06, "loss": 0.027398473024368285, "step": 24485 }, { "epoch": 0.23049411764705882, "grad_norm": 0.7213014526342135, "learning_rate": 4.1668810431192095e-06, "loss": 0.02415231317281723, "step": 24490 }, { "epoch": 0.23054117647058822, "grad_norm": 0.5205514935039699, "learning_rate": 4.166455725315577e-06, "loss": 0.021108171343803404, "step": 24495 }, { "epoch": 0.23058823529411765, "grad_norm": 0.7061128409917969, "learning_rate": 4.1660305377232295e-06, "loss": 0.02648155391216278, "step": 24500 }, { "epoch": 0.23063529411764705, "grad_norm": 0.6060089900403076, "learning_rate": 4.165605480275743e-06, "loss": 0.024309101700782775, "step": 24505 }, { "epoch": 0.23068235294117648, "grad_norm": 0.8378317385035726, "learning_rate": 4.165180552906735e-06, "loss": 0.026397424936294555, "step": 24510 }, { "epoch": 0.23072941176470588, "grad_norm": 0.7015380537026303, "learning_rate": 4.164755755549876e-06, "loss": 0.01828461289405823, "step": 24515 }, { "epoch": 0.2307764705882353, "grad_norm": 0.6415370230276601, "learning_rate": 4.164331088138881e-06, "loss": 0.028986644744873048, "step": 24520 }, { "epoch": 0.2308235294117647, "grad_norm": 0.7600452205604978, "learning_rate": 4.163906550607511e-06, "loss": 0.02817111611366272, "step": 24525 }, { "epoch": 0.2308705882352941, "grad_norm": 0.7014741675005305, "learning_rate": 4.1634821428895776e-06, "loss": 0.026314735412597656, "step": 24530 }, { "epoch": 0.23091764705882353, "grad_norm": 1.1508096057063115, "learning_rate": 4.1630578649189355e-06, "loss": 0.04059304893016815, "step": 24535 }, { "epoch": 0.23096470588235293, "grad_norm": 0.43714464689517196, "learning_rate": 4.1626337166294925e-06, "loss": 0.021563884615898133, "step": 24540 }, { "epoch": 0.23101176470588236, "grad_norm": 0.6033475620426699, "learning_rate": 4.162209697955196e-06, "loss": 0.023887056112289428, "step": 24545 }, { "epoch": 0.23105882352941176, "grad_norm": 0.628415546257265, "learning_rate": 4.161785808830047e-06, "loss": 0.024495404958724976, "step": 24550 }, { "epoch": 0.2311058823529412, "grad_norm": 0.6763860139699258, "learning_rate": 4.161362049188091e-06, "loss": 0.02731376886367798, "step": 24555 }, { "epoch": 0.2311529411764706, "grad_norm": 0.8247623006185846, "learning_rate": 4.160938418963419e-06, "loss": 0.02649538516998291, "step": 24560 }, { "epoch": 0.2312, "grad_norm": 0.44226866063410136, "learning_rate": 4.160514918090173e-06, "loss": 0.028159263730049133, "step": 24565 }, { "epoch": 0.23124705882352942, "grad_norm": 0.6449922164760175, "learning_rate": 4.160091546502537e-06, "loss": 0.026466375589370726, "step": 24570 }, { "epoch": 0.23129411764705882, "grad_norm": 0.8344248583820038, "learning_rate": 4.159668304134744e-06, "loss": 0.025289854407310484, "step": 24575 }, { "epoch": 0.23134117647058824, "grad_norm": 0.9378912944473666, "learning_rate": 4.159245190921075e-06, "loss": 0.023830652236938477, "step": 24580 }, { "epoch": 0.23138823529411764, "grad_norm": 0.5579563389422684, "learning_rate": 4.158822206795857e-06, "loss": 0.023309966921806334, "step": 24585 }, { "epoch": 0.23143529411764707, "grad_norm": 0.5404793574415516, "learning_rate": 4.158399351693465e-06, "loss": 0.019748860597610475, "step": 24590 }, { "epoch": 0.23148235294117647, "grad_norm": 0.7485569124025733, "learning_rate": 4.157976625548317e-06, "loss": 0.02623176872730255, "step": 24595 }, { "epoch": 0.23152941176470587, "grad_norm": 0.6098950387447487, "learning_rate": 4.1575540282948804e-06, "loss": 0.02080707848072052, "step": 24600 }, { "epoch": 0.2315764705882353, "grad_norm": 1.0529821911380637, "learning_rate": 4.157131559867669e-06, "loss": 0.028889405727386474, "step": 24605 }, { "epoch": 0.2316235294117647, "grad_norm": 0.6012584291723843, "learning_rate": 4.1567092202012435e-06, "loss": 0.03086382746696472, "step": 24610 }, { "epoch": 0.23167058823529413, "grad_norm": 0.6619234984680505, "learning_rate": 4.156287009230209e-06, "loss": 0.030497175455093384, "step": 24615 }, { "epoch": 0.23171764705882353, "grad_norm": 0.6340225861183322, "learning_rate": 4.1558649268892215e-06, "loss": 0.025644531846046446, "step": 24620 }, { "epoch": 0.23176470588235293, "grad_norm": 0.4679365055580092, "learning_rate": 4.155442973112978e-06, "loss": 0.0233277827501297, "step": 24625 }, { "epoch": 0.23181176470588236, "grad_norm": 0.398690822803576, "learning_rate": 4.155021147836225e-06, "loss": 0.020314502716064452, "step": 24630 }, { "epoch": 0.23185882352941176, "grad_norm": 0.775084319814113, "learning_rate": 4.154599450993756e-06, "loss": 0.024700793623924255, "step": 24635 }, { "epoch": 0.23190588235294118, "grad_norm": 0.5243102410280043, "learning_rate": 4.154177882520408e-06, "loss": 0.02311995029449463, "step": 24640 }, { "epoch": 0.23195294117647058, "grad_norm": 0.6233792134120464, "learning_rate": 4.153756442351067e-06, "loss": 0.02603701651096344, "step": 24645 }, { "epoch": 0.232, "grad_norm": 0.5145453285380497, "learning_rate": 4.153335130420665e-06, "loss": 0.024075095355510712, "step": 24650 }, { "epoch": 0.2320470588235294, "grad_norm": 0.6453803795597525, "learning_rate": 4.152913946664176e-06, "loss": 0.02703026533126831, "step": 24655 }, { "epoch": 0.2320941176470588, "grad_norm": 0.6071678512060767, "learning_rate": 4.152492891016628e-06, "loss": 0.025279930233955382, "step": 24660 }, { "epoch": 0.23214117647058824, "grad_norm": 0.6815214249467119, "learning_rate": 4.152071963413088e-06, "loss": 0.023231786489486695, "step": 24665 }, { "epoch": 0.23218823529411764, "grad_norm": 0.5496708707936255, "learning_rate": 4.151651163788672e-06, "loss": 0.023026828467845917, "step": 24670 }, { "epoch": 0.23223529411764707, "grad_norm": 0.7033169373354373, "learning_rate": 4.151230492078541e-06, "loss": 0.02274186760187149, "step": 24675 }, { "epoch": 0.23228235294117647, "grad_norm": 0.5886976985398682, "learning_rate": 4.1508099482179045e-06, "loss": 0.025962039828300476, "step": 24680 }, { "epoch": 0.2323294117647059, "grad_norm": 0.7084715748618109, "learning_rate": 4.150389532142015e-06, "loss": 0.02443370521068573, "step": 24685 }, { "epoch": 0.2323764705882353, "grad_norm": 0.5543205210239687, "learning_rate": 4.149969243786173e-06, "loss": 0.026761114597320557, "step": 24690 }, { "epoch": 0.2324235294117647, "grad_norm": 0.6066150670476981, "learning_rate": 4.149549083085722e-06, "loss": 0.018005380034446718, "step": 24695 }, { "epoch": 0.23247058823529412, "grad_norm": 0.4645694718242528, "learning_rate": 4.149129049976055e-06, "loss": 0.028266727924346924, "step": 24700 }, { "epoch": 0.23251764705882352, "grad_norm": 0.4432299465998549, "learning_rate": 4.148709144392607e-06, "loss": 0.02276419848203659, "step": 24705 }, { "epoch": 0.23256470588235295, "grad_norm": 0.5054360772965765, "learning_rate": 4.148289366270864e-06, "loss": 0.032479527592659, "step": 24710 }, { "epoch": 0.23261176470588235, "grad_norm": 0.6346908990028353, "learning_rate": 4.14786971554635e-06, "loss": 0.023832011222839355, "step": 24715 }, { "epoch": 0.23265882352941175, "grad_norm": 0.5355356399852517, "learning_rate": 4.147450192154642e-06, "loss": 0.03009004592895508, "step": 24720 }, { "epoch": 0.23270588235294118, "grad_norm": 0.5773350375687498, "learning_rate": 4.1470307960313595e-06, "loss": 0.026139742136001586, "step": 24725 }, { "epoch": 0.23275294117647058, "grad_norm": 0.6349182123394479, "learning_rate": 4.146611527112167e-06, "loss": 0.028456595540046693, "step": 24730 }, { "epoch": 0.2328, "grad_norm": 0.5247780079563478, "learning_rate": 4.146192385332775e-06, "loss": 0.020452576875686645, "step": 24735 }, { "epoch": 0.2328470588235294, "grad_norm": 0.5199540481001794, "learning_rate": 4.145773370628939e-06, "loss": 0.02126637250185013, "step": 24740 }, { "epoch": 0.23289411764705883, "grad_norm": 0.777591119632445, "learning_rate": 4.145354482936462e-06, "loss": 0.02673637866973877, "step": 24745 }, { "epoch": 0.23294117647058823, "grad_norm": 0.5923830386081093, "learning_rate": 4.144935722191191e-06, "loss": 0.022856110334396364, "step": 24750 }, { "epoch": 0.23298823529411763, "grad_norm": 0.5475442020301969, "learning_rate": 4.144517088329019e-06, "loss": 0.024575471878051758, "step": 24755 }, { "epoch": 0.23303529411764706, "grad_norm": 0.49600783769220746, "learning_rate": 4.14409858128588e-06, "loss": 0.02390858381986618, "step": 24760 }, { "epoch": 0.23308235294117646, "grad_norm": 0.830978489539896, "learning_rate": 4.143680200997762e-06, "loss": 0.02463173121213913, "step": 24765 }, { "epoch": 0.2331294117647059, "grad_norm": 0.6640446827420015, "learning_rate": 4.14326194740069e-06, "loss": 0.022525940835475922, "step": 24770 }, { "epoch": 0.2331764705882353, "grad_norm": 0.5399281285578296, "learning_rate": 4.142843820430738e-06, "loss": 0.02528883218765259, "step": 24775 }, { "epoch": 0.23322352941176472, "grad_norm": 0.6771578748243285, "learning_rate": 4.142425820024026e-06, "loss": 0.022760735452175142, "step": 24780 }, { "epoch": 0.23327058823529412, "grad_norm": 0.5756848271381895, "learning_rate": 4.142007946116714e-06, "loss": 0.0236873984336853, "step": 24785 }, { "epoch": 0.23331764705882352, "grad_norm": 0.5483872471444842, "learning_rate": 4.141590198645014e-06, "loss": 0.021856272220611574, "step": 24790 }, { "epoch": 0.23336470588235295, "grad_norm": 0.7000796864521865, "learning_rate": 4.141172577545179e-06, "loss": 0.02876678705215454, "step": 24795 }, { "epoch": 0.23341176470588235, "grad_norm": 0.5349887495485458, "learning_rate": 4.1407550827535075e-06, "loss": 0.018963347375392913, "step": 24800 }, { "epoch": 0.23345882352941177, "grad_norm": 0.7504661674506349, "learning_rate": 4.140337714206344e-06, "loss": 0.02008657157421112, "step": 24805 }, { "epoch": 0.23350588235294117, "grad_norm": 0.4876488718684911, "learning_rate": 4.139920471840076e-06, "loss": 0.021254602074623107, "step": 24810 }, { "epoch": 0.2335529411764706, "grad_norm": 0.7387569153273843, "learning_rate": 4.139503355591136e-06, "loss": 0.025714582204818724, "step": 24815 }, { "epoch": 0.2336, "grad_norm": 0.5453641001732336, "learning_rate": 4.139086365396004e-06, "loss": 0.021940185129642485, "step": 24820 }, { "epoch": 0.2336470588235294, "grad_norm": 0.501509650781079, "learning_rate": 4.1386695011912025e-06, "loss": 0.02412145733833313, "step": 24825 }, { "epoch": 0.23369411764705883, "grad_norm": 0.6524071957334185, "learning_rate": 4.138252762913299e-06, "loss": 0.02648017406463623, "step": 24830 }, { "epoch": 0.23374117647058823, "grad_norm": 0.4514734587551145, "learning_rate": 4.137836150498907e-06, "loss": 0.02177366614341736, "step": 24835 }, { "epoch": 0.23378823529411766, "grad_norm": 0.5652207801587364, "learning_rate": 4.137419663884683e-06, "loss": 0.02740046977996826, "step": 24840 }, { "epoch": 0.23383529411764706, "grad_norm": 0.5924432820296855, "learning_rate": 4.137003303007329e-06, "loss": 0.02614252269268036, "step": 24845 }, { "epoch": 0.23388235294117646, "grad_norm": 0.6090843484910698, "learning_rate": 4.1365870678035904e-06, "loss": 0.023389843106269837, "step": 24850 }, { "epoch": 0.23392941176470589, "grad_norm": 1.187469474131402, "learning_rate": 4.136170958210259e-06, "loss": 0.022695085406303404, "step": 24855 }, { "epoch": 0.23397647058823529, "grad_norm": 0.5368309894358247, "learning_rate": 4.135754974164169e-06, "loss": 0.023541258275508882, "step": 24860 }, { "epoch": 0.2340235294117647, "grad_norm": 0.7827416640056601, "learning_rate": 4.135339115602202e-06, "loss": 0.025046724081039428, "step": 24865 }, { "epoch": 0.2340705882352941, "grad_norm": 0.5667573907509947, "learning_rate": 4.134923382461281e-06, "loss": 0.02105640321969986, "step": 24870 }, { "epoch": 0.23411764705882354, "grad_norm": 0.8394396092754033, "learning_rate": 4.134507774678376e-06, "loss": 0.025649961829185487, "step": 24875 }, { "epoch": 0.23416470588235294, "grad_norm": 0.742234227227633, "learning_rate": 4.134092292190499e-06, "loss": 0.02449401617050171, "step": 24880 }, { "epoch": 0.23421176470588234, "grad_norm": 0.6609512484609361, "learning_rate": 4.133676934934707e-06, "loss": 0.02789369225502014, "step": 24885 }, { "epoch": 0.23425882352941177, "grad_norm": 0.734719279551896, "learning_rate": 4.133261702848101e-06, "loss": 0.02483827918767929, "step": 24890 }, { "epoch": 0.23430588235294117, "grad_norm": 0.7111879452795741, "learning_rate": 4.132846595867829e-06, "loss": 0.025005525350570677, "step": 24895 }, { "epoch": 0.2343529411764706, "grad_norm": 0.7143936955355721, "learning_rate": 4.132431613931079e-06, "loss": 0.021090298891067505, "step": 24900 }, { "epoch": 0.2344, "grad_norm": 0.40059199361787223, "learning_rate": 4.1320167569750855e-06, "loss": 0.02178596556186676, "step": 24905 }, { "epoch": 0.23444705882352943, "grad_norm": 0.7176265047062693, "learning_rate": 4.131602024937128e-06, "loss": 0.025844323635101318, "step": 24910 }, { "epoch": 0.23449411764705883, "grad_norm": 0.5733586261481043, "learning_rate": 4.131187417754527e-06, "loss": 0.029214051365852357, "step": 24915 }, { "epoch": 0.23454117647058823, "grad_norm": 0.6044771950505499, "learning_rate": 4.130772935364649e-06, "loss": 0.02384112775325775, "step": 24920 }, { "epoch": 0.23458823529411765, "grad_norm": 0.5712109938374853, "learning_rate": 4.1303585777049045e-06, "loss": 0.022075602412223817, "step": 24925 }, { "epoch": 0.23463529411764705, "grad_norm": 0.6620028179186237, "learning_rate": 4.129944344712748e-06, "loss": 0.020827484130859376, "step": 24930 }, { "epoch": 0.23468235294117648, "grad_norm": 0.516089931646685, "learning_rate": 4.129530236325677e-06, "loss": 0.0259610652923584, "step": 24935 }, { "epoch": 0.23472941176470588, "grad_norm": 0.6967025790254057, "learning_rate": 4.129116252481234e-06, "loss": 0.019789037108421326, "step": 24940 }, { "epoch": 0.23477647058823528, "grad_norm": 0.8113035191582255, "learning_rate": 4.128702393117005e-06, "loss": 0.02526848316192627, "step": 24945 }, { "epoch": 0.2348235294117647, "grad_norm": 0.5937998135587531, "learning_rate": 4.128288658170619e-06, "loss": 0.02145618498325348, "step": 24950 }, { "epoch": 0.2348705882352941, "grad_norm": 0.6528351962464, "learning_rate": 4.12787504757975e-06, "loss": 0.02709590196609497, "step": 24955 }, { "epoch": 0.23491764705882354, "grad_norm": 0.6079733126372702, "learning_rate": 4.1274615612821125e-06, "loss": 0.02572469711303711, "step": 24960 }, { "epoch": 0.23496470588235294, "grad_norm": 0.4848549959117932, "learning_rate": 4.1270481992154704e-06, "loss": 0.027259036898612976, "step": 24965 }, { "epoch": 0.23501176470588236, "grad_norm": 0.6712950894984362, "learning_rate": 4.126634961317626e-06, "loss": 0.025938194990158082, "step": 24970 }, { "epoch": 0.23505882352941176, "grad_norm": 0.5995809709592701, "learning_rate": 4.126221847526428e-06, "loss": 0.024634429812431337, "step": 24975 }, { "epoch": 0.23510588235294116, "grad_norm": 0.5906327459972023, "learning_rate": 4.125808857779768e-06, "loss": 0.02414916008710861, "step": 24980 }, { "epoch": 0.2351529411764706, "grad_norm": 0.43904368296096474, "learning_rate": 4.125395992015578e-06, "loss": 0.021728095412254334, "step": 24985 }, { "epoch": 0.2352, "grad_norm": 0.47250794373527966, "learning_rate": 4.124983250171841e-06, "loss": 0.01738305389881134, "step": 24990 }, { "epoch": 0.23524705882352942, "grad_norm": 0.6837553518088031, "learning_rate": 4.1245706321865764e-06, "loss": 0.02464146018028259, "step": 24995 }, { "epoch": 0.23529411764705882, "grad_norm": 0.5087050125096454, "learning_rate": 4.124158137997849e-06, "loss": 0.026585036516189577, "step": 25000 }, { "epoch": 0.23534117647058825, "grad_norm": 0.38015809118055305, "learning_rate": 4.1237457675437694e-06, "loss": 0.017923061549663544, "step": 25005 }, { "epoch": 0.23538823529411765, "grad_norm": 0.5934378540660854, "learning_rate": 4.123333520762487e-06, "loss": 0.026784199476242065, "step": 25010 }, { "epoch": 0.23543529411764705, "grad_norm": 0.5167771708709626, "learning_rate": 4.1229213975921975e-06, "loss": 0.02548765540122986, "step": 25015 }, { "epoch": 0.23548235294117648, "grad_norm": 0.7406446442537823, "learning_rate": 4.12250939797114e-06, "loss": 0.027188432216644288, "step": 25020 }, { "epoch": 0.23552941176470588, "grad_norm": 0.6076567890650881, "learning_rate": 4.122097521837595e-06, "loss": 0.018989989161491395, "step": 25025 }, { "epoch": 0.2355764705882353, "grad_norm": 0.4280408898181953, "learning_rate": 4.121685769129889e-06, "loss": 0.026850223541259766, "step": 25030 }, { "epoch": 0.2356235294117647, "grad_norm": 0.5547832897892891, "learning_rate": 4.121274139786388e-06, "loss": 0.019813743233680726, "step": 25035 }, { "epoch": 0.2356705882352941, "grad_norm": 0.6373588540193278, "learning_rate": 4.1208626337455034e-06, "loss": 0.02582675814628601, "step": 25040 }, { "epoch": 0.23571764705882353, "grad_norm": 0.4649020383775221, "learning_rate": 4.120451250945691e-06, "loss": 0.024104508757591247, "step": 25045 }, { "epoch": 0.23576470588235293, "grad_norm": 0.5319156923994361, "learning_rate": 4.120039991325444e-06, "loss": 0.020997197926044465, "step": 25050 }, { "epoch": 0.23581176470588236, "grad_norm": 0.6949155744838211, "learning_rate": 4.119628854823305e-06, "loss": 0.02593105435371399, "step": 25055 }, { "epoch": 0.23585882352941176, "grad_norm": 0.6652018389215615, "learning_rate": 4.1192178413778564e-06, "loss": 0.02290647029876709, "step": 25060 }, { "epoch": 0.2359058823529412, "grad_norm": 0.6478169545941831, "learning_rate": 4.118806950927724e-06, "loss": 0.030966868996620177, "step": 25065 }, { "epoch": 0.2359529411764706, "grad_norm": 0.47299714732596276, "learning_rate": 4.118396183411576e-06, "loss": 0.0266133189201355, "step": 25070 }, { "epoch": 0.236, "grad_norm": 0.929270435557989, "learning_rate": 4.117985538768123e-06, "loss": 0.022278732061386107, "step": 25075 }, { "epoch": 0.23604705882352942, "grad_norm": 1.1582307382722252, "learning_rate": 4.117575016936121e-06, "loss": 0.025588130950927733, "step": 25080 }, { "epoch": 0.23609411764705882, "grad_norm": 0.5667099348914783, "learning_rate": 4.117164617854366e-06, "loss": 0.02604677677154541, "step": 25085 }, { "epoch": 0.23614117647058824, "grad_norm": 0.5183782552980589, "learning_rate": 4.116754341461696e-06, "loss": 0.025956499576568603, "step": 25090 }, { "epoch": 0.23618823529411764, "grad_norm": 0.531755945153958, "learning_rate": 4.116344187696996e-06, "loss": 0.026971834897994994, "step": 25095 }, { "epoch": 0.23623529411764707, "grad_norm": 0.4832309482619941, "learning_rate": 4.115934156499188e-06, "loss": 0.029850953817367555, "step": 25100 }, { "epoch": 0.23628235294117647, "grad_norm": 0.7579794725102147, "learning_rate": 4.115524247807241e-06, "loss": 0.029000449180603027, "step": 25105 }, { "epoch": 0.23632941176470587, "grad_norm": 0.6092644700919178, "learning_rate": 4.115114461560165e-06, "loss": 0.024370522797107698, "step": 25110 }, { "epoch": 0.2363764705882353, "grad_norm": 0.5524426394645853, "learning_rate": 4.1147047976970125e-06, "loss": 0.028452616930007935, "step": 25115 }, { "epoch": 0.2364235294117647, "grad_norm": 0.6488774658213494, "learning_rate": 4.114295256156878e-06, "loss": 0.023757827281951905, "step": 25120 }, { "epoch": 0.23647058823529413, "grad_norm": 0.5931897247550934, "learning_rate": 4.113885836878899e-06, "loss": 0.024524521827697755, "step": 25125 }, { "epoch": 0.23651764705882353, "grad_norm": 0.5093963747464374, "learning_rate": 4.113476539802254e-06, "loss": 0.02165760099887848, "step": 25130 }, { "epoch": 0.23656470588235295, "grad_norm": 0.5881608758487006, "learning_rate": 4.113067364866168e-06, "loss": 0.017591132223606108, "step": 25135 }, { "epoch": 0.23661176470588235, "grad_norm": 0.71044566546813, "learning_rate": 4.112658312009901e-06, "loss": 0.028556865453720093, "step": 25140 }, { "epoch": 0.23665882352941175, "grad_norm": 0.5911908530824349, "learning_rate": 4.112249381172764e-06, "loss": 0.025669419765472413, "step": 25145 }, { "epoch": 0.23670588235294118, "grad_norm": 0.5846537896876021, "learning_rate": 4.111840572294103e-06, "loss": 0.025005775690078735, "step": 25150 }, { "epoch": 0.23675294117647058, "grad_norm": 0.9049947555167135, "learning_rate": 4.11143188531331e-06, "loss": 0.030104199051856996, "step": 25155 }, { "epoch": 0.2368, "grad_norm": 0.5761069668247136, "learning_rate": 4.11102332016982e-06, "loss": 0.016866233944892884, "step": 25160 }, { "epoch": 0.2368470588235294, "grad_norm": 2.2070736906524147, "learning_rate": 4.110614876803106e-06, "loss": 0.024487346410751343, "step": 25165 }, { "epoch": 0.2368941176470588, "grad_norm": 0.5014262936858462, "learning_rate": 4.1102065551526855e-06, "loss": 0.023547524213790895, "step": 25170 }, { "epoch": 0.23694117647058824, "grad_norm": 0.4791972546654832, "learning_rate": 4.109798355158119e-06, "loss": 0.026012492179870606, "step": 25175 }, { "epoch": 0.23698823529411764, "grad_norm": 0.5256311921519815, "learning_rate": 4.109390276759008e-06, "loss": 0.023679202795028685, "step": 25180 }, { "epoch": 0.23703529411764707, "grad_norm": 0.5680440340037722, "learning_rate": 4.108982319894995e-06, "loss": 0.02751149535179138, "step": 25185 }, { "epoch": 0.23708235294117647, "grad_norm": 0.7315528087131304, "learning_rate": 4.108574484505767e-06, "loss": 0.030776900053024293, "step": 25190 }, { "epoch": 0.2371294117647059, "grad_norm": 0.7608315777115748, "learning_rate": 4.108166770531049e-06, "loss": 0.030325084924697876, "step": 25195 }, { "epoch": 0.2371764705882353, "grad_norm": 0.5879581357929825, "learning_rate": 4.107759177910613e-06, "loss": 0.026279351115226744, "step": 25200 }, { "epoch": 0.2372235294117647, "grad_norm": 0.5721692494364955, "learning_rate": 4.107351706584269e-06, "loss": 0.024252864718437194, "step": 25205 }, { "epoch": 0.23727058823529412, "grad_norm": 0.6483300983066465, "learning_rate": 4.106944356491868e-06, "loss": 0.025145381689071655, "step": 25210 }, { "epoch": 0.23731764705882352, "grad_norm": 0.4802051902423804, "learning_rate": 4.106537127573307e-06, "loss": 0.025715452432632447, "step": 25215 }, { "epoch": 0.23736470588235295, "grad_norm": 0.583102266480072, "learning_rate": 4.106130019768519e-06, "loss": 0.025076478719711304, "step": 25220 }, { "epoch": 0.23741176470588235, "grad_norm": 0.5617968104033303, "learning_rate": 4.105723033017486e-06, "loss": 0.021387892961502075, "step": 25225 }, { "epoch": 0.23745882352941178, "grad_norm": 0.6500933009557233, "learning_rate": 4.105316167260225e-06, "loss": 0.020470039546489717, "step": 25230 }, { "epoch": 0.23750588235294118, "grad_norm": 0.8576171217195948, "learning_rate": 4.104909422436797e-06, "loss": 0.02441759705543518, "step": 25235 }, { "epoch": 0.23755294117647058, "grad_norm": 0.6026356566066795, "learning_rate": 4.104502798487306e-06, "loss": 0.023105664551258086, "step": 25240 }, { "epoch": 0.2376, "grad_norm": 0.5517731765801308, "learning_rate": 4.104096295351896e-06, "loss": 0.026140573620796203, "step": 25245 }, { "epoch": 0.2376470588235294, "grad_norm": 0.8331931223907385, "learning_rate": 4.103689912970752e-06, "loss": 0.028162434697151184, "step": 25250 }, { "epoch": 0.23769411764705883, "grad_norm": 0.5082809572061636, "learning_rate": 4.103283651284101e-06, "loss": 0.03260146081447601, "step": 25255 }, { "epoch": 0.23774117647058823, "grad_norm": 0.767277018260388, "learning_rate": 4.1028775102322125e-06, "loss": 0.028956374526023863, "step": 25260 }, { "epoch": 0.23778823529411763, "grad_norm": 0.5492447360802215, "learning_rate": 4.102471489755396e-06, "loss": 0.026907461881637573, "step": 25265 }, { "epoch": 0.23783529411764706, "grad_norm": 0.5746242918875615, "learning_rate": 4.102065589794004e-06, "loss": 0.026768845319747925, "step": 25270 }, { "epoch": 0.23788235294117646, "grad_norm": 0.5675247083307491, "learning_rate": 4.101659810288426e-06, "loss": 0.02884204387664795, "step": 25275 }, { "epoch": 0.2379294117647059, "grad_norm": 0.6967554978401025, "learning_rate": 4.101254151179099e-06, "loss": 0.025274556875228883, "step": 25280 }, { "epoch": 0.2379764705882353, "grad_norm": 0.5819948376505337, "learning_rate": 4.1008486124064965e-06, "loss": 0.0265483021736145, "step": 25285 }, { "epoch": 0.23802352941176472, "grad_norm": 0.6631819186995769, "learning_rate": 4.100443193911136e-06, "loss": 0.023447084426879882, "step": 25290 }, { "epoch": 0.23807058823529412, "grad_norm": 0.4579039928801868, "learning_rate": 4.1000378956335735e-06, "loss": 0.02436549812555313, "step": 25295 }, { "epoch": 0.23811764705882352, "grad_norm": 0.6050409074642067, "learning_rate": 4.099632717514409e-06, "loss": 0.027758777141571045, "step": 25300 }, { "epoch": 0.23816470588235295, "grad_norm": 0.6354210288985553, "learning_rate": 4.0992276594942794e-06, "loss": 0.028925663232803343, "step": 25305 }, { "epoch": 0.23821176470588235, "grad_norm": 0.7140184449668036, "learning_rate": 4.098822721513869e-06, "loss": 0.024649748206138612, "step": 25310 }, { "epoch": 0.23825882352941177, "grad_norm": 0.700631037746269, "learning_rate": 4.098417903513897e-06, "loss": 0.02162129580974579, "step": 25315 }, { "epoch": 0.23830588235294117, "grad_norm": 0.5906857445597293, "learning_rate": 4.098013205435128e-06, "loss": 0.028186625242233275, "step": 25320 }, { "epoch": 0.2383529411764706, "grad_norm": 0.640484110366604, "learning_rate": 4.097608627218364e-06, "loss": 0.02751270532608032, "step": 25325 }, { "epoch": 0.2384, "grad_norm": 0.6881000478045717, "learning_rate": 4.09720416880445e-06, "loss": 0.02078566253185272, "step": 25330 }, { "epoch": 0.2384470588235294, "grad_norm": 0.46257964352086317, "learning_rate": 4.096799830134272e-06, "loss": 0.023914039134979248, "step": 25335 }, { "epoch": 0.23849411764705883, "grad_norm": 0.6457772129297934, "learning_rate": 4.0963956111487565e-06, "loss": 0.020146554708480834, "step": 25340 }, { "epoch": 0.23854117647058823, "grad_norm": 0.5724417619523162, "learning_rate": 4.095991511788869e-06, "loss": 0.02594858407974243, "step": 25345 }, { "epoch": 0.23858823529411766, "grad_norm": 0.5260865225298496, "learning_rate": 4.095587531995618e-06, "loss": 0.029037350416183473, "step": 25350 }, { "epoch": 0.23863529411764706, "grad_norm": 0.5580694064199448, "learning_rate": 4.095183671710052e-06, "loss": 0.0285150945186615, "step": 25355 }, { "epoch": 0.23868235294117648, "grad_norm": 0.5524400226615122, "learning_rate": 4.094779930873261e-06, "loss": 0.025634557008743286, "step": 25360 }, { "epoch": 0.23872941176470588, "grad_norm": 0.5178865711050256, "learning_rate": 4.094376309426373e-06, "loss": 0.027180427312850954, "step": 25365 }, { "epoch": 0.23877647058823528, "grad_norm": 0.5941080463470773, "learning_rate": 4.09397280731056e-06, "loss": 0.022555869817733765, "step": 25370 }, { "epoch": 0.2388235294117647, "grad_norm": 0.6375417761220082, "learning_rate": 4.093569424467033e-06, "loss": 0.031426945328712465, "step": 25375 }, { "epoch": 0.2388705882352941, "grad_norm": 0.576892701391912, "learning_rate": 4.0931661608370415e-06, "loss": 0.024429117143154145, "step": 25380 }, { "epoch": 0.23891764705882354, "grad_norm": 0.866881305975488, "learning_rate": 4.09276301636188e-06, "loss": 0.03178984522819519, "step": 25385 }, { "epoch": 0.23896470588235294, "grad_norm": 0.824229824988087, "learning_rate": 4.09235999098288e-06, "loss": 0.028540879487991333, "step": 25390 }, { "epoch": 0.23901176470588234, "grad_norm": 0.5456637483061737, "learning_rate": 4.0919570846414146e-06, "loss": 0.023005867004394533, "step": 25395 }, { "epoch": 0.23905882352941177, "grad_norm": 0.5605042334571875, "learning_rate": 4.091554297278897e-06, "loss": 0.02648838758468628, "step": 25400 }, { "epoch": 0.23910588235294117, "grad_norm": 0.48622707681068905, "learning_rate": 4.091151628836781e-06, "loss": 0.029319518804550172, "step": 25405 }, { "epoch": 0.2391529411764706, "grad_norm": 0.6196928744516312, "learning_rate": 4.09074907925656e-06, "loss": 0.023272402584552765, "step": 25410 }, { "epoch": 0.2392, "grad_norm": 0.6065892568062556, "learning_rate": 4.090346648479769e-06, "loss": 0.025186291337013243, "step": 25415 }, { "epoch": 0.23924705882352942, "grad_norm": 0.529247649590308, "learning_rate": 4.089944336447983e-06, "loss": 0.022965109348297118, "step": 25420 }, { "epoch": 0.23929411764705882, "grad_norm": 0.7823152226511375, "learning_rate": 4.089542143102817e-06, "loss": 0.026199352741241456, "step": 25425 }, { "epoch": 0.23934117647058822, "grad_norm": 0.5680619784253883, "learning_rate": 4.089140068385923e-06, "loss": 0.023478461802005766, "step": 25430 }, { "epoch": 0.23938823529411765, "grad_norm": 0.660249947635512, "learning_rate": 4.088738112239001e-06, "loss": 0.024294975399971008, "step": 25435 }, { "epoch": 0.23943529411764705, "grad_norm": 0.4732863590128251, "learning_rate": 4.0883362746037816e-06, "loss": 0.02873438596725464, "step": 25440 }, { "epoch": 0.23948235294117648, "grad_norm": 0.6453704562289017, "learning_rate": 4.087934555422043e-06, "loss": 0.026720651984214784, "step": 25445 }, { "epoch": 0.23952941176470588, "grad_norm": 0.761418579751205, "learning_rate": 4.087532954635599e-06, "loss": 0.02229492664337158, "step": 25450 }, { "epoch": 0.2395764705882353, "grad_norm": 0.5385186392401528, "learning_rate": 4.087131472186307e-06, "loss": 0.025667715072631835, "step": 25455 }, { "epoch": 0.2396235294117647, "grad_norm": 0.46507988319872157, "learning_rate": 4.0867301080160595e-06, "loss": 0.022328975796699523, "step": 25460 }, { "epoch": 0.2396705882352941, "grad_norm": 0.8203340696718557, "learning_rate": 4.0863288620667945e-06, "loss": 0.02328529804944992, "step": 25465 }, { "epoch": 0.23971764705882354, "grad_norm": 0.7581220379635203, "learning_rate": 4.085927734280486e-06, "loss": 0.02620513439178467, "step": 25470 }, { "epoch": 0.23976470588235294, "grad_norm": 0.6708239612285726, "learning_rate": 4.085526724599147e-06, "loss": 0.02258276641368866, "step": 25475 }, { "epoch": 0.23981176470588236, "grad_norm": 0.5725138111348775, "learning_rate": 4.0851258329648355e-06, "loss": 0.02450050264596939, "step": 25480 }, { "epoch": 0.23985882352941176, "grad_norm": 0.6418821457376922, "learning_rate": 4.084725059319644e-06, "loss": 0.02403205633163452, "step": 25485 }, { "epoch": 0.23990588235294116, "grad_norm": 0.7464679541074356, "learning_rate": 4.084324403605709e-06, "loss": 0.022059881687164308, "step": 25490 }, { "epoch": 0.2399529411764706, "grad_norm": 0.9644192063074698, "learning_rate": 4.083923865765203e-06, "loss": 0.027488458156585693, "step": 25495 }, { "epoch": 0.24, "grad_norm": 0.6129204351304847, "learning_rate": 4.083523445740339e-06, "loss": 0.023879382014274596, "step": 25500 }, { "epoch": 0.24004705882352942, "grad_norm": 0.5372153740300715, "learning_rate": 4.083123143473373e-06, "loss": 0.025296121835708618, "step": 25505 }, { "epoch": 0.24009411764705882, "grad_norm": 0.7307743492202017, "learning_rate": 4.082722958906597e-06, "loss": 0.02328825891017914, "step": 25510 }, { "epoch": 0.24014117647058825, "grad_norm": 1.3019134691026117, "learning_rate": 4.082322891982343e-06, "loss": 0.022960750758647917, "step": 25515 }, { "epoch": 0.24018823529411765, "grad_norm": 0.456967535926286, "learning_rate": 4.081922942642983e-06, "loss": 0.02749042510986328, "step": 25520 }, { "epoch": 0.24023529411764705, "grad_norm": 0.5614321164720044, "learning_rate": 4.081523110830929e-06, "loss": 0.023134355247020722, "step": 25525 }, { "epoch": 0.24028235294117647, "grad_norm": 0.47198788210415793, "learning_rate": 4.081123396488633e-06, "loss": 0.022589094936847687, "step": 25530 }, { "epoch": 0.24032941176470587, "grad_norm": 0.5068449426706599, "learning_rate": 4.080723799558585e-06, "loss": 0.028070336580276488, "step": 25535 }, { "epoch": 0.2403764705882353, "grad_norm": 0.7901749659419814, "learning_rate": 4.080324319983316e-06, "loss": 0.022529785335063935, "step": 25540 }, { "epoch": 0.2404235294117647, "grad_norm": 0.6335221808281802, "learning_rate": 4.079924957705395e-06, "loss": 0.027312397956848145, "step": 25545 }, { "epoch": 0.24047058823529413, "grad_norm": 1.1450175093663746, "learning_rate": 4.079525712667429e-06, "loss": 0.024341103434562684, "step": 25550 }, { "epoch": 0.24051764705882353, "grad_norm": 0.5596415995029232, "learning_rate": 4.079126584812068e-06, "loss": 0.025130254030227662, "step": 25555 }, { "epoch": 0.24056470588235293, "grad_norm": 0.5994560964124265, "learning_rate": 4.078727574081998e-06, "loss": 0.024064673483371733, "step": 25560 }, { "epoch": 0.24061176470588236, "grad_norm": 0.536817984555895, "learning_rate": 4.078328680419948e-06, "loss": 0.024126218259334566, "step": 25565 }, { "epoch": 0.24065882352941176, "grad_norm": 0.4836477719434433, "learning_rate": 4.077929903768682e-06, "loss": 0.021713507175445557, "step": 25570 }, { "epoch": 0.2407058823529412, "grad_norm": 0.810288928856785, "learning_rate": 4.077531244071003e-06, "loss": 0.029057884216308595, "step": 25575 }, { "epoch": 0.2407529411764706, "grad_norm": 0.5363668736917966, "learning_rate": 4.07713270126976e-06, "loss": 0.024073125422000886, "step": 25580 }, { "epoch": 0.2408, "grad_norm": 0.7144438043269381, "learning_rate": 4.0767342753078315e-06, "loss": 0.019832658767700195, "step": 25585 }, { "epoch": 0.24084705882352941, "grad_norm": 0.626369498022168, "learning_rate": 4.076335966128143e-06, "loss": 0.025184738636016845, "step": 25590 }, { "epoch": 0.24089411764705881, "grad_norm": 0.46304991669844237, "learning_rate": 4.075937773673653e-06, "loss": 0.023510533571243285, "step": 25595 }, { "epoch": 0.24094117647058824, "grad_norm": 0.5535123121501607, "learning_rate": 4.075539697887363e-06, "loss": 0.021195217967033386, "step": 25600 }, { "epoch": 0.24098823529411764, "grad_norm": 0.5259625101110236, "learning_rate": 4.075141738712315e-06, "loss": 0.024406275153160094, "step": 25605 }, { "epoch": 0.24103529411764707, "grad_norm": 0.5107358436371878, "learning_rate": 4.074743896091582e-06, "loss": 0.022334560751914978, "step": 25610 }, { "epoch": 0.24108235294117647, "grad_norm": 0.604455944445284, "learning_rate": 4.074346169968284e-06, "loss": 0.026492944359779357, "step": 25615 }, { "epoch": 0.24112941176470587, "grad_norm": 0.7539249457673098, "learning_rate": 4.073948560285577e-06, "loss": 0.03252094686031341, "step": 25620 }, { "epoch": 0.2411764705882353, "grad_norm": 0.5645803516597531, "learning_rate": 4.073551066986655e-06, "loss": 0.025972554087638856, "step": 25625 }, { "epoch": 0.2412235294117647, "grad_norm": 0.4378972250535804, "learning_rate": 4.073153690014752e-06, "loss": 0.024912935495376588, "step": 25630 }, { "epoch": 0.24127058823529413, "grad_norm": 0.5818149952607109, "learning_rate": 4.072756429313139e-06, "loss": 0.020309360325336458, "step": 25635 }, { "epoch": 0.24131764705882353, "grad_norm": 0.7604469471557058, "learning_rate": 4.072359284825129e-06, "loss": 0.02238088846206665, "step": 25640 }, { "epoch": 0.24136470588235295, "grad_norm": 0.55458100608157, "learning_rate": 4.07196225649407e-06, "loss": 0.02216838449239731, "step": 25645 }, { "epoch": 0.24141176470588235, "grad_norm": 0.5194292621456083, "learning_rate": 4.071565344263352e-06, "loss": 0.026326239109039307, "step": 25650 }, { "epoch": 0.24145882352941175, "grad_norm": 0.5722711516644976, "learning_rate": 4.0711685480764e-06, "loss": 0.019423043727874754, "step": 25655 }, { "epoch": 0.24150588235294118, "grad_norm": 0.5224712377121775, "learning_rate": 4.07077186787668e-06, "loss": 0.025875034928321838, "step": 25660 }, { "epoch": 0.24155294117647058, "grad_norm": 0.6186356934568693, "learning_rate": 4.070375303607697e-06, "loss": 0.024074923992156983, "step": 25665 }, { "epoch": 0.2416, "grad_norm": 0.8180718224123021, "learning_rate": 4.069978855212993e-06, "loss": 0.0264695405960083, "step": 25670 }, { "epoch": 0.2416470588235294, "grad_norm": 0.8646212984235407, "learning_rate": 4.06958252263615e-06, "loss": 0.029555052518844604, "step": 25675 }, { "epoch": 0.24169411764705884, "grad_norm": 0.7604516588372512, "learning_rate": 4.069186305820787e-06, "loss": 0.021192431449890137, "step": 25680 }, { "epoch": 0.24174117647058824, "grad_norm": 0.4614210766168413, "learning_rate": 4.068790204710561e-06, "loss": 0.024307247996330262, "step": 25685 }, { "epoch": 0.24178823529411764, "grad_norm": 0.5466893228767964, "learning_rate": 4.068394219249169e-06, "loss": 0.024235296249389648, "step": 25690 }, { "epoch": 0.24183529411764706, "grad_norm": 0.5806940620584619, "learning_rate": 4.067998349380346e-06, "loss": 0.021607330441474913, "step": 25695 }, { "epoch": 0.24188235294117647, "grad_norm": 0.6132025635683986, "learning_rate": 4.067602595047866e-06, "loss": 0.024969351291656495, "step": 25700 }, { "epoch": 0.2419294117647059, "grad_norm": 0.5907291719887753, "learning_rate": 4.067206956195538e-06, "loss": 0.022430330514907837, "step": 25705 }, { "epoch": 0.2419764705882353, "grad_norm": 0.6689927242252206, "learning_rate": 4.066811432767212e-06, "loss": 0.03114815056324005, "step": 25710 }, { "epoch": 0.2420235294117647, "grad_norm": 1.2317393387696807, "learning_rate": 4.066416024706777e-06, "loss": 0.02311876565217972, "step": 25715 }, { "epoch": 0.24207058823529412, "grad_norm": 0.6494780102144746, "learning_rate": 4.066020731958158e-06, "loss": 0.02484567165374756, "step": 25720 }, { "epoch": 0.24211764705882352, "grad_norm": 0.7204929189013795, "learning_rate": 4.065625554465321e-06, "loss": 0.021569234132766724, "step": 25725 }, { "epoch": 0.24216470588235295, "grad_norm": 0.6706166805350555, "learning_rate": 4.065230492172265e-06, "loss": 0.022964322566986085, "step": 25730 }, { "epoch": 0.24221176470588235, "grad_norm": 0.4533581670983182, "learning_rate": 4.064835545023033e-06, "loss": 0.02319491356611252, "step": 25735 }, { "epoch": 0.24225882352941178, "grad_norm": 0.5687663844756269, "learning_rate": 4.064440712961702e-06, "loss": 0.02820526957511902, "step": 25740 }, { "epoch": 0.24230588235294118, "grad_norm": 0.4788923270020646, "learning_rate": 4.064045995932387e-06, "loss": 0.02257761061191559, "step": 25745 }, { "epoch": 0.24235294117647058, "grad_norm": 0.7745570503851058, "learning_rate": 4.063651393879246e-06, "loss": 0.026941072940826417, "step": 25750 }, { "epoch": 0.2424, "grad_norm": 0.5097787792683449, "learning_rate": 4.063256906746469e-06, "loss": 0.02269260287284851, "step": 25755 }, { "epoch": 0.2424470588235294, "grad_norm": 0.6971997129795863, "learning_rate": 4.062862534478286e-06, "loss": 0.024486124515533447, "step": 25760 }, { "epoch": 0.24249411764705883, "grad_norm": 0.5899783642220526, "learning_rate": 4.062468277018965e-06, "loss": 0.022698023915290834, "step": 25765 }, { "epoch": 0.24254117647058823, "grad_norm": 0.6103659962301581, "learning_rate": 4.062074134312813e-06, "loss": 0.028244489431381227, "step": 25770 }, { "epoch": 0.24258823529411766, "grad_norm": 0.5262385125453923, "learning_rate": 4.061680106304173e-06, "loss": 0.0253377765417099, "step": 25775 }, { "epoch": 0.24263529411764706, "grad_norm": 0.4219780126863813, "learning_rate": 4.061286192937428e-06, "loss": 0.024845470488071442, "step": 25780 }, { "epoch": 0.24268235294117646, "grad_norm": 0.7218148946154533, "learning_rate": 4.060892394156995e-06, "loss": 0.030587461590766907, "step": 25785 }, { "epoch": 0.2427294117647059, "grad_norm": 0.4668668862326869, "learning_rate": 4.060498709907332e-06, "loss": 0.021849888563156127, "step": 25790 }, { "epoch": 0.2427764705882353, "grad_norm": 0.40502456001637527, "learning_rate": 4.060105140132932e-06, "loss": 0.019021311402320863, "step": 25795 }, { "epoch": 0.24282352941176472, "grad_norm": 0.7583868102135577, "learning_rate": 4.059711684778332e-06, "loss": 0.02200571894645691, "step": 25800 }, { "epoch": 0.24287058823529412, "grad_norm": 0.6022275778545135, "learning_rate": 4.059318343788098e-06, "loss": 0.02483263611793518, "step": 25805 }, { "epoch": 0.24291764705882352, "grad_norm": 0.7847674017475975, "learning_rate": 4.058925117106837e-06, "loss": 0.02719500958919525, "step": 25810 }, { "epoch": 0.24296470588235294, "grad_norm": 0.5445834305439649, "learning_rate": 4.058532004679197e-06, "loss": 0.023122428357601164, "step": 25815 }, { "epoch": 0.24301176470588234, "grad_norm": 0.5214416280927632, "learning_rate": 4.05813900644986e-06, "loss": 0.02306448668241501, "step": 25820 }, { "epoch": 0.24305882352941177, "grad_norm": 0.7474891679263557, "learning_rate": 4.057746122363544e-06, "loss": 0.01947462260723114, "step": 25825 }, { "epoch": 0.24310588235294117, "grad_norm": 0.6682650648951887, "learning_rate": 4.057353352365009e-06, "loss": 0.023893764615058898, "step": 25830 }, { "epoch": 0.2431529411764706, "grad_norm": 0.46293466869862454, "learning_rate": 4.056960696399049e-06, "loss": 0.017268508672714233, "step": 25835 }, { "epoch": 0.2432, "grad_norm": 1.0341651319299168, "learning_rate": 4.0565681544104965e-06, "loss": 0.023402637243270873, "step": 25840 }, { "epoch": 0.2432470588235294, "grad_norm": 0.7066609316151841, "learning_rate": 4.056175726344219e-06, "loss": 0.023303064703941345, "step": 25845 }, { "epoch": 0.24329411764705883, "grad_norm": 1.0762572831149935, "learning_rate": 4.055783412145129e-06, "loss": 0.027866297960281373, "step": 25850 }, { "epoch": 0.24334117647058823, "grad_norm": 0.5586896676194151, "learning_rate": 4.0553912117581645e-06, "loss": 0.025491827726364137, "step": 25855 }, { "epoch": 0.24338823529411766, "grad_norm": 0.7013057585608516, "learning_rate": 4.054999125128311e-06, "loss": 0.023250025510787965, "step": 25860 }, { "epoch": 0.24343529411764706, "grad_norm": 0.6568546834293996, "learning_rate": 4.054607152200587e-06, "loss": 0.02117617130279541, "step": 25865 }, { "epoch": 0.24348235294117648, "grad_norm": 0.4726759451551447, "learning_rate": 4.054215292920047e-06, "loss": 0.028543010354042053, "step": 25870 }, { "epoch": 0.24352941176470588, "grad_norm": 0.6572620574222968, "learning_rate": 4.053823547231786e-06, "loss": 0.02373068779706955, "step": 25875 }, { "epoch": 0.24357647058823528, "grad_norm": 0.5824897804252059, "learning_rate": 4.053431915080934e-06, "loss": 0.025247544050216675, "step": 25880 }, { "epoch": 0.2436235294117647, "grad_norm": 0.6671274431624872, "learning_rate": 4.053040396412657e-06, "loss": 0.023999598622322083, "step": 25885 }, { "epoch": 0.2436705882352941, "grad_norm": 0.6477236129768434, "learning_rate": 4.0526489911721615e-06, "loss": 0.02605649530887604, "step": 25890 }, { "epoch": 0.24371764705882354, "grad_norm": 0.5729071203922811, "learning_rate": 4.052257699304686e-06, "loss": 0.023783734440803526, "step": 25895 }, { "epoch": 0.24376470588235294, "grad_norm": 0.7649391884918948, "learning_rate": 4.051866520755513e-06, "loss": 0.0255304753780365, "step": 25900 }, { "epoch": 0.24381176470588237, "grad_norm": 0.474488616168866, "learning_rate": 4.0514754554699556e-06, "loss": 0.02661007046699524, "step": 25905 }, { "epoch": 0.24385882352941177, "grad_norm": 0.5090234454617901, "learning_rate": 4.051084503393366e-06, "loss": 0.023724883794784546, "step": 25910 }, { "epoch": 0.24390588235294117, "grad_norm": 0.4025929747290348, "learning_rate": 4.050693664471134e-06, "loss": 0.021658888459205626, "step": 25915 }, { "epoch": 0.2439529411764706, "grad_norm": 0.5308278061675952, "learning_rate": 4.050302938648688e-06, "loss": 0.025795954465866088, "step": 25920 }, { "epoch": 0.244, "grad_norm": 0.5490827646476449, "learning_rate": 4.049912325871487e-06, "loss": 0.024021585285663605, "step": 25925 }, { "epoch": 0.24404705882352942, "grad_norm": 0.5332958491765217, "learning_rate": 4.049521826085034e-06, "loss": 0.023125389218330385, "step": 25930 }, { "epoch": 0.24409411764705882, "grad_norm": 0.44097906320523916, "learning_rate": 4.049131439234865e-06, "loss": 0.023894870281219484, "step": 25935 }, { "epoch": 0.24414117647058822, "grad_norm": 0.6849317238281791, "learning_rate": 4.048741165266551e-06, "loss": 0.024328178167343138, "step": 25940 }, { "epoch": 0.24418823529411765, "grad_norm": 0.544118600829765, "learning_rate": 4.048351004125706e-06, "loss": 0.02594665586948395, "step": 25945 }, { "epoch": 0.24423529411764705, "grad_norm": 0.6468672042484915, "learning_rate": 4.047960955757975e-06, "loss": 0.025324735045433044, "step": 25950 }, { "epoch": 0.24428235294117648, "grad_norm": 0.701786388097004, "learning_rate": 4.047571020109041e-06, "loss": 0.024425725638866424, "step": 25955 }, { "epoch": 0.24432941176470588, "grad_norm": 0.5993735227238113, "learning_rate": 4.047181197124624e-06, "loss": 0.024344220757484436, "step": 25960 }, { "epoch": 0.2443764705882353, "grad_norm": 0.4070922575004919, "learning_rate": 4.046791486750482e-06, "loss": 0.026008379459381104, "step": 25965 }, { "epoch": 0.2444235294117647, "grad_norm": 0.5035384179343163, "learning_rate": 4.046401888932407e-06, "loss": 0.02564786672592163, "step": 25970 }, { "epoch": 0.2444705882352941, "grad_norm": 0.501695984929813, "learning_rate": 4.046012403616229e-06, "loss": 0.02225574254989624, "step": 25975 }, { "epoch": 0.24451764705882353, "grad_norm": 0.4609299143778876, "learning_rate": 4.045623030747815e-06, "loss": 0.024229031801223756, "step": 25980 }, { "epoch": 0.24456470588235293, "grad_norm": 0.8885024464675587, "learning_rate": 4.045233770273066e-06, "loss": 0.030340990424156188, "step": 25985 }, { "epoch": 0.24461176470588236, "grad_norm": 0.541601248351493, "learning_rate": 4.044844622137921e-06, "loss": 0.023756642639636994, "step": 25990 }, { "epoch": 0.24465882352941176, "grad_norm": 0.5455747410149643, "learning_rate": 4.044455586288358e-06, "loss": 0.018584933876991273, "step": 25995 }, { "epoch": 0.2447058823529412, "grad_norm": 0.6582629907027027, "learning_rate": 4.044066662670387e-06, "loss": 0.02718210220336914, "step": 26000 }, { "epoch": 0.2447529411764706, "grad_norm": 0.8002414806549049, "learning_rate": 4.043677851230058e-06, "loss": 0.02831684350967407, "step": 26005 }, { "epoch": 0.2448, "grad_norm": 0.5754631895248475, "learning_rate": 4.043289151913453e-06, "loss": 0.027047562599182128, "step": 26010 }, { "epoch": 0.24484705882352942, "grad_norm": 0.5635590745156629, "learning_rate": 4.042900564666694e-06, "loss": 0.0240051731467247, "step": 26015 }, { "epoch": 0.24489411764705882, "grad_norm": 0.6141441772451941, "learning_rate": 4.042512089435939e-06, "loss": 0.023718833923339844, "step": 26020 }, { "epoch": 0.24494117647058825, "grad_norm": 0.5432702989549901, "learning_rate": 4.042123726167379e-06, "loss": 0.02179258465766907, "step": 26025 }, { "epoch": 0.24498823529411765, "grad_norm": 0.6138820096393233, "learning_rate": 4.041735474807245e-06, "loss": 0.024870608747005463, "step": 26030 }, { "epoch": 0.24503529411764705, "grad_norm": 0.7081803287372054, "learning_rate": 4.041347335301802e-06, "loss": 0.022360247373580933, "step": 26035 }, { "epoch": 0.24508235294117647, "grad_norm": 0.53839574234379, "learning_rate": 4.040959307597353e-06, "loss": 0.02233070880174637, "step": 26040 }, { "epoch": 0.24512941176470587, "grad_norm": 0.6136299761396387, "learning_rate": 4.040571391640232e-06, "loss": 0.02660120129585266, "step": 26045 }, { "epoch": 0.2451764705882353, "grad_norm": 0.6627279507697472, "learning_rate": 4.040183587376818e-06, "loss": 0.02247231900691986, "step": 26050 }, { "epoch": 0.2452235294117647, "grad_norm": 0.4184010538256384, "learning_rate": 4.039795894753517e-06, "loss": 0.021064209938049316, "step": 26055 }, { "epoch": 0.24527058823529413, "grad_norm": 0.5322899937449899, "learning_rate": 4.039408313716776e-06, "loss": 0.02563582956790924, "step": 26060 }, { "epoch": 0.24531764705882353, "grad_norm": 0.45850966073274974, "learning_rate": 4.039020844213078e-06, "loss": 0.02312031090259552, "step": 26065 }, { "epoch": 0.24536470588235293, "grad_norm": 0.9061565060828366, "learning_rate": 4.0386334861889395e-06, "loss": 0.025102436542510986, "step": 26070 }, { "epoch": 0.24541176470588236, "grad_norm": 0.5017849226310986, "learning_rate": 4.0382462395909125e-06, "loss": 0.026193016767501832, "step": 26075 }, { "epoch": 0.24545882352941176, "grad_norm": 0.8711651277610465, "learning_rate": 4.037859104365589e-06, "loss": 0.03284930884838104, "step": 26080 }, { "epoch": 0.24550588235294118, "grad_norm": 0.49490705356451875, "learning_rate": 4.037472080459594e-06, "loss": 0.026544210314750672, "step": 26085 }, { "epoch": 0.24555294117647058, "grad_norm": 0.6396375900079491, "learning_rate": 4.037085167819587e-06, "loss": 0.024268317222595214, "step": 26090 }, { "epoch": 0.2456, "grad_norm": 0.6488680899949361, "learning_rate": 4.036698366392268e-06, "loss": 0.025929281115531923, "step": 26095 }, { "epoch": 0.2456470588235294, "grad_norm": 0.6115989364853617, "learning_rate": 4.036311676124366e-06, "loss": 0.024056124687194824, "step": 26100 }, { "epoch": 0.2456941176470588, "grad_norm": 0.5108173427253223, "learning_rate": 4.035925096962651e-06, "loss": 0.029411238431930543, "step": 26105 }, { "epoch": 0.24574117647058824, "grad_norm": 0.3904062832333412, "learning_rate": 4.0355386288539275e-06, "loss": 0.019916908442974092, "step": 26110 }, { "epoch": 0.24578823529411764, "grad_norm": 0.585114546514992, "learning_rate": 4.035152271745034e-06, "loss": 0.02462601512670517, "step": 26115 }, { "epoch": 0.24583529411764707, "grad_norm": 0.7872506412046039, "learning_rate": 4.0347660255828465e-06, "loss": 0.026745423674583435, "step": 26120 }, { "epoch": 0.24588235294117647, "grad_norm": 0.5652864205632265, "learning_rate": 4.034379890314276e-06, "loss": 0.025025364756584168, "step": 26125 }, { "epoch": 0.24592941176470587, "grad_norm": 0.7066189785788632, "learning_rate": 4.033993865886268e-06, "loss": 0.02662962079048157, "step": 26130 }, { "epoch": 0.2459764705882353, "grad_norm": 0.5287671466928159, "learning_rate": 4.033607952245806e-06, "loss": 0.022961658239364625, "step": 26135 }, { "epoch": 0.2460235294117647, "grad_norm": 0.61411441564897, "learning_rate": 4.033222149339906e-06, "loss": 0.025008782744407654, "step": 26140 }, { "epoch": 0.24607058823529412, "grad_norm": 0.5350635358861382, "learning_rate": 4.032836457115623e-06, "loss": 0.025225377082824706, "step": 26145 }, { "epoch": 0.24611764705882352, "grad_norm": 0.7434488588942098, "learning_rate": 4.032450875520042e-06, "loss": 0.033210089802742, "step": 26150 }, { "epoch": 0.24616470588235295, "grad_norm": 0.5958503202775455, "learning_rate": 4.032065404500289e-06, "loss": 0.021204379200935364, "step": 26155 }, { "epoch": 0.24621176470588235, "grad_norm": 0.3949973578300725, "learning_rate": 4.031680044003524e-06, "loss": 0.02284952104091644, "step": 26160 }, { "epoch": 0.24625882352941175, "grad_norm": 0.7341522297257261, "learning_rate": 4.031294793976939e-06, "loss": 0.020062097907066347, "step": 26165 }, { "epoch": 0.24630588235294118, "grad_norm": 0.47178938957156485, "learning_rate": 4.030909654367767e-06, "loss": 0.025370484590530394, "step": 26170 }, { "epoch": 0.24635294117647058, "grad_norm": 0.7057116098623373, "learning_rate": 4.03052462512327e-06, "loss": 0.022165557742118834, "step": 26175 }, { "epoch": 0.2464, "grad_norm": 0.641405313843615, "learning_rate": 4.03013970619075e-06, "loss": 0.027807962894439698, "step": 26180 }, { "epoch": 0.2464470588235294, "grad_norm": 0.4276469774252465, "learning_rate": 4.029754897517541e-06, "loss": 0.022615540027618408, "step": 26185 }, { "epoch": 0.24649411764705884, "grad_norm": 0.5503370615648677, "learning_rate": 4.029370199051016e-06, "loss": 0.02396128177642822, "step": 26190 }, { "epoch": 0.24654117647058824, "grad_norm": 0.6250055442957454, "learning_rate": 4.02898561073858e-06, "loss": 0.018095216155052184, "step": 26195 }, { "epoch": 0.24658823529411764, "grad_norm": 0.6381225533254192, "learning_rate": 4.028601132527673e-06, "loss": 0.025086233019828798, "step": 26200 }, { "epoch": 0.24663529411764706, "grad_norm": 0.7948619127542494, "learning_rate": 4.028216764365773e-06, "loss": 0.02049071192741394, "step": 26205 }, { "epoch": 0.24668235294117646, "grad_norm": 0.8735792476311365, "learning_rate": 4.027832506200388e-06, "loss": 0.025306889414787294, "step": 26210 }, { "epoch": 0.2467294117647059, "grad_norm": 0.5191662016349328, "learning_rate": 4.027448357979068e-06, "loss": 0.024785818159580232, "step": 26215 }, { "epoch": 0.2467764705882353, "grad_norm": 0.7966843094795774, "learning_rate": 4.027064319649394e-06, "loss": 0.023088255524635316, "step": 26220 }, { "epoch": 0.24682352941176472, "grad_norm": 0.5022950878859155, "learning_rate": 4.02668039115898e-06, "loss": 0.019586484134197234, "step": 26225 }, { "epoch": 0.24687058823529412, "grad_norm": 0.4817357810278771, "learning_rate": 4.026296572455479e-06, "loss": 0.02499206066131592, "step": 26230 }, { "epoch": 0.24691764705882352, "grad_norm": 0.6458482180680007, "learning_rate": 4.025912863486576e-06, "loss": 0.025514835119247438, "step": 26235 }, { "epoch": 0.24696470588235295, "grad_norm": 0.42993447934014173, "learning_rate": 4.025529264199991e-06, "loss": 0.023018106818199158, "step": 26240 }, { "epoch": 0.24701176470588235, "grad_norm": 0.4091117500763779, "learning_rate": 4.0251457745434835e-06, "loss": 0.025066468119621276, "step": 26245 }, { "epoch": 0.24705882352941178, "grad_norm": 0.5904863455170075, "learning_rate": 4.024762394464841e-06, "loss": 0.02041969299316406, "step": 26250 }, { "epoch": 0.24710588235294118, "grad_norm": 0.6564535181134016, "learning_rate": 4.02437912391189e-06, "loss": 0.019389277696609496, "step": 26255 }, { "epoch": 0.24715294117647058, "grad_norm": 0.7069436416383095, "learning_rate": 4.023995962832491e-06, "loss": 0.028924041986465455, "step": 26260 }, { "epoch": 0.2472, "grad_norm": 0.6906189124787329, "learning_rate": 4.0236129111745394e-06, "loss": 0.026024937629699707, "step": 26265 }, { "epoch": 0.2472470588235294, "grad_norm": 0.6580950056187447, "learning_rate": 4.023229968885965e-06, "loss": 0.025600710511207582, "step": 26270 }, { "epoch": 0.24729411764705883, "grad_norm": 0.6042085410362831, "learning_rate": 4.022847135914731e-06, "loss": 0.02486099749803543, "step": 26275 }, { "epoch": 0.24734117647058823, "grad_norm": 2.1134581432625534, "learning_rate": 4.022464412208838e-06, "loss": 0.025632905960083007, "step": 26280 }, { "epoch": 0.24738823529411766, "grad_norm": 0.966973941820357, "learning_rate": 4.022081797716318e-06, "loss": 0.036524531245231626, "step": 26285 }, { "epoch": 0.24743529411764706, "grad_norm": 0.6029016090698023, "learning_rate": 4.02169929238524e-06, "loss": 0.029410877823829652, "step": 26290 }, { "epoch": 0.24748235294117646, "grad_norm": 0.4260712720679642, "learning_rate": 4.021316896163709e-06, "loss": 0.021709275245666505, "step": 26295 }, { "epoch": 0.2475294117647059, "grad_norm": 0.841773471924875, "learning_rate": 4.020934608999859e-06, "loss": 0.02639077603816986, "step": 26300 }, { "epoch": 0.2475764705882353, "grad_norm": 0.5389864938455414, "learning_rate": 4.020552430841864e-06, "loss": 0.023977784812450408, "step": 26305 }, { "epoch": 0.24762352941176471, "grad_norm": 0.8087077895135791, "learning_rate": 4.02017036163793e-06, "loss": 0.02748657464981079, "step": 26310 }, { "epoch": 0.24767058823529411, "grad_norm": 0.4931004838513959, "learning_rate": 4.019788401336297e-06, "loss": 0.025768750905990602, "step": 26315 }, { "epoch": 0.24771764705882354, "grad_norm": 0.5082915073387526, "learning_rate": 4.0194065498852425e-06, "loss": 0.02923450469970703, "step": 26320 }, { "epoch": 0.24776470588235294, "grad_norm": 0.5975250666726343, "learning_rate": 4.019024807233074e-06, "loss": 0.027701351046562194, "step": 26325 }, { "epoch": 0.24781176470588234, "grad_norm": 0.6186319221886833, "learning_rate": 4.018643173328135e-06, "loss": 0.027322176098823547, "step": 26330 }, { "epoch": 0.24785882352941177, "grad_norm": 0.5763667556964226, "learning_rate": 4.0182616481188065e-06, "loss": 0.020329397916793824, "step": 26335 }, { "epoch": 0.24790588235294117, "grad_norm": 0.6097923903284208, "learning_rate": 4.017880231553499e-06, "loss": 0.021539613604545593, "step": 26340 }, { "epoch": 0.2479529411764706, "grad_norm": 0.623038283158167, "learning_rate": 4.017498923580659e-06, "loss": 0.024212834239006043, "step": 26345 }, { "epoch": 0.248, "grad_norm": 0.8915779134703011, "learning_rate": 4.017117724148771e-06, "loss": 0.02169518917798996, "step": 26350 }, { "epoch": 0.2480470588235294, "grad_norm": 1.0006833113851266, "learning_rate": 4.016736633206347e-06, "loss": 0.025876343250274658, "step": 26355 }, { "epoch": 0.24809411764705883, "grad_norm": 0.5900451379624351, "learning_rate": 4.016355650701937e-06, "loss": 0.018644466996192932, "step": 26360 }, { "epoch": 0.24814117647058823, "grad_norm": 0.7412008146527038, "learning_rate": 4.015974776584126e-06, "loss": 0.024790674448013306, "step": 26365 }, { "epoch": 0.24818823529411765, "grad_norm": 0.6602287847651903, "learning_rate": 4.0155940108015306e-06, "loss": 0.024969615042209625, "step": 26370 }, { "epoch": 0.24823529411764705, "grad_norm": 0.5735335501059885, "learning_rate": 4.015213353302803e-06, "loss": 0.02487715184688568, "step": 26375 }, { "epoch": 0.24828235294117648, "grad_norm": 1.1961595910956444, "learning_rate": 4.01483280403663e-06, "loss": 0.027109920978546143, "step": 26380 }, { "epoch": 0.24832941176470588, "grad_norm": 0.606123178728188, "learning_rate": 4.0144523629517305e-06, "loss": 0.023828183114528657, "step": 26385 }, { "epoch": 0.24837647058823528, "grad_norm": 0.7406310994134208, "learning_rate": 4.014072029996859e-06, "loss": 0.031819939613342285, "step": 26390 }, { "epoch": 0.2484235294117647, "grad_norm": 0.588521392860753, "learning_rate": 4.013691805120805e-06, "loss": 0.027884858846664428, "step": 26395 }, { "epoch": 0.2484705882352941, "grad_norm": 0.5509839987531577, "learning_rate": 4.013311688272387e-06, "loss": 0.026471030712127686, "step": 26400 }, { "epoch": 0.24851764705882354, "grad_norm": 0.6467438671667621, "learning_rate": 4.012931679400464e-06, "loss": 0.023592090606689452, "step": 26405 }, { "epoch": 0.24856470588235294, "grad_norm": 0.6714185865334069, "learning_rate": 4.012551778453924e-06, "loss": 0.03408459424972534, "step": 26410 }, { "epoch": 0.24861176470588237, "grad_norm": 0.45505386889697885, "learning_rate": 4.012171985381692e-06, "loss": 0.021514002978801728, "step": 26415 }, { "epoch": 0.24865882352941177, "grad_norm": 0.40799016322984366, "learning_rate": 4.011792300132725e-06, "loss": 0.022568851709365845, "step": 26420 }, { "epoch": 0.24870588235294117, "grad_norm": 0.6336758676622353, "learning_rate": 4.011412722656013e-06, "loss": 0.030024608969688414, "step": 26425 }, { "epoch": 0.2487529411764706, "grad_norm": 0.6662757175295724, "learning_rate": 4.011033252900583e-06, "loss": 0.025816270709037782, "step": 26430 }, { "epoch": 0.2488, "grad_norm": 0.7011696665657431, "learning_rate": 4.010653890815493e-06, "loss": 0.02663598656654358, "step": 26435 }, { "epoch": 0.24884705882352942, "grad_norm": 0.45714314399918576, "learning_rate": 4.010274636349834e-06, "loss": 0.023554301261901854, "step": 26440 }, { "epoch": 0.24889411764705882, "grad_norm": 0.4855171068422303, "learning_rate": 4.0098954894527344e-06, "loss": 0.026556140184402464, "step": 26445 }, { "epoch": 0.24894117647058825, "grad_norm": 0.5916452697229462, "learning_rate": 4.0095164500733535e-06, "loss": 0.025937706232070923, "step": 26450 }, { "epoch": 0.24898823529411765, "grad_norm": 0.5420887298873203, "learning_rate": 4.009137518160884e-06, "loss": 0.022563600540161134, "step": 26455 }, { "epoch": 0.24903529411764705, "grad_norm": 0.844201585656779, "learning_rate": 4.0087586936645546e-06, "loss": 0.028836607933044434, "step": 26460 }, { "epoch": 0.24908235294117648, "grad_norm": 0.7580875320401504, "learning_rate": 4.008379976533624e-06, "loss": 0.027366763353347777, "step": 26465 }, { "epoch": 0.24912941176470588, "grad_norm": 0.5883026897390707, "learning_rate": 4.0080013667173874e-06, "loss": 0.029043543338775634, "step": 26470 }, { "epoch": 0.2491764705882353, "grad_norm": 0.39828153367303987, "learning_rate": 4.0076228641651734e-06, "loss": 0.01907094568014145, "step": 26475 }, { "epoch": 0.2492235294117647, "grad_norm": 0.5954129402903658, "learning_rate": 4.007244468826342e-06, "loss": 0.02881717085838318, "step": 26480 }, { "epoch": 0.2492705882352941, "grad_norm": 0.6257979781180724, "learning_rate": 4.006866180650289e-06, "loss": 0.02672816812992096, "step": 26485 }, { "epoch": 0.24931764705882353, "grad_norm": 0.7335602270237587, "learning_rate": 4.006487999586441e-06, "loss": 0.028262394666671752, "step": 26490 }, { "epoch": 0.24936470588235293, "grad_norm": 0.5110261531657091, "learning_rate": 4.00610992558426e-06, "loss": 0.019240114092826843, "step": 26495 }, { "epoch": 0.24941176470588236, "grad_norm": 0.8198296281677482, "learning_rate": 4.005731958593242e-06, "loss": 0.023392093181610108, "step": 26500 }, { "epoch": 0.24945882352941176, "grad_norm": 0.6573919795129662, "learning_rate": 4.005354098562914e-06, "loss": 0.03840703964233398, "step": 26505 }, { "epoch": 0.2495058823529412, "grad_norm": 0.5597192748523728, "learning_rate": 4.004976345442839e-06, "loss": 0.0202840656042099, "step": 26510 }, { "epoch": 0.2495529411764706, "grad_norm": 0.6486715739292956, "learning_rate": 4.004598699182611e-06, "loss": 0.023663267493247986, "step": 26515 }, { "epoch": 0.2496, "grad_norm": 0.5406981281651252, "learning_rate": 4.004221159731858e-06, "loss": 0.02388562709093094, "step": 26520 }, { "epoch": 0.24964705882352942, "grad_norm": 0.7728375759864087, "learning_rate": 4.003843727040242e-06, "loss": 0.025209301710128786, "step": 26525 }, { "epoch": 0.24969411764705882, "grad_norm": 0.5019233219337977, "learning_rate": 4.003466401057458e-06, "loss": 0.02447884976863861, "step": 26530 }, { "epoch": 0.24974117647058824, "grad_norm": 0.6138051337274766, "learning_rate": 4.003089181733231e-06, "loss": 0.027539026737213135, "step": 26535 }, { "epoch": 0.24978823529411764, "grad_norm": 0.6491008175158179, "learning_rate": 4.002712069017325e-06, "loss": 0.029640641808509827, "step": 26540 }, { "epoch": 0.24983529411764707, "grad_norm": 0.5770595041595531, "learning_rate": 4.002335062859533e-06, "loss": 0.020462319254875183, "step": 26545 }, { "epoch": 0.24988235294117647, "grad_norm": 0.503201439974445, "learning_rate": 4.001958163209681e-06, "loss": 0.020065736770629884, "step": 26550 }, { "epoch": 0.24992941176470587, "grad_norm": 0.667164820165571, "learning_rate": 4.001581370017632e-06, "loss": 0.025468820333480836, "step": 26555 }, { "epoch": 0.2499764705882353, "grad_norm": 0.5900593212123404, "learning_rate": 4.001204683233277e-06, "loss": 0.0247043713927269, "step": 26560 }, { "epoch": 0.2500235294117647, "grad_norm": 0.4629163684190041, "learning_rate": 4.000828102806543e-06, "loss": 0.026566910743713378, "step": 26565 }, { "epoch": 0.2500705882352941, "grad_norm": 0.7149669530001695, "learning_rate": 4.000451628687389e-06, "loss": 0.02030508816242218, "step": 26570 }, { "epoch": 0.25011764705882356, "grad_norm": 0.5482397586000344, "learning_rate": 4.000075260825807e-06, "loss": 0.020320491492748262, "step": 26575 }, { "epoch": 0.25016470588235296, "grad_norm": 0.7751015944122706, "learning_rate": 3.999698999171822e-06, "loss": 0.025027981400489806, "step": 26580 }, { "epoch": 0.25021176470588236, "grad_norm": 0.5894606519456748, "learning_rate": 3.999322843675493e-06, "loss": 0.017674747109413146, "step": 26585 }, { "epoch": 0.25025882352941176, "grad_norm": 0.6572315449751418, "learning_rate": 3.9989467942869105e-06, "loss": 0.02534594535827637, "step": 26590 }, { "epoch": 0.25030588235294116, "grad_norm": 0.4683829438638388, "learning_rate": 3.998570850956198e-06, "loss": 0.02188514769077301, "step": 26595 }, { "epoch": 0.2503529411764706, "grad_norm": 0.4982629362909983, "learning_rate": 3.998195013633512e-06, "loss": 0.027044042944908142, "step": 26600 }, { "epoch": 0.2504, "grad_norm": 0.7150276864347447, "learning_rate": 3.997819282269042e-06, "loss": 0.027136123180389403, "step": 26605 }, { "epoch": 0.2504470588235294, "grad_norm": 0.5445148158211525, "learning_rate": 3.99744365681301e-06, "loss": 0.024712133407592773, "step": 26610 }, { "epoch": 0.2504941176470588, "grad_norm": 0.43906030097148097, "learning_rate": 3.997068137215671e-06, "loss": 0.02118421494960785, "step": 26615 }, { "epoch": 0.2505411764705882, "grad_norm": 0.5054408638543715, "learning_rate": 3.996692723427313e-06, "loss": 0.027759167551994323, "step": 26620 }, { "epoch": 0.25058823529411767, "grad_norm": 0.42306679561424776, "learning_rate": 3.996317415398255e-06, "loss": 0.023879049718379973, "step": 26625 }, { "epoch": 0.25063529411764707, "grad_norm": 0.7005578572991648, "learning_rate": 3.99594221307885e-06, "loss": 0.022113627195358275, "step": 26630 }, { "epoch": 0.25068235294117647, "grad_norm": 1.1153082809590507, "learning_rate": 3.9955671164194855e-06, "loss": 0.023370346426963805, "step": 26635 }, { "epoch": 0.25072941176470587, "grad_norm": 0.6972779573446386, "learning_rate": 3.995192125370577e-06, "loss": 0.025005656480789184, "step": 26640 }, { "epoch": 0.25077647058823527, "grad_norm": 0.3954398821194365, "learning_rate": 3.994817239882578e-06, "loss": 0.024090898036956788, "step": 26645 }, { "epoch": 0.2508235294117647, "grad_norm": 0.6531038849975945, "learning_rate": 3.994442459905969e-06, "loss": 0.023191431164741518, "step": 26650 }, { "epoch": 0.2508705882352941, "grad_norm": 0.6364486042358862, "learning_rate": 3.9940677853912665e-06, "loss": 0.019408908486366273, "step": 26655 }, { "epoch": 0.2509176470588235, "grad_norm": 0.6190052312220404, "learning_rate": 3.99369321628902e-06, "loss": 0.02662118673324585, "step": 26660 }, { "epoch": 0.2509647058823529, "grad_norm": 0.7379423518920215, "learning_rate": 3.993318752549807e-06, "loss": 0.025962573289871217, "step": 26665 }, { "epoch": 0.2510117647058824, "grad_norm": 0.644525409316299, "learning_rate": 3.992944394124244e-06, "loss": 0.018661651015281677, "step": 26670 }, { "epoch": 0.2510588235294118, "grad_norm": 0.5438384987738352, "learning_rate": 3.992570140962975e-06, "loss": 0.020020799338817598, "step": 26675 }, { "epoch": 0.2511058823529412, "grad_norm": 0.9331026328420075, "learning_rate": 3.992195993016679e-06, "loss": 0.023069852590560914, "step": 26680 }, { "epoch": 0.2511529411764706, "grad_norm": 0.8283554714244955, "learning_rate": 3.991821950236064e-06, "loss": 0.030287337303161622, "step": 26685 }, { "epoch": 0.2512, "grad_norm": 0.49403045342118607, "learning_rate": 3.991448012571874e-06, "loss": 0.017767378687858583, "step": 26690 }, { "epoch": 0.25124705882352943, "grad_norm": 0.9935557509513732, "learning_rate": 3.9910741799748835e-06, "loss": 0.029843291640281676, "step": 26695 }, { "epoch": 0.25129411764705883, "grad_norm": 0.407821637405203, "learning_rate": 3.9907004523959e-06, "loss": 0.025033575296401978, "step": 26700 }, { "epoch": 0.25134117647058823, "grad_norm": 0.6186106923516052, "learning_rate": 3.990326829785762e-06, "loss": 0.01779632121324539, "step": 26705 }, { "epoch": 0.25138823529411763, "grad_norm": 0.45792364520906853, "learning_rate": 3.989953312095342e-06, "loss": 0.02277480661869049, "step": 26710 }, { "epoch": 0.25143529411764703, "grad_norm": 0.46265543571593054, "learning_rate": 3.989579899275543e-06, "loss": 0.02427072823047638, "step": 26715 }, { "epoch": 0.2514823529411765, "grad_norm": 0.6367556468222094, "learning_rate": 3.989206591277302e-06, "loss": 0.02576305568218231, "step": 26720 }, { "epoch": 0.2515294117647059, "grad_norm": 0.6945361407320665, "learning_rate": 3.988833388051585e-06, "loss": 0.024880436062812806, "step": 26725 }, { "epoch": 0.2515764705882353, "grad_norm": 0.5919800571826427, "learning_rate": 3.988460289549394e-06, "loss": 0.02540256977081299, "step": 26730 }, { "epoch": 0.2516235294117647, "grad_norm": 0.6291832271176421, "learning_rate": 3.9880872957217595e-06, "loss": 0.025371259450912474, "step": 26735 }, { "epoch": 0.2516705882352941, "grad_norm": 0.6619610641331183, "learning_rate": 3.987714406519748e-06, "loss": 0.02177487462759018, "step": 26740 }, { "epoch": 0.25171764705882355, "grad_norm": 0.8555821067458764, "learning_rate": 3.987341621894454e-06, "loss": 0.02900271415710449, "step": 26745 }, { "epoch": 0.25176470588235295, "grad_norm": 0.5799258827151732, "learning_rate": 3.986968941797006e-06, "loss": 0.023554262518882752, "step": 26750 }, { "epoch": 0.25181176470588235, "grad_norm": 0.7807553972404059, "learning_rate": 3.986596366178565e-06, "loss": 0.026221543550491333, "step": 26755 }, { "epoch": 0.25185882352941175, "grad_norm": 0.5591288363593655, "learning_rate": 3.986223894990323e-06, "loss": 0.02156051993370056, "step": 26760 }, { "epoch": 0.2519058823529412, "grad_norm": 0.48163584029966383, "learning_rate": 3.985851528183503e-06, "loss": 0.023802158236503602, "step": 26765 }, { "epoch": 0.2519529411764706, "grad_norm": 0.4225912948469439, "learning_rate": 3.985479265709362e-06, "loss": 0.02352069318294525, "step": 26770 }, { "epoch": 0.252, "grad_norm": 0.5353380128622938, "learning_rate": 3.985107107519188e-06, "loss": 0.022117282450199127, "step": 26775 }, { "epoch": 0.2520470588235294, "grad_norm": 0.5638960234906027, "learning_rate": 3.9847350535642994e-06, "loss": 0.022488251328468323, "step": 26780 }, { "epoch": 0.2520941176470588, "grad_norm": 0.6581533265588949, "learning_rate": 3.984363103796048e-06, "loss": 0.030454528331756592, "step": 26785 }, { "epoch": 0.25214117647058826, "grad_norm": 0.6569292565811393, "learning_rate": 3.983991258165819e-06, "loss": 0.02385210692882538, "step": 26790 }, { "epoch": 0.25218823529411766, "grad_norm": 0.5657970118789473, "learning_rate": 3.983619516625026e-06, "loss": 0.022579298913478853, "step": 26795 }, { "epoch": 0.25223529411764706, "grad_norm": 0.4810713690105315, "learning_rate": 3.983247879125115e-06, "loss": 0.022200725972652435, "step": 26800 }, { "epoch": 0.25228235294117646, "grad_norm": 0.675007701476664, "learning_rate": 3.9828763456175654e-06, "loss": 0.028279268741607667, "step": 26805 }, { "epoch": 0.25232941176470586, "grad_norm": 0.6563582639770242, "learning_rate": 3.9825049160538866e-06, "loss": 0.03248311877250672, "step": 26810 }, { "epoch": 0.2523764705882353, "grad_norm": 0.7076234784849891, "learning_rate": 3.982133590385622e-06, "loss": 0.02694331407546997, "step": 26815 }, { "epoch": 0.2524235294117647, "grad_norm": 0.5090155263751045, "learning_rate": 3.981762368564343e-06, "loss": 0.02367358058691025, "step": 26820 }, { "epoch": 0.2524705882352941, "grad_norm": 0.8626130286760249, "learning_rate": 3.981391250541656e-06, "loss": 0.028810372948646544, "step": 26825 }, { "epoch": 0.2525176470588235, "grad_norm": 0.5739374480173194, "learning_rate": 3.981020236269197e-06, "loss": 0.026790815591812133, "step": 26830 }, { "epoch": 0.25256470588235297, "grad_norm": 0.4647493309140686, "learning_rate": 3.980649325698634e-06, "loss": 0.023066234588623048, "step": 26835 }, { "epoch": 0.25261176470588237, "grad_norm": 0.611921651110877, "learning_rate": 3.9802785187816674e-06, "loss": 0.024721877276897432, "step": 26840 }, { "epoch": 0.25265882352941177, "grad_norm": 0.6107427436040552, "learning_rate": 3.979907815470027e-06, "loss": 0.02632948160171509, "step": 26845 }, { "epoch": 0.25270588235294117, "grad_norm": 0.4767991438851121, "learning_rate": 3.979537215715476e-06, "loss": 0.02576501965522766, "step": 26850 }, { "epoch": 0.25275294117647057, "grad_norm": 0.5757900754657815, "learning_rate": 3.979166719469808e-06, "loss": 0.023884418606758117, "step": 26855 }, { "epoch": 0.2528, "grad_norm": 0.6613813039355357, "learning_rate": 3.978796326684849e-06, "loss": 0.019181853532791136, "step": 26860 }, { "epoch": 0.2528470588235294, "grad_norm": 0.5547718197678098, "learning_rate": 3.978426037312456e-06, "loss": 0.022690656781196594, "step": 26865 }, { "epoch": 0.2528941176470588, "grad_norm": 0.8826543224320935, "learning_rate": 3.978055851304518e-06, "loss": 0.02288243770599365, "step": 26870 }, { "epoch": 0.2529411764705882, "grad_norm": 0.7161106546265247, "learning_rate": 3.977685768612951e-06, "loss": 0.0216510072350502, "step": 26875 }, { "epoch": 0.2529882352941176, "grad_norm": 0.525239978340867, "learning_rate": 3.9773157891897096e-06, "loss": 0.020248103141784667, "step": 26880 }, { "epoch": 0.2530352941176471, "grad_norm": 0.7245761644137485, "learning_rate": 3.9769459129867736e-06, "loss": 0.02915666699409485, "step": 26885 }, { "epoch": 0.2530823529411765, "grad_norm": 0.8245406079928616, "learning_rate": 3.976576139956157e-06, "loss": 0.026924213767051695, "step": 26890 }, { "epoch": 0.2531294117647059, "grad_norm": 0.6303250024461441, "learning_rate": 3.976206470049904e-06, "loss": 0.023951952159404755, "step": 26895 }, { "epoch": 0.2531764705882353, "grad_norm": 0.6357891183552415, "learning_rate": 3.975836903220092e-06, "loss": 0.023242783546447755, "step": 26900 }, { "epoch": 0.2532235294117647, "grad_norm": 0.5645493119772432, "learning_rate": 3.975467439418826e-06, "loss": 0.026178193092346192, "step": 26905 }, { "epoch": 0.25327058823529414, "grad_norm": 0.5880584472276306, "learning_rate": 3.975098078598245e-06, "loss": 0.02286243438720703, "step": 26910 }, { "epoch": 0.25331764705882354, "grad_norm": 0.8376426344394712, "learning_rate": 3.974728820710518e-06, "loss": 0.025091663002967834, "step": 26915 }, { "epoch": 0.25336470588235294, "grad_norm": 0.6849597364395664, "learning_rate": 3.974359665707844e-06, "loss": 0.01990032196044922, "step": 26920 }, { "epoch": 0.25341176470588234, "grad_norm": 0.4291270021462507, "learning_rate": 3.973990613542457e-06, "loss": 0.01846162974834442, "step": 26925 }, { "epoch": 0.2534588235294118, "grad_norm": 0.7112349069379008, "learning_rate": 3.973621664166617e-06, "loss": 0.02239966541528702, "step": 26930 }, { "epoch": 0.2535058823529412, "grad_norm": 0.5288926211879643, "learning_rate": 3.9732528175326185e-06, "loss": 0.0232161819934845, "step": 26935 }, { "epoch": 0.2535529411764706, "grad_norm": 0.9040266576721478, "learning_rate": 3.9728840735927865e-06, "loss": 0.03113217353820801, "step": 26940 }, { "epoch": 0.2536, "grad_norm": 0.48613851118965695, "learning_rate": 3.972515432299475e-06, "loss": 0.02392398416996002, "step": 26945 }, { "epoch": 0.2536470588235294, "grad_norm": 0.5957582580259454, "learning_rate": 3.9721468936050714e-06, "loss": 0.02557385265827179, "step": 26950 }, { "epoch": 0.25369411764705885, "grad_norm": 0.6902993983339817, "learning_rate": 3.971778457461992e-06, "loss": 0.026721298694610596, "step": 26955 }, { "epoch": 0.25374117647058825, "grad_norm": 0.8087397562272903, "learning_rate": 3.971410123822686e-06, "loss": 0.025238007307052612, "step": 26960 }, { "epoch": 0.25378823529411765, "grad_norm": 0.7620907249035583, "learning_rate": 3.9710418926396315e-06, "loss": 0.02861245572566986, "step": 26965 }, { "epoch": 0.25383529411764705, "grad_norm": 0.5755982977838318, "learning_rate": 3.97067376386534e-06, "loss": 0.024505233764648436, "step": 26970 }, { "epoch": 0.25388235294117645, "grad_norm": 0.5331108023908581, "learning_rate": 3.970305737452349e-06, "loss": 0.0273975670337677, "step": 26975 }, { "epoch": 0.2539294117647059, "grad_norm": 0.49907781317315936, "learning_rate": 3.969937813353232e-06, "loss": 0.026544272899627686, "step": 26980 }, { "epoch": 0.2539764705882353, "grad_norm": 0.7002396532298683, "learning_rate": 3.969569991520592e-06, "loss": 0.02551911473274231, "step": 26985 }, { "epoch": 0.2540235294117647, "grad_norm": 1.0129644503784805, "learning_rate": 3.969202271907061e-06, "loss": 0.024823136627674103, "step": 26990 }, { "epoch": 0.2540705882352941, "grad_norm": 0.7844901156471156, "learning_rate": 3.9688346544653e-06, "loss": 0.02620965838432312, "step": 26995 }, { "epoch": 0.2541176470588235, "grad_norm": 0.5510180114927229, "learning_rate": 3.9684671391480085e-06, "loss": 0.02145174741744995, "step": 27000 }, { "epoch": 0.25416470588235296, "grad_norm": 0.5858207053321397, "learning_rate": 3.9680997259079074e-06, "loss": 0.022065755724906922, "step": 27005 }, { "epoch": 0.25421176470588236, "grad_norm": 0.5719144231929517, "learning_rate": 3.967732414697754e-06, "loss": 0.029893866181373595, "step": 27010 }, { "epoch": 0.25425882352941176, "grad_norm": 0.5780562225151853, "learning_rate": 3.967365205470334e-06, "loss": 0.02204786241054535, "step": 27015 }, { "epoch": 0.25430588235294116, "grad_norm": 0.6211288354178429, "learning_rate": 3.966998098178465e-06, "loss": 0.023982879519462586, "step": 27020 }, { "epoch": 0.2543529411764706, "grad_norm": 0.5943193039809446, "learning_rate": 3.966631092774994e-06, "loss": 0.023516204953193665, "step": 27025 }, { "epoch": 0.2544, "grad_norm": 0.504761285552292, "learning_rate": 3.966264189212796e-06, "loss": 0.019112524390220643, "step": 27030 }, { "epoch": 0.2544470588235294, "grad_norm": 0.5675675275579408, "learning_rate": 3.965897387444784e-06, "loss": 0.02287077307701111, "step": 27035 }, { "epoch": 0.2544941176470588, "grad_norm": 0.6500553298635268, "learning_rate": 3.965530687423894e-06, "loss": 0.022410482168197632, "step": 27040 }, { "epoch": 0.2545411764705882, "grad_norm": 0.4666422657391221, "learning_rate": 3.965164089103098e-06, "loss": 0.02847161889076233, "step": 27045 }, { "epoch": 0.25458823529411767, "grad_norm": 0.41606250503614645, "learning_rate": 3.964797592435392e-06, "loss": 0.019649465382099152, "step": 27050 }, { "epoch": 0.25463529411764707, "grad_norm": 0.42969317242315314, "learning_rate": 3.9644311973738084e-06, "loss": 0.01869681477546692, "step": 27055 }, { "epoch": 0.25468235294117647, "grad_norm": 0.5686436782845558, "learning_rate": 3.964064903871407e-06, "loss": 0.025708991289138793, "step": 27060 }, { "epoch": 0.25472941176470587, "grad_norm": 0.49219998577194063, "learning_rate": 3.96369871188128e-06, "loss": 0.024022029340267183, "step": 27065 }, { "epoch": 0.25477647058823527, "grad_norm": 0.6708663662502545, "learning_rate": 3.963332621356548e-06, "loss": 0.026143580675125122, "step": 27070 }, { "epoch": 0.2548235294117647, "grad_norm": 0.8180454458494565, "learning_rate": 3.962966632250361e-06, "loss": 0.027770179510116576, "step": 27075 }, { "epoch": 0.2548705882352941, "grad_norm": 0.4433957667789148, "learning_rate": 3.962600744515902e-06, "loss": 0.0207874596118927, "step": 27080 }, { "epoch": 0.2549176470588235, "grad_norm": 0.5304551577886423, "learning_rate": 3.962234958106383e-06, "loss": 0.025648057460784912, "step": 27085 }, { "epoch": 0.2549647058823529, "grad_norm": 0.6837218753060751, "learning_rate": 3.961869272975046e-06, "loss": 0.02484729588031769, "step": 27090 }, { "epoch": 0.2550117647058823, "grad_norm": 1.0976885210058165, "learning_rate": 3.961503689075164e-06, "loss": 0.02849630415439606, "step": 27095 }, { "epoch": 0.2550588235294118, "grad_norm": 0.6035941100227682, "learning_rate": 3.961138206360041e-06, "loss": 0.020322638750076293, "step": 27100 }, { "epoch": 0.2551058823529412, "grad_norm": 0.5880438767816489, "learning_rate": 3.9607728247830065e-06, "loss": 0.02132394313812256, "step": 27105 }, { "epoch": 0.2551529411764706, "grad_norm": 0.6705336506844983, "learning_rate": 3.960407544297425e-06, "loss": 0.026897099614143372, "step": 27110 }, { "epoch": 0.2552, "grad_norm": 0.5985335345940754, "learning_rate": 3.960042364856689e-06, "loss": 0.033380305767059325, "step": 27115 }, { "epoch": 0.25524705882352944, "grad_norm": 0.7062572625544092, "learning_rate": 3.959677286414224e-06, "loss": 0.030217090249061586, "step": 27120 }, { "epoch": 0.25529411764705884, "grad_norm": 0.7283778778893011, "learning_rate": 3.95931230892348e-06, "loss": 0.02460269331932068, "step": 27125 }, { "epoch": 0.25534117647058824, "grad_norm": 0.5255672741342626, "learning_rate": 3.958947432337943e-06, "loss": 0.0222959041595459, "step": 27130 }, { "epoch": 0.25538823529411764, "grad_norm": 0.8003185001093431, "learning_rate": 3.9585826566111235e-06, "loss": 0.028778839111328124, "step": 27135 }, { "epoch": 0.25543529411764704, "grad_norm": 0.8852477071084119, "learning_rate": 3.958217981696566e-06, "loss": 0.02612462043762207, "step": 27140 }, { "epoch": 0.2554823529411765, "grad_norm": 0.6199382008946751, "learning_rate": 3.957853407547844e-06, "loss": 0.020441457629203796, "step": 27145 }, { "epoch": 0.2555294117647059, "grad_norm": 0.7785523244838222, "learning_rate": 3.95748893411856e-06, "loss": 0.024198223650455476, "step": 27150 }, { "epoch": 0.2555764705882353, "grad_norm": 0.3460304414634414, "learning_rate": 3.9571245613623465e-06, "loss": 0.025190871953964234, "step": 27155 }, { "epoch": 0.2556235294117647, "grad_norm": 0.3525096419906607, "learning_rate": 3.956760289232867e-06, "loss": 0.026714664697647095, "step": 27160 }, { "epoch": 0.2556705882352941, "grad_norm": 0.6671434607620487, "learning_rate": 3.956396117683815e-06, "loss": 0.02273898422718048, "step": 27165 }, { "epoch": 0.25571764705882355, "grad_norm": 0.5663764870527415, "learning_rate": 3.9560320466689116e-06, "loss": 0.01871682405471802, "step": 27170 }, { "epoch": 0.25576470588235295, "grad_norm": 0.9303149072679157, "learning_rate": 3.955668076141908e-06, "loss": 0.024984046816825867, "step": 27175 }, { "epoch": 0.25581176470588235, "grad_norm": 0.677619249527276, "learning_rate": 3.9553042060565906e-06, "loss": 0.027087834477424622, "step": 27180 }, { "epoch": 0.25585882352941175, "grad_norm": 0.8649548990609837, "learning_rate": 3.954940436366767e-06, "loss": 0.025087463855743408, "step": 27185 }, { "epoch": 0.25590588235294115, "grad_norm": 0.6493973668787449, "learning_rate": 3.9545767670262795e-06, "loss": 0.02041739076375961, "step": 27190 }, { "epoch": 0.2559529411764706, "grad_norm": 0.5207260011419608, "learning_rate": 3.9542131979890004e-06, "loss": 0.02257698178291321, "step": 27195 }, { "epoch": 0.256, "grad_norm": 0.701915647937917, "learning_rate": 3.953849729208832e-06, "loss": 0.028824952244758607, "step": 27200 }, { "epoch": 0.2560470588235294, "grad_norm": 0.5192106809092952, "learning_rate": 3.953486360639701e-06, "loss": 0.020209914445877074, "step": 27205 }, { "epoch": 0.2560941176470588, "grad_norm": 0.5712094456388298, "learning_rate": 3.953123092235572e-06, "loss": 0.02473158985376358, "step": 27210 }, { "epoch": 0.25614117647058826, "grad_norm": 0.773683911846718, "learning_rate": 3.952759923950432e-06, "loss": 0.031162121891975404, "step": 27215 }, { "epoch": 0.25618823529411766, "grad_norm": 0.6053856979418073, "learning_rate": 3.952396855738302e-06, "loss": 0.020647817850112916, "step": 27220 }, { "epoch": 0.25623529411764706, "grad_norm": 0.45367488240231685, "learning_rate": 3.95203388755323e-06, "loss": 0.019211135804653168, "step": 27225 }, { "epoch": 0.25628235294117646, "grad_norm": 0.7895324550704752, "learning_rate": 3.951671019349293e-06, "loss": 0.023831915855407716, "step": 27230 }, { "epoch": 0.25632941176470586, "grad_norm": 0.5151737934373254, "learning_rate": 3.951308251080603e-06, "loss": 0.019977001845836638, "step": 27235 }, { "epoch": 0.2563764705882353, "grad_norm": 0.5740076529810989, "learning_rate": 3.950945582701294e-06, "loss": 0.022402460873126983, "step": 27240 }, { "epoch": 0.2564235294117647, "grad_norm": 0.7447847410379457, "learning_rate": 3.950583014165535e-06, "loss": 0.02410808950662613, "step": 27245 }, { "epoch": 0.2564705882352941, "grad_norm": 0.45902170726565394, "learning_rate": 3.950220545427522e-06, "loss": 0.021914216876029968, "step": 27250 }, { "epoch": 0.2565176470588235, "grad_norm": 0.5385302889855528, "learning_rate": 3.94985817644148e-06, "loss": 0.02412150949239731, "step": 27255 }, { "epoch": 0.2565647058823529, "grad_norm": 0.5569914801723478, "learning_rate": 3.949495907161665e-06, "loss": 0.02422538697719574, "step": 27260 }, { "epoch": 0.2566117647058824, "grad_norm": 0.775732112430728, "learning_rate": 3.949133737542363e-06, "loss": 0.025456967949867248, "step": 27265 }, { "epoch": 0.2566588235294118, "grad_norm": 0.44447795611583724, "learning_rate": 3.948771667537885e-06, "loss": 0.027324193716049196, "step": 27270 }, { "epoch": 0.2567058823529412, "grad_norm": 0.7694136839036122, "learning_rate": 3.948409697102575e-06, "loss": 0.029523611068725586, "step": 27275 }, { "epoch": 0.2567529411764706, "grad_norm": 0.7460921280013111, "learning_rate": 3.948047826190807e-06, "loss": 0.022551319003105162, "step": 27280 }, { "epoch": 0.2568, "grad_norm": 0.8509286865301745, "learning_rate": 3.947686054756981e-06, "loss": 0.023467589914798737, "step": 27285 }, { "epoch": 0.25684705882352943, "grad_norm": 0.5768471029539568, "learning_rate": 3.94732438275553e-06, "loss": 0.02549278438091278, "step": 27290 }, { "epoch": 0.25689411764705883, "grad_norm": 0.5199597234160157, "learning_rate": 3.946962810140913e-06, "loss": 0.026191693544387818, "step": 27295 }, { "epoch": 0.25694117647058823, "grad_norm": 0.6025028988622326, "learning_rate": 3.9466013368676195e-06, "loss": 0.02342277318239212, "step": 27300 }, { "epoch": 0.25698823529411763, "grad_norm": 0.757116872719119, "learning_rate": 3.9462399628901684e-06, "loss": 0.027405548095703124, "step": 27305 }, { "epoch": 0.2570352941176471, "grad_norm": 0.4363602404339703, "learning_rate": 3.945878688163108e-06, "loss": 0.03136489391326904, "step": 27310 }, { "epoch": 0.2570823529411765, "grad_norm": 0.6150226733440941, "learning_rate": 3.945517512641014e-06, "loss": 0.025318643450736998, "step": 27315 }, { "epoch": 0.2571294117647059, "grad_norm": 0.6698933574322983, "learning_rate": 3.9451564362784925e-06, "loss": 0.024406705796718598, "step": 27320 }, { "epoch": 0.2571764705882353, "grad_norm": 0.38875812785724007, "learning_rate": 3.94479545903018e-06, "loss": 0.01917800009250641, "step": 27325 }, { "epoch": 0.2572235294117647, "grad_norm": 0.6453695619656644, "learning_rate": 3.944434580850739e-06, "loss": 0.02048536539077759, "step": 27330 }, { "epoch": 0.25727058823529414, "grad_norm": 0.4507970399213423, "learning_rate": 3.944073801694864e-06, "loss": 0.01867852658033371, "step": 27335 }, { "epoch": 0.25731764705882354, "grad_norm": 0.5464057350943471, "learning_rate": 3.943713121517277e-06, "loss": 0.017665109038352965, "step": 27340 }, { "epoch": 0.25736470588235294, "grad_norm": 0.6381613582698791, "learning_rate": 3.943352540272727e-06, "loss": 0.026361995935440065, "step": 27345 }, { "epoch": 0.25741176470588234, "grad_norm": 0.5429899506203876, "learning_rate": 3.942992057915996e-06, "loss": 0.023987388610839842, "step": 27350 }, { "epoch": 0.25745882352941174, "grad_norm": 0.6608954031826432, "learning_rate": 3.942631674401893e-06, "loss": 0.026465195417404174, "step": 27355 }, { "epoch": 0.2575058823529412, "grad_norm": 0.6796737448937997, "learning_rate": 3.942271389685256e-06, "loss": 0.02207561433315277, "step": 27360 }, { "epoch": 0.2575529411764706, "grad_norm": 0.5315116382763364, "learning_rate": 3.941911203720951e-06, "loss": 0.02323712557554245, "step": 27365 }, { "epoch": 0.2576, "grad_norm": 0.5548560550951194, "learning_rate": 3.941551116463874e-06, "loss": 0.022008223831653594, "step": 27370 }, { "epoch": 0.2576470588235294, "grad_norm": 0.6247260186675248, "learning_rate": 3.94119112786895e-06, "loss": 0.02135191559791565, "step": 27375 }, { "epoch": 0.25769411764705885, "grad_norm": 0.6342770453370365, "learning_rate": 3.940831237891132e-06, "loss": 0.021920230984687806, "step": 27380 }, { "epoch": 0.25774117647058825, "grad_norm": 0.4561724396343153, "learning_rate": 3.940471446485402e-06, "loss": 0.027150803804397584, "step": 27385 }, { "epoch": 0.25778823529411765, "grad_norm": 0.5133488326167497, "learning_rate": 3.940111753606771e-06, "loss": 0.021520277857780455, "step": 27390 }, { "epoch": 0.25783529411764705, "grad_norm": 0.9763905657687049, "learning_rate": 3.939752159210278e-06, "loss": 0.026447072625160217, "step": 27395 }, { "epoch": 0.25788235294117645, "grad_norm": 0.5232746178666475, "learning_rate": 3.939392663250991e-06, "loss": 0.02765784561634064, "step": 27400 }, { "epoch": 0.2579294117647059, "grad_norm": 0.6953663337104672, "learning_rate": 3.939033265684009e-06, "loss": 0.02594042420387268, "step": 27405 }, { "epoch": 0.2579764705882353, "grad_norm": 0.48662288554381306, "learning_rate": 3.938673966464458e-06, "loss": 0.025794985890388488, "step": 27410 }, { "epoch": 0.2580235294117647, "grad_norm": 0.3779725763456007, "learning_rate": 3.9383147655474875e-06, "loss": 0.02250009775161743, "step": 27415 }, { "epoch": 0.2580705882352941, "grad_norm": 0.5786868298035602, "learning_rate": 3.937955662888286e-06, "loss": 0.021186521649360655, "step": 27420 }, { "epoch": 0.2581176470588235, "grad_norm": 0.6700856220034578, "learning_rate": 3.937596658442064e-06, "loss": 0.02126347869634628, "step": 27425 }, { "epoch": 0.25816470588235296, "grad_norm": 0.6095114660185911, "learning_rate": 3.937237752164058e-06, "loss": 0.0257521390914917, "step": 27430 }, { "epoch": 0.25821176470588236, "grad_norm": 0.6050080121161459, "learning_rate": 3.93687894400954e-06, "loss": 0.02477146089076996, "step": 27435 }, { "epoch": 0.25825882352941176, "grad_norm": 0.7484849407809722, "learning_rate": 3.936520233933807e-06, "loss": 0.022239959239959715, "step": 27440 }, { "epoch": 0.25830588235294116, "grad_norm": 0.6539059571548704, "learning_rate": 3.936161621892183e-06, "loss": 0.02384798228740692, "step": 27445 }, { "epoch": 0.25835294117647056, "grad_norm": 0.575039029172254, "learning_rate": 3.935803107840025e-06, "loss": 0.024685972929000856, "step": 27450 }, { "epoch": 0.2584, "grad_norm": 0.6924229326722807, "learning_rate": 3.935444691732713e-06, "loss": 0.02507973909378052, "step": 27455 }, { "epoch": 0.2584470588235294, "grad_norm": 0.6650147312777475, "learning_rate": 3.93508637352566e-06, "loss": 0.02085082232952118, "step": 27460 }, { "epoch": 0.2584941176470588, "grad_norm": 0.6333698996901634, "learning_rate": 3.934728153174304e-06, "loss": 0.021417374908924102, "step": 27465 }, { "epoch": 0.2585411764705882, "grad_norm": 0.760986124238509, "learning_rate": 3.934370030634113e-06, "loss": 0.02250753343105316, "step": 27470 }, { "epoch": 0.2585882352941177, "grad_norm": 0.6944224320242478, "learning_rate": 3.934012005860585e-06, "loss": 0.02133045792579651, "step": 27475 }, { "epoch": 0.2586352941176471, "grad_norm": 0.9208006466474692, "learning_rate": 3.933654078809243e-06, "loss": 0.021928296983242036, "step": 27480 }, { "epoch": 0.2586823529411765, "grad_norm": 0.4602668854086915, "learning_rate": 3.933296249435641e-06, "loss": 0.02190698981285095, "step": 27485 }, { "epoch": 0.2587294117647059, "grad_norm": 0.6660091261786217, "learning_rate": 3.93293851769536e-06, "loss": 0.025069981813430786, "step": 27490 }, { "epoch": 0.2587764705882353, "grad_norm": 0.5812414657450452, "learning_rate": 3.932580883544009e-06, "loss": 0.025363552570343017, "step": 27495 }, { "epoch": 0.25882352941176473, "grad_norm": 0.6375416989892652, "learning_rate": 3.932223346937225e-06, "loss": 0.021354493498802186, "step": 27500 }, { "epoch": 0.25887058823529413, "grad_norm": 0.6786694725024562, "learning_rate": 3.9318659078306756e-06, "loss": 0.02525303065776825, "step": 27505 }, { "epoch": 0.25891764705882353, "grad_norm": 0.6190694680640184, "learning_rate": 3.931508566180054e-06, "loss": 0.023686710000038146, "step": 27510 }, { "epoch": 0.25896470588235293, "grad_norm": 0.5382552582987806, "learning_rate": 3.931151321941084e-06, "loss": 0.022662407159805296, "step": 27515 }, { "epoch": 0.25901176470588233, "grad_norm": 0.5817088344321166, "learning_rate": 3.930794175069514e-06, "loss": 0.02620537281036377, "step": 27520 }, { "epoch": 0.2590588235294118, "grad_norm": 0.45275331269761127, "learning_rate": 3.930437125521124e-06, "loss": 0.03039974570274353, "step": 27525 }, { "epoch": 0.2591058823529412, "grad_norm": 0.5945231869516208, "learning_rate": 3.930080173251721e-06, "loss": 0.030679333209991454, "step": 27530 }, { "epoch": 0.2591529411764706, "grad_norm": 0.4793889076616866, "learning_rate": 3.929723318217139e-06, "loss": 0.01926426589488983, "step": 27535 }, { "epoch": 0.2592, "grad_norm": 0.43557243086449987, "learning_rate": 3.9293665603732415e-06, "loss": 0.02460305541753769, "step": 27540 }, { "epoch": 0.2592470588235294, "grad_norm": 0.45005194771532886, "learning_rate": 3.92900989967592e-06, "loss": 0.0285767138004303, "step": 27545 }, { "epoch": 0.25929411764705884, "grad_norm": 0.6847582006831693, "learning_rate": 3.928653336081092e-06, "loss": 0.021647873520851135, "step": 27550 }, { "epoch": 0.25934117647058824, "grad_norm": 0.808472816816688, "learning_rate": 3.928296869544705e-06, "loss": 0.02540384829044342, "step": 27555 }, { "epoch": 0.25938823529411764, "grad_norm": 0.6339892147057048, "learning_rate": 3.927940500022735e-06, "loss": 0.023157253861427307, "step": 27560 }, { "epoch": 0.25943529411764704, "grad_norm": 0.6659820089289799, "learning_rate": 3.927584227471185e-06, "loss": 0.02090331315994263, "step": 27565 }, { "epoch": 0.2594823529411765, "grad_norm": 0.6178417312188486, "learning_rate": 3.927228051846084e-06, "loss": 0.024734795093536377, "step": 27570 }, { "epoch": 0.2595294117647059, "grad_norm": 0.6881163815863474, "learning_rate": 3.9268719731034925e-06, "loss": 0.024643391370773315, "step": 27575 }, { "epoch": 0.2595764705882353, "grad_norm": 0.6064571020666074, "learning_rate": 3.926515991199496e-06, "loss": 0.02381356954574585, "step": 27580 }, { "epoch": 0.2596235294117647, "grad_norm": 0.5190968177003544, "learning_rate": 3.92616010609021e-06, "loss": 0.02208274006843567, "step": 27585 }, { "epoch": 0.2596705882352941, "grad_norm": 0.4668037553870238, "learning_rate": 3.925804317731777e-06, "loss": 0.026292085647583008, "step": 27590 }, { "epoch": 0.25971764705882355, "grad_norm": 0.641718122625227, "learning_rate": 3.925448626080366e-06, "loss": 0.023488253355026245, "step": 27595 }, { "epoch": 0.25976470588235295, "grad_norm": 0.6146749526383082, "learning_rate": 3.9250930310921755e-06, "loss": 0.0288163959980011, "step": 27600 }, { "epoch": 0.25981176470588235, "grad_norm": 0.8100456529054879, "learning_rate": 3.9247375327234314e-06, "loss": 0.022424620389938355, "step": 27605 }, { "epoch": 0.25985882352941175, "grad_norm": 0.4801256107186632, "learning_rate": 3.924382130930386e-06, "loss": 0.023734286427497864, "step": 27610 }, { "epoch": 0.25990588235294115, "grad_norm": 0.6001107913174041, "learning_rate": 3.924026825669323e-06, "loss": 0.021012786030769347, "step": 27615 }, { "epoch": 0.2599529411764706, "grad_norm": 0.7231415512331493, "learning_rate": 3.923671616896549e-06, "loss": 0.026264527440071107, "step": 27620 }, { "epoch": 0.26, "grad_norm": 0.7992342710746496, "learning_rate": 3.923316504568402e-06, "loss": 0.024870279431343078, "step": 27625 }, { "epoch": 0.2600470588235294, "grad_norm": 0.6793998195026613, "learning_rate": 3.922961488641245e-06, "loss": 0.021058923006057738, "step": 27630 }, { "epoch": 0.2600941176470588, "grad_norm": 0.5396636801479583, "learning_rate": 3.922606569071472e-06, "loss": 0.01913299560546875, "step": 27635 }, { "epoch": 0.2601411764705882, "grad_norm": 0.5804561959266825, "learning_rate": 3.9222517458155e-06, "loss": 0.024808609485626222, "step": 27640 }, { "epoch": 0.26018823529411766, "grad_norm": 0.6071699165749219, "learning_rate": 3.921897018829777e-06, "loss": 0.020042356848716737, "step": 27645 }, { "epoch": 0.26023529411764706, "grad_norm": 0.6332804625555777, "learning_rate": 3.921542388070778e-06, "loss": 0.027173030376434325, "step": 27650 }, { "epoch": 0.26028235294117646, "grad_norm": 0.5540389440578954, "learning_rate": 3.921187853495005e-06, "loss": 0.031447237730026244, "step": 27655 }, { "epoch": 0.26032941176470586, "grad_norm": 0.4219020326140704, "learning_rate": 3.920833415058987e-06, "loss": 0.021670232713222503, "step": 27660 }, { "epoch": 0.2603764705882353, "grad_norm": 0.48894385856987166, "learning_rate": 3.920479072719283e-06, "loss": 0.02461310476064682, "step": 27665 }, { "epoch": 0.2604235294117647, "grad_norm": 0.8170210741902201, "learning_rate": 3.9201248264324756e-06, "loss": 0.024503321945667268, "step": 27670 }, { "epoch": 0.2604705882352941, "grad_norm": 0.5026823445747058, "learning_rate": 3.919770676155178e-06, "loss": 0.015983384847640992, "step": 27675 }, { "epoch": 0.2605176470588235, "grad_norm": 0.9590794885643019, "learning_rate": 3.91941662184403e-06, "loss": 0.022530454397201537, "step": 27680 }, { "epoch": 0.2605647058823529, "grad_norm": 0.5587402632154675, "learning_rate": 3.919062663455699e-06, "loss": 0.029539221525192262, "step": 27685 }, { "epoch": 0.2606117647058824, "grad_norm": 0.5301734726530487, "learning_rate": 3.918708800946877e-06, "loss": 0.01767389625310898, "step": 27690 }, { "epoch": 0.2606588235294118, "grad_norm": 0.6120788402381356, "learning_rate": 3.9183550342742885e-06, "loss": 0.025425294041633607, "step": 27695 }, { "epoch": 0.2607058823529412, "grad_norm": 0.7679576402614603, "learning_rate": 3.918001363394681e-06, "loss": 0.027455776929855347, "step": 27700 }, { "epoch": 0.2607529411764706, "grad_norm": 0.6111369083073954, "learning_rate": 3.917647788264832e-06, "loss": 0.023907433450222015, "step": 27705 }, { "epoch": 0.2608, "grad_norm": 0.5676497905884359, "learning_rate": 3.917294308841545e-06, "loss": 0.024045906960964203, "step": 27710 }, { "epoch": 0.26084705882352943, "grad_norm": 0.6102115597506196, "learning_rate": 3.9169409250816496e-06, "loss": 0.024948500096797943, "step": 27715 }, { "epoch": 0.26089411764705883, "grad_norm": 0.6472800167017129, "learning_rate": 3.916587636942006e-06, "loss": 0.020031511783599854, "step": 27720 }, { "epoch": 0.26094117647058823, "grad_norm": 0.6364629523622309, "learning_rate": 3.916234444379498e-06, "loss": 0.030231916904449464, "step": 27725 }, { "epoch": 0.26098823529411763, "grad_norm": 0.5284030427942573, "learning_rate": 3.9158813473510395e-06, "loss": 0.026694458723068238, "step": 27730 }, { "epoch": 0.26103529411764703, "grad_norm": 0.6448162059989837, "learning_rate": 3.91552834581357e-06, "loss": 0.021413418650627136, "step": 27735 }, { "epoch": 0.2610823529411765, "grad_norm": 0.5018500253430607, "learning_rate": 3.915175439724056e-06, "loss": 0.029986894130706786, "step": 27740 }, { "epoch": 0.2611294117647059, "grad_norm": 0.40050922764306857, "learning_rate": 3.914822629039492e-06, "loss": 0.018655422329902648, "step": 27745 }, { "epoch": 0.2611764705882353, "grad_norm": 0.6774592282378713, "learning_rate": 3.9144699137169e-06, "loss": 0.0275705486536026, "step": 27750 }, { "epoch": 0.2612235294117647, "grad_norm": 0.8526099982841792, "learning_rate": 3.914117293713327e-06, "loss": 0.023955437541007995, "step": 27755 }, { "epoch": 0.26127058823529414, "grad_norm": 0.6162301699717904, "learning_rate": 3.9137647689858504e-06, "loss": 0.023836952447891236, "step": 27760 }, { "epoch": 0.26131764705882354, "grad_norm": 0.6283955061283045, "learning_rate": 3.91341233949157e-06, "loss": 0.01946513056755066, "step": 27765 }, { "epoch": 0.26136470588235294, "grad_norm": 0.7481855537211849, "learning_rate": 3.913060005187619e-06, "loss": 0.02594759166240692, "step": 27770 }, { "epoch": 0.26141176470588234, "grad_norm": 0.7632805542873079, "learning_rate": 3.912707766031152e-06, "loss": 0.025575751066207887, "step": 27775 }, { "epoch": 0.26145882352941174, "grad_norm": 1.0030595623801573, "learning_rate": 3.912355621979352e-06, "loss": 0.022560450434684753, "step": 27780 }, { "epoch": 0.2615058823529412, "grad_norm": 0.532074765209229, "learning_rate": 3.912003572989429e-06, "loss": 0.022651174664497377, "step": 27785 }, { "epoch": 0.2615529411764706, "grad_norm": 0.8915771730744482, "learning_rate": 3.911651619018624e-06, "loss": 0.028452733159065248, "step": 27790 }, { "epoch": 0.2616, "grad_norm": 0.5463935616826678, "learning_rate": 3.911299760024197e-06, "loss": 0.02391139566898346, "step": 27795 }, { "epoch": 0.2616470588235294, "grad_norm": 0.7701590596662053, "learning_rate": 3.910947995963443e-06, "loss": 0.022996988892555238, "step": 27800 }, { "epoch": 0.2616941176470588, "grad_norm": 0.4971907925081837, "learning_rate": 3.910596326793679e-06, "loss": 0.02251678556203842, "step": 27805 }, { "epoch": 0.26174117647058825, "grad_norm": 0.6450125292883323, "learning_rate": 3.910244752472249e-06, "loss": 0.020683209598064422, "step": 27810 }, { "epoch": 0.26178823529411765, "grad_norm": 0.5992499325948202, "learning_rate": 3.909893272956526e-06, "loss": 0.023484787344932555, "step": 27815 }, { "epoch": 0.26183529411764705, "grad_norm": 0.5836986467120379, "learning_rate": 3.909541888203908e-06, "loss": 0.025730040669441224, "step": 27820 }, { "epoch": 0.26188235294117646, "grad_norm": 0.7654522315081953, "learning_rate": 3.909190598171822e-06, "loss": 0.03088163733482361, "step": 27825 }, { "epoch": 0.26192941176470586, "grad_norm": 0.623201973469856, "learning_rate": 3.908839402817718e-06, "loss": 0.024835506081581117, "step": 27830 }, { "epoch": 0.2619764705882353, "grad_norm": 0.3944513751941007, "learning_rate": 3.9084883020990775e-06, "loss": 0.02623276710510254, "step": 27835 }, { "epoch": 0.2620235294117647, "grad_norm": 0.5138910657744112, "learning_rate": 3.9081372959734045e-06, "loss": 0.022469529509544374, "step": 27840 }, { "epoch": 0.2620705882352941, "grad_norm": 0.7335202485977081, "learning_rate": 3.9077863843982315e-06, "loss": 0.025123214721679686, "step": 27845 }, { "epoch": 0.2621176470588235, "grad_norm": 0.6185171315809496, "learning_rate": 3.90743556733112e-06, "loss": 0.02229461967945099, "step": 27850 }, { "epoch": 0.26216470588235297, "grad_norm": 0.9171473094200957, "learning_rate": 3.907084844729653e-06, "loss": 0.027130556106567384, "step": 27855 }, { "epoch": 0.26221176470588237, "grad_norm": 0.6985887812876305, "learning_rate": 3.9067342165514445e-06, "loss": 0.016682220995426177, "step": 27860 }, { "epoch": 0.26225882352941177, "grad_norm": 0.7251472054688971, "learning_rate": 3.906383682754133e-06, "loss": 0.020970535278320313, "step": 27865 }, { "epoch": 0.26230588235294117, "grad_norm": 0.44772687944470646, "learning_rate": 3.906033243295385e-06, "loss": 0.01797071397304535, "step": 27870 }, { "epoch": 0.26235294117647057, "grad_norm": 0.9212559598098672, "learning_rate": 3.905682898132891e-06, "loss": 0.02392530143260956, "step": 27875 }, { "epoch": 0.2624, "grad_norm": 0.5247133479015512, "learning_rate": 3.905332647224372e-06, "loss": 0.022595737874507905, "step": 27880 }, { "epoch": 0.2624470588235294, "grad_norm": 0.3558432268489785, "learning_rate": 3.904982490527573e-06, "loss": 0.022438064217567444, "step": 27885 }, { "epoch": 0.2624941176470588, "grad_norm": 0.6315787718846751, "learning_rate": 3.904632428000265e-06, "loss": 0.02449001669883728, "step": 27890 }, { "epoch": 0.2625411764705882, "grad_norm": 0.5720519855049908, "learning_rate": 3.904282459600247e-06, "loss": 0.027113744616508485, "step": 27895 }, { "epoch": 0.2625882352941176, "grad_norm": 0.6874443254641989, "learning_rate": 3.903932585285344e-06, "loss": 0.016996976733207703, "step": 27900 }, { "epoch": 0.2626352941176471, "grad_norm": 0.5952964923368331, "learning_rate": 3.903582805013406e-06, "loss": 0.02172112464904785, "step": 27905 }, { "epoch": 0.2626823529411765, "grad_norm": 0.49619875341758896, "learning_rate": 3.903233118742313e-06, "loss": 0.02410275340080261, "step": 27910 }, { "epoch": 0.2627294117647059, "grad_norm": 0.620614776500968, "learning_rate": 3.902883526429968e-06, "loss": 0.023747031390666962, "step": 27915 }, { "epoch": 0.2627764705882353, "grad_norm": 0.8578189617028735, "learning_rate": 3.9025340280343e-06, "loss": 0.022645846009254456, "step": 27920 }, { "epoch": 0.26282352941176473, "grad_norm": 1.0144833939513676, "learning_rate": 3.902184623513268e-06, "loss": 0.023559364676475524, "step": 27925 }, { "epoch": 0.26287058823529413, "grad_norm": 0.6184780545515309, "learning_rate": 3.901835312824856e-06, "loss": 0.02379632890224457, "step": 27930 }, { "epoch": 0.26291764705882353, "grad_norm": 0.8046031744820497, "learning_rate": 3.90148609592707e-06, "loss": 0.02363189160823822, "step": 27935 }, { "epoch": 0.26296470588235293, "grad_norm": 0.42848344149581874, "learning_rate": 3.9011369727779494e-06, "loss": 0.029669281840324403, "step": 27940 }, { "epoch": 0.26301176470588233, "grad_norm": 0.47656347053677717, "learning_rate": 3.900787943335555e-06, "loss": 0.021785911917686463, "step": 27945 }, { "epoch": 0.2630588235294118, "grad_norm": 0.5665788863163372, "learning_rate": 3.900439007557976e-06, "loss": 0.023305192589759827, "step": 27950 }, { "epoch": 0.2631058823529412, "grad_norm": 0.8082076970513911, "learning_rate": 3.900090165403326e-06, "loss": 0.027790528535842896, "step": 27955 }, { "epoch": 0.2631529411764706, "grad_norm": 0.5012547045712327, "learning_rate": 3.899741416829746e-06, "loss": 0.025941529870033266, "step": 27960 }, { "epoch": 0.2632, "grad_norm": 0.9532510604371432, "learning_rate": 3.8993927617954035e-06, "loss": 0.026929616928100586, "step": 27965 }, { "epoch": 0.2632470588235294, "grad_norm": 0.8039604235482763, "learning_rate": 3.899044200258491e-06, "loss": 0.02618521749973297, "step": 27970 }, { "epoch": 0.26329411764705885, "grad_norm": 0.5771733724423904, "learning_rate": 3.8986957321772304e-06, "loss": 0.023781682550907134, "step": 27975 }, { "epoch": 0.26334117647058825, "grad_norm": 0.4973015751548111, "learning_rate": 3.898347357509864e-06, "loss": 0.022268682718276978, "step": 27980 }, { "epoch": 0.26338823529411765, "grad_norm": 0.8490738504777631, "learning_rate": 3.8979990762146655e-06, "loss": 0.028424212336540224, "step": 27985 }, { "epoch": 0.26343529411764705, "grad_norm": 0.5750646537085103, "learning_rate": 3.897650888249932e-06, "loss": 0.023116621375083923, "step": 27990 }, { "epoch": 0.26348235294117645, "grad_norm": 0.6222102770950393, "learning_rate": 3.897302793573987e-06, "loss": 0.024560873210430146, "step": 27995 }, { "epoch": 0.2635294117647059, "grad_norm": 0.7316593637544223, "learning_rate": 3.896954792145181e-06, "loss": 0.029497578740119934, "step": 28000 }, { "epoch": 0.2635764705882353, "grad_norm": 0.49491402886442, "learning_rate": 3.896606883921889e-06, "loss": 0.020118045806884765, "step": 28005 }, { "epoch": 0.2636235294117647, "grad_norm": 0.47274390773601277, "learning_rate": 3.896259068862514e-06, "loss": 0.032169857621192934, "step": 28010 }, { "epoch": 0.2636705882352941, "grad_norm": 0.705504750735933, "learning_rate": 3.895911346925484e-06, "loss": 0.02607690691947937, "step": 28015 }, { "epoch": 0.26371764705882356, "grad_norm": 0.5471581414304073, "learning_rate": 3.895563718069251e-06, "loss": 0.023725095391273498, "step": 28020 }, { "epoch": 0.26376470588235296, "grad_norm": 0.8057268648144816, "learning_rate": 3.895216182252297e-06, "loss": 0.019062785804271697, "step": 28025 }, { "epoch": 0.26381176470588236, "grad_norm": 0.7190737126189304, "learning_rate": 3.894868739433126e-06, "loss": 0.023899078369140625, "step": 28030 }, { "epoch": 0.26385882352941176, "grad_norm": 0.7736453463176374, "learning_rate": 3.894521389570272e-06, "loss": 0.03107905387878418, "step": 28035 }, { "epoch": 0.26390588235294116, "grad_norm": 0.8909019875761085, "learning_rate": 3.89417413262229e-06, "loss": 0.02427549660205841, "step": 28040 }, { "epoch": 0.2639529411764706, "grad_norm": 0.6367366018172159, "learning_rate": 3.893826968547765e-06, "loss": 0.025763389468193055, "step": 28045 }, { "epoch": 0.264, "grad_norm": 0.44074913457007286, "learning_rate": 3.893479897305305e-06, "loss": 0.024704450368881227, "step": 28050 }, { "epoch": 0.2640470588235294, "grad_norm": 0.7661985429809899, "learning_rate": 3.893132918853545e-06, "loss": 0.023271328210830687, "step": 28055 }, { "epoch": 0.2640941176470588, "grad_norm": 0.5147128532481583, "learning_rate": 3.892786033151148e-06, "loss": 0.019587132334709167, "step": 28060 }, { "epoch": 0.2641411764705882, "grad_norm": 0.525750025341205, "learning_rate": 3.892439240156798e-06, "loss": 0.02472561299800873, "step": 28065 }, { "epoch": 0.26418823529411767, "grad_norm": 0.47560394974151526, "learning_rate": 3.8920925398292085e-06, "loss": 0.033020681142807005, "step": 28070 }, { "epoch": 0.26423529411764707, "grad_norm": 0.8697491503243979, "learning_rate": 3.891745932127118e-06, "loss": 0.01996225118637085, "step": 28075 }, { "epoch": 0.26428235294117647, "grad_norm": 0.6403917116740392, "learning_rate": 3.89139941700929e-06, "loss": 0.02563405632972717, "step": 28080 }, { "epoch": 0.26432941176470587, "grad_norm": 0.6356142145029975, "learning_rate": 3.891052994434514e-06, "loss": 0.01895131766796112, "step": 28085 }, { "epoch": 0.26437647058823527, "grad_norm": 0.5390841443795741, "learning_rate": 3.890706664361605e-06, "loss": 0.022924430668354034, "step": 28090 }, { "epoch": 0.2644235294117647, "grad_norm": 0.5718553324666839, "learning_rate": 3.890360426749403e-06, "loss": 0.024492529034614564, "step": 28095 }, { "epoch": 0.2644705882352941, "grad_norm": 0.7223224103959354, "learning_rate": 3.890014281556776e-06, "loss": 0.026236513257026674, "step": 28100 }, { "epoch": 0.2645176470588235, "grad_norm": 0.47442656624795887, "learning_rate": 3.889668228742616e-06, "loss": 0.018904325366020203, "step": 28105 }, { "epoch": 0.2645647058823529, "grad_norm": 0.6616447504081457, "learning_rate": 3.889322268265841e-06, "loss": 0.025895664095878602, "step": 28110 }, { "epoch": 0.2646117647058824, "grad_norm": 0.8265694090086856, "learning_rate": 3.888976400085393e-06, "loss": 0.027250123023986817, "step": 28115 }, { "epoch": 0.2646588235294118, "grad_norm": 0.6403091978048543, "learning_rate": 3.888630624160241e-06, "loss": 0.025812432169914246, "step": 28120 }, { "epoch": 0.2647058823529412, "grad_norm": 0.5494685658757514, "learning_rate": 3.888284940449381e-06, "loss": 0.02680985927581787, "step": 28125 }, { "epoch": 0.2647529411764706, "grad_norm": 0.6624002329503438, "learning_rate": 3.887939348911831e-06, "loss": 0.02800590395927429, "step": 28130 }, { "epoch": 0.2648, "grad_norm": 0.408002851127991, "learning_rate": 3.8875938495066365e-06, "loss": 0.020228397846221925, "step": 28135 }, { "epoch": 0.26484705882352944, "grad_norm": 0.6675700906150859, "learning_rate": 3.88724844219287e-06, "loss": 0.027518612146377564, "step": 28140 }, { "epoch": 0.26489411764705884, "grad_norm": 0.7548397397716018, "learning_rate": 3.886903126929627e-06, "loss": 0.02194809764623642, "step": 28145 }, { "epoch": 0.26494117647058824, "grad_norm": 0.40330069583598943, "learning_rate": 3.886557903676027e-06, "loss": 0.017092597484588624, "step": 28150 }, { "epoch": 0.26498823529411764, "grad_norm": 0.6806055885577273, "learning_rate": 3.886212772391221e-06, "loss": 0.02162933796644211, "step": 28155 }, { "epoch": 0.26503529411764704, "grad_norm": 0.3901744935469472, "learning_rate": 3.885867733034378e-06, "loss": 0.024465171992778777, "step": 28160 }, { "epoch": 0.2650823529411765, "grad_norm": 0.5605078952306695, "learning_rate": 3.8855227855646985e-06, "loss": 0.018548636138439177, "step": 28165 }, { "epoch": 0.2651294117647059, "grad_norm": 0.5759538889978073, "learning_rate": 3.885177929941405e-06, "loss": 0.021828818321228027, "step": 28170 }, { "epoch": 0.2651764705882353, "grad_norm": 1.3707079869014398, "learning_rate": 3.884833166123745e-06, "loss": 0.024289843440055848, "step": 28175 }, { "epoch": 0.2652235294117647, "grad_norm": 0.5151306140922078, "learning_rate": 3.884488494070993e-06, "loss": 0.02481008768081665, "step": 28180 }, { "epoch": 0.2652705882352941, "grad_norm": 0.5467209785956257, "learning_rate": 3.884143913742448e-06, "loss": 0.023661208152770997, "step": 28185 }, { "epoch": 0.26531764705882355, "grad_norm": 0.5955738489807055, "learning_rate": 3.883799425097436e-06, "loss": 0.023145601153373718, "step": 28190 }, { "epoch": 0.26536470588235295, "grad_norm": 0.5043564068267316, "learning_rate": 3.883455028095305e-06, "loss": 0.02012266218662262, "step": 28195 }, { "epoch": 0.26541176470588235, "grad_norm": 0.7675697240316907, "learning_rate": 3.883110722695431e-06, "loss": 0.02952294945716858, "step": 28200 }, { "epoch": 0.26545882352941175, "grad_norm": 0.7758183861560876, "learning_rate": 3.882766508857212e-06, "loss": 0.02668156921863556, "step": 28205 }, { "epoch": 0.2655058823529412, "grad_norm": 0.5545352513702669, "learning_rate": 3.882422386540076e-06, "loss": 0.021552810072898866, "step": 28210 }, { "epoch": 0.2655529411764706, "grad_norm": 0.7662466854344853, "learning_rate": 3.882078355703471e-06, "loss": 0.027022457122802733, "step": 28215 }, { "epoch": 0.2656, "grad_norm": 0.5439960509309969, "learning_rate": 3.881734416306875e-06, "loss": 0.020911984145641327, "step": 28220 }, { "epoch": 0.2656470588235294, "grad_norm": 1.510708479811989, "learning_rate": 3.881390568309787e-06, "loss": 0.023245617747306824, "step": 28225 }, { "epoch": 0.2656941176470588, "grad_norm": 0.4376986946933591, "learning_rate": 3.881046811671734e-06, "loss": 0.018652185797691345, "step": 28230 }, { "epoch": 0.26574117647058826, "grad_norm": 0.6020461250788731, "learning_rate": 3.880703146352267e-06, "loss": 0.02304314374923706, "step": 28235 }, { "epoch": 0.26578823529411766, "grad_norm": 0.7551041232319837, "learning_rate": 3.880359572310961e-06, "loss": 0.024280992150306702, "step": 28240 }, { "epoch": 0.26583529411764706, "grad_norm": 0.5734723022930458, "learning_rate": 3.880016089507417e-06, "loss": 0.020454099774360655, "step": 28245 }, { "epoch": 0.26588235294117646, "grad_norm": 0.4729507442442484, "learning_rate": 3.879672697901262e-06, "loss": 0.021830974519252776, "step": 28250 }, { "epoch": 0.26592941176470586, "grad_norm": 0.5888853009398239, "learning_rate": 3.879329397452147e-06, "loss": 0.024187296628952026, "step": 28255 }, { "epoch": 0.2659764705882353, "grad_norm": 0.5005964142667484, "learning_rate": 3.8789861881197475e-06, "loss": 0.0210374116897583, "step": 28260 }, { "epoch": 0.2660235294117647, "grad_norm": 0.4606148754017559, "learning_rate": 3.878643069863764e-06, "loss": 0.01960582137107849, "step": 28265 }, { "epoch": 0.2660705882352941, "grad_norm": 1.2658711900409452, "learning_rate": 3.878300042643925e-06, "loss": 0.027526667714118956, "step": 28270 }, { "epoch": 0.2661176470588235, "grad_norm": 0.5232029261843867, "learning_rate": 3.877957106419979e-06, "loss": 0.025643506646156312, "step": 28275 }, { "epoch": 0.2661647058823529, "grad_norm": 0.5190250299182525, "learning_rate": 3.8776142611517035e-06, "loss": 0.02276638150215149, "step": 28280 }, { "epoch": 0.26621176470588237, "grad_norm": 0.6325518930132829, "learning_rate": 3.8772715067988985e-06, "loss": 0.021633931994438173, "step": 28285 }, { "epoch": 0.26625882352941177, "grad_norm": 0.48228911333055535, "learning_rate": 3.876928843321388e-06, "loss": 0.026010316610336304, "step": 28290 }, { "epoch": 0.26630588235294117, "grad_norm": 0.8906080914478488, "learning_rate": 3.876586270679025e-06, "loss": 0.02343512177467346, "step": 28295 }, { "epoch": 0.26635294117647057, "grad_norm": 0.5832211842682568, "learning_rate": 3.876243788831683e-06, "loss": 0.022078923881053925, "step": 28300 }, { "epoch": 0.2664, "grad_norm": 0.5833466203250068, "learning_rate": 3.875901397739264e-06, "loss": 0.01860639303922653, "step": 28305 }, { "epoch": 0.2664470588235294, "grad_norm": 0.6305479330218591, "learning_rate": 3.87555909736169e-06, "loss": 0.025725612044334413, "step": 28310 }, { "epoch": 0.2664941176470588, "grad_norm": 0.45795017005497035, "learning_rate": 3.875216887658913e-06, "loss": 0.020082828402519227, "step": 28315 }, { "epoch": 0.2665411764705882, "grad_norm": 0.5658430266574866, "learning_rate": 3.874874768590908e-06, "loss": 0.024983906745910646, "step": 28320 }, { "epoch": 0.2665882352941176, "grad_norm": 0.7830925139169875, "learning_rate": 3.874532740117671e-06, "loss": 0.024883022904396056, "step": 28325 }, { "epoch": 0.2666352941176471, "grad_norm": 0.6270582819753047, "learning_rate": 3.874190802199229e-06, "loss": 0.01800763010978699, "step": 28330 }, { "epoch": 0.2666823529411765, "grad_norm": 0.7007652148092118, "learning_rate": 3.8738489547956295e-06, "loss": 0.028647172451019286, "step": 28335 }, { "epoch": 0.2667294117647059, "grad_norm": 0.7013402912646706, "learning_rate": 3.873507197866944e-06, "loss": 0.02100757956504822, "step": 28340 }, { "epoch": 0.2667764705882353, "grad_norm": 0.7378691993696601, "learning_rate": 3.873165531373273e-06, "loss": 0.02293824255466461, "step": 28345 }, { "epoch": 0.2668235294117647, "grad_norm": 0.6229760675134715, "learning_rate": 3.872823955274738e-06, "loss": 0.021895934641361237, "step": 28350 }, { "epoch": 0.26687058823529414, "grad_norm": 0.6919806007473317, "learning_rate": 3.872482469531485e-06, "loss": 0.023213595151901245, "step": 28355 }, { "epoch": 0.26691764705882354, "grad_norm": 0.5850898333840365, "learning_rate": 3.872141074103687e-06, "loss": 0.022448617219924926, "step": 28360 }, { "epoch": 0.26696470588235294, "grad_norm": 0.4903962448523058, "learning_rate": 3.87179976895154e-06, "loss": 0.025429314374923705, "step": 28365 }, { "epoch": 0.26701176470588234, "grad_norm": 0.6199860766025401, "learning_rate": 3.871458554035265e-06, "loss": 0.023966535925865173, "step": 28370 }, { "epoch": 0.26705882352941174, "grad_norm": 0.6468695275627675, "learning_rate": 3.871117429315105e-06, "loss": 0.021191801130771636, "step": 28375 }, { "epoch": 0.2671058823529412, "grad_norm": 0.4971726038158779, "learning_rate": 3.870776394751334e-06, "loss": 0.024762019515037537, "step": 28380 }, { "epoch": 0.2671529411764706, "grad_norm": 0.48822305284769135, "learning_rate": 3.870435450304245e-06, "loss": 0.02320566475391388, "step": 28385 }, { "epoch": 0.2672, "grad_norm": 0.836301809983505, "learning_rate": 3.870094595934155e-06, "loss": 0.021822258830070496, "step": 28390 }, { "epoch": 0.2672470588235294, "grad_norm": 0.7404194246669658, "learning_rate": 3.8697538316014075e-06, "loss": 0.02427091300487518, "step": 28395 }, { "epoch": 0.26729411764705885, "grad_norm": 0.6419683479855242, "learning_rate": 3.869413157266373e-06, "loss": 0.025552791357040406, "step": 28400 }, { "epoch": 0.26734117647058825, "grad_norm": 0.5359710945001159, "learning_rate": 3.8690725728894405e-06, "loss": 0.02247333973646164, "step": 28405 }, { "epoch": 0.26738823529411765, "grad_norm": 0.5983879250128378, "learning_rate": 3.868732078431029e-06, "loss": 0.021365638077259063, "step": 28410 }, { "epoch": 0.26743529411764705, "grad_norm": 0.7349629594290906, "learning_rate": 3.868391673851577e-06, "loss": 0.0249611034989357, "step": 28415 }, { "epoch": 0.26748235294117645, "grad_norm": 0.6215574799164107, "learning_rate": 3.868051359111552e-06, "loss": 0.025310003757476808, "step": 28420 }, { "epoch": 0.2675294117647059, "grad_norm": 0.7185373488664164, "learning_rate": 3.867711134171444e-06, "loss": 0.02605978846549988, "step": 28425 }, { "epoch": 0.2675764705882353, "grad_norm": 0.36731046663141653, "learning_rate": 3.867370998991764e-06, "loss": 0.020642109215259552, "step": 28430 }, { "epoch": 0.2676235294117647, "grad_norm": 0.5740408617102493, "learning_rate": 3.8670309535330534e-06, "loss": 0.027566510438919067, "step": 28435 }, { "epoch": 0.2676705882352941, "grad_norm": 0.6971209708644666, "learning_rate": 3.866690997755873e-06, "loss": 0.025127679109573364, "step": 28440 }, { "epoch": 0.2677176470588235, "grad_norm": 0.6763886666921562, "learning_rate": 3.866351131620811e-06, "loss": 0.02505582571029663, "step": 28445 }, { "epoch": 0.26776470588235296, "grad_norm": 0.7391621761279045, "learning_rate": 3.866011355088476e-06, "loss": 0.023936864733695985, "step": 28450 }, { "epoch": 0.26781176470588236, "grad_norm": 0.991219979127291, "learning_rate": 3.865671668119505e-06, "loss": 0.02538534104824066, "step": 28455 }, { "epoch": 0.26785882352941176, "grad_norm": 0.5713357624084611, "learning_rate": 3.865332070674558e-06, "loss": 0.02738614082336426, "step": 28460 }, { "epoch": 0.26790588235294116, "grad_norm": 0.5258520108377611, "learning_rate": 3.864992562714318e-06, "loss": 0.023350876569747925, "step": 28465 }, { "epoch": 0.2679529411764706, "grad_norm": 0.5153039214331011, "learning_rate": 3.864653144199493e-06, "loss": 0.0220514252781868, "step": 28470 }, { "epoch": 0.268, "grad_norm": 0.3910997086167181, "learning_rate": 3.864313815090814e-06, "loss": 0.023283904790878295, "step": 28475 }, { "epoch": 0.2680470588235294, "grad_norm": 0.5359635072548145, "learning_rate": 3.863974575349039e-06, "loss": 0.030838778614997862, "step": 28480 }, { "epoch": 0.2680941176470588, "grad_norm": 0.6252916783366214, "learning_rate": 3.863635424934947e-06, "loss": 0.024108102917671202, "step": 28485 }, { "epoch": 0.2681411764705882, "grad_norm": 0.5568289557042135, "learning_rate": 3.863296363809343e-06, "loss": 0.02102506160736084, "step": 28490 }, { "epoch": 0.26818823529411767, "grad_norm": 0.41672704767508717, "learning_rate": 3.8629573919330545e-06, "loss": 0.022683382034301758, "step": 28495 }, { "epoch": 0.26823529411764707, "grad_norm": 0.6782251882955125, "learning_rate": 3.862618509266935e-06, "loss": 0.02297268360853195, "step": 28500 }, { "epoch": 0.26828235294117647, "grad_norm": 0.6840745776082441, "learning_rate": 3.862279715771859e-06, "loss": 0.025785279273986817, "step": 28505 }, { "epoch": 0.26832941176470587, "grad_norm": 0.5665107622974874, "learning_rate": 3.861941011408731e-06, "loss": 0.023523493111133574, "step": 28510 }, { "epoch": 0.26837647058823527, "grad_norm": 0.41717695161671126, "learning_rate": 3.8616023961384715e-06, "loss": 0.02161920666694641, "step": 28515 }, { "epoch": 0.2684235294117647, "grad_norm": 0.898644356791161, "learning_rate": 3.861263869922031e-06, "loss": 0.022730377316474915, "step": 28520 }, { "epoch": 0.2684705882352941, "grad_norm": 0.6182370418640563, "learning_rate": 3.860925432720382e-06, "loss": 0.021475034952163696, "step": 28525 }, { "epoch": 0.2685176470588235, "grad_norm": 0.4824516122667696, "learning_rate": 3.86058708449452e-06, "loss": 0.02392962872982025, "step": 28530 }, { "epoch": 0.2685647058823529, "grad_norm": 0.5367790167281292, "learning_rate": 3.860248825205466e-06, "loss": 0.02913941740989685, "step": 28535 }, { "epoch": 0.2686117647058823, "grad_norm": 0.6529307343679007, "learning_rate": 3.859910654814264e-06, "loss": 0.03056524395942688, "step": 28540 }, { "epoch": 0.2686588235294118, "grad_norm": 2.226942253201833, "learning_rate": 3.859572573281981e-06, "loss": 0.0276200532913208, "step": 28545 }, { "epoch": 0.2687058823529412, "grad_norm": 0.5673831105522428, "learning_rate": 3.859234580569712e-06, "loss": 0.023378753662109376, "step": 28550 }, { "epoch": 0.2687529411764706, "grad_norm": 0.7779564436646048, "learning_rate": 3.8588966766385695e-06, "loss": 0.022554811835289002, "step": 28555 }, { "epoch": 0.2688, "grad_norm": 0.7744931017519449, "learning_rate": 3.858558861449695e-06, "loss": 0.019803696870803834, "step": 28560 }, { "epoch": 0.26884705882352944, "grad_norm": 0.728795180970736, "learning_rate": 3.858221134964251e-06, "loss": 0.03130698204040527, "step": 28565 }, { "epoch": 0.26889411764705884, "grad_norm": 0.49714493710996, "learning_rate": 3.857883497143426e-06, "loss": 0.020939257740974427, "step": 28570 }, { "epoch": 0.26894117647058824, "grad_norm": 0.5260011847188779, "learning_rate": 3.85754594794843e-06, "loss": 0.025401189923286438, "step": 28575 }, { "epoch": 0.26898823529411764, "grad_norm": 0.7107903145236956, "learning_rate": 3.857208487340499e-06, "loss": 0.022865453362464906, "step": 28580 }, { "epoch": 0.26903529411764704, "grad_norm": 0.7285942881090667, "learning_rate": 3.8568711152808895e-06, "loss": 0.02355181276798248, "step": 28585 }, { "epoch": 0.2690823529411765, "grad_norm": 0.4516663999977612, "learning_rate": 3.856533831730885e-06, "loss": 0.0208921879529953, "step": 28590 }, { "epoch": 0.2691294117647059, "grad_norm": 0.7552906524729646, "learning_rate": 3.8561966366517915e-06, "loss": 0.026395776867866518, "step": 28595 }, { "epoch": 0.2691764705882353, "grad_norm": 0.4664861590344404, "learning_rate": 3.8558595300049375e-06, "loss": 0.02259054034948349, "step": 28600 }, { "epoch": 0.2692235294117647, "grad_norm": 0.7552364926512715, "learning_rate": 3.855522511751679e-06, "loss": 0.027809828519821167, "step": 28605 }, { "epoch": 0.2692705882352941, "grad_norm": 0.49478692114850353, "learning_rate": 3.855185581853389e-06, "loss": 0.022909075021743774, "step": 28610 }, { "epoch": 0.26931764705882355, "grad_norm": 0.5239145860693692, "learning_rate": 3.854848740271471e-06, "loss": 0.0194091796875, "step": 28615 }, { "epoch": 0.26936470588235295, "grad_norm": 0.5645323020143438, "learning_rate": 3.854511986967348e-06, "loss": 0.02511439621448517, "step": 28620 }, { "epoch": 0.26941176470588235, "grad_norm": 0.5741660744108551, "learning_rate": 3.854175321902468e-06, "loss": 0.027041929960250854, "step": 28625 }, { "epoch": 0.26945882352941175, "grad_norm": 0.6430862882885774, "learning_rate": 3.853838745038302e-06, "loss": 0.022574847936630248, "step": 28630 }, { "epoch": 0.26950588235294115, "grad_norm": 0.6834138593208091, "learning_rate": 3.853502256336345e-06, "loss": 0.02220887839794159, "step": 28635 }, { "epoch": 0.2695529411764706, "grad_norm": 0.828039218490265, "learning_rate": 3.853165855758116e-06, "loss": 0.017934542894363404, "step": 28640 }, { "epoch": 0.2696, "grad_norm": 0.5937408639887006, "learning_rate": 3.852829543265155e-06, "loss": 0.023760305345058443, "step": 28645 }, { "epoch": 0.2696470588235294, "grad_norm": 0.6501292251754726, "learning_rate": 3.852493318819029e-06, "loss": 0.02095753252506256, "step": 28650 }, { "epoch": 0.2696941176470588, "grad_norm": 0.7737024305187467, "learning_rate": 3.852157182381327e-06, "loss": 0.022434771060943604, "step": 28655 }, { "epoch": 0.26974117647058826, "grad_norm": 0.5765739492392761, "learning_rate": 3.85182113391366e-06, "loss": 0.02169664651155472, "step": 28660 }, { "epoch": 0.26978823529411766, "grad_norm": 0.5886528660737355, "learning_rate": 3.851485173377665e-06, "loss": 0.027214089035987855, "step": 28665 }, { "epoch": 0.26983529411764706, "grad_norm": 0.542849582623479, "learning_rate": 3.851149300735e-06, "loss": 0.024533838033676147, "step": 28670 }, { "epoch": 0.26988235294117646, "grad_norm": 0.5509718907737478, "learning_rate": 3.850813515947348e-06, "loss": 0.023635679483413698, "step": 28675 }, { "epoch": 0.26992941176470586, "grad_norm": 0.6209484125154972, "learning_rate": 3.850477818976416e-06, "loss": 0.02770736813545227, "step": 28680 }, { "epoch": 0.2699764705882353, "grad_norm": 0.5492888496175427, "learning_rate": 3.850142209783932e-06, "loss": 0.022433409094810487, "step": 28685 }, { "epoch": 0.2700235294117647, "grad_norm": 0.6162112281089157, "learning_rate": 3.849806688331649e-06, "loss": 0.01840815246105194, "step": 28690 }, { "epoch": 0.2700705882352941, "grad_norm": 0.5458167331133951, "learning_rate": 3.849471254581343e-06, "loss": 0.03153601884841919, "step": 28695 }, { "epoch": 0.2701176470588235, "grad_norm": 0.4373795655440048, "learning_rate": 3.8491359084948124e-06, "loss": 0.026992136240005495, "step": 28700 }, { "epoch": 0.2701647058823529, "grad_norm": 0.5665224071783491, "learning_rate": 3.848800650033881e-06, "loss": 0.02044052928686142, "step": 28705 }, { "epoch": 0.2702117647058824, "grad_norm": 0.5200205291663642, "learning_rate": 3.848465479160395e-06, "loss": 0.017043986916542055, "step": 28710 }, { "epoch": 0.2702588235294118, "grad_norm": 0.7831959966515727, "learning_rate": 3.848130395836222e-06, "loss": 0.028188520669937135, "step": 28715 }, { "epoch": 0.2703058823529412, "grad_norm": 0.686981023161243, "learning_rate": 3.847795400023256e-06, "loss": 0.024275563657283783, "step": 28720 }, { "epoch": 0.2703529411764706, "grad_norm": 0.4677642450347601, "learning_rate": 3.8474604916834115e-06, "loss": 0.02026360183954239, "step": 28725 }, { "epoch": 0.2704, "grad_norm": 0.3835206876875681, "learning_rate": 3.847125670778627e-06, "loss": 0.023064452409744262, "step": 28730 }, { "epoch": 0.27044705882352943, "grad_norm": 0.6819691567421711, "learning_rate": 3.846790937270865e-06, "loss": 0.02648195028305054, "step": 28735 }, { "epoch": 0.27049411764705883, "grad_norm": 1.101724072668386, "learning_rate": 3.846456291122109e-06, "loss": 0.023973554372787476, "step": 28740 }, { "epoch": 0.27054117647058823, "grad_norm": 0.5584942085538979, "learning_rate": 3.846121732294371e-06, "loss": 0.022442762553691865, "step": 28745 }, { "epoch": 0.27058823529411763, "grad_norm": 0.5639845986358806, "learning_rate": 3.845787260749679e-06, "loss": 0.027804869413375854, "step": 28750 }, { "epoch": 0.2706352941176471, "grad_norm": 0.7162085946997204, "learning_rate": 3.845452876450088e-06, "loss": 0.026395529508590698, "step": 28755 }, { "epoch": 0.2706823529411765, "grad_norm": 0.6408075091822922, "learning_rate": 3.845118579357678e-06, "loss": 0.024160638451576233, "step": 28760 }, { "epoch": 0.2707294117647059, "grad_norm": 0.4517827183525357, "learning_rate": 3.844784369434546e-06, "loss": 0.021881461143493652, "step": 28765 }, { "epoch": 0.2707764705882353, "grad_norm": 0.6515263157696012, "learning_rate": 3.844450246642818e-06, "loss": 0.018430915474891663, "step": 28770 }, { "epoch": 0.2708235294117647, "grad_norm": 0.6077603024067343, "learning_rate": 3.84411621094464e-06, "loss": 0.0230040043592453, "step": 28775 }, { "epoch": 0.27087058823529414, "grad_norm": 0.6807402141600862, "learning_rate": 3.843782262302182e-06, "loss": 0.02049465775489807, "step": 28780 }, { "epoch": 0.27091764705882354, "grad_norm": 0.6128974244363117, "learning_rate": 3.843448400677637e-06, "loss": 0.02631562650203705, "step": 28785 }, { "epoch": 0.27096470588235294, "grad_norm": 0.5785454480972982, "learning_rate": 3.843114626033221e-06, "loss": 0.018263417482376098, "step": 28790 }, { "epoch": 0.27101176470588234, "grad_norm": 0.7841291659240143, "learning_rate": 3.84278093833117e-06, "loss": 0.02782368063926697, "step": 28795 }, { "epoch": 0.27105882352941174, "grad_norm": 0.6497660177101254, "learning_rate": 3.842447337533749e-06, "loss": 0.02509312629699707, "step": 28800 }, { "epoch": 0.2711058823529412, "grad_norm": 0.5178920336625134, "learning_rate": 3.8421138236032415e-06, "loss": 0.020853231847286224, "step": 28805 }, { "epoch": 0.2711529411764706, "grad_norm": 0.40055110633960056, "learning_rate": 3.841780396501955e-06, "loss": 0.018965718150138856, "step": 28810 }, { "epoch": 0.2712, "grad_norm": 0.4990732514885398, "learning_rate": 3.84144705619222e-06, "loss": 0.018057036399841308, "step": 28815 }, { "epoch": 0.2712470588235294, "grad_norm": 0.4563655899413038, "learning_rate": 3.841113802636389e-06, "loss": 0.0282230943441391, "step": 28820 }, { "epoch": 0.2712941176470588, "grad_norm": 0.6250261199644642, "learning_rate": 3.840780635796838e-06, "loss": 0.028525537252426146, "step": 28825 }, { "epoch": 0.27134117647058825, "grad_norm": 0.5412788606361253, "learning_rate": 3.840447555635968e-06, "loss": 0.02548527717590332, "step": 28830 }, { "epoch": 0.27138823529411765, "grad_norm": 0.5416811301074598, "learning_rate": 3.840114562116198e-06, "loss": 0.024940431118011475, "step": 28835 }, { "epoch": 0.27143529411764705, "grad_norm": 0.5240961933528645, "learning_rate": 3.839781655199974e-06, "loss": 0.02529653310775757, "step": 28840 }, { "epoch": 0.27148235294117645, "grad_norm": 0.5977894810588962, "learning_rate": 3.839448834849764e-06, "loss": 0.019315508008003236, "step": 28845 }, { "epoch": 0.2715294117647059, "grad_norm": 0.7855002111117336, "learning_rate": 3.839116101028057e-06, "loss": 0.0306485116481781, "step": 28850 }, { "epoch": 0.2715764705882353, "grad_norm": 0.5680251269785166, "learning_rate": 3.838783453697366e-06, "loss": 0.022839269042015074, "step": 28855 }, { "epoch": 0.2716235294117647, "grad_norm": 0.3625615213737648, "learning_rate": 3.838450892820228e-06, "loss": 0.02206222265958786, "step": 28860 }, { "epoch": 0.2716705882352941, "grad_norm": 0.7512463651668587, "learning_rate": 3.838118418359198e-06, "loss": 0.026798421144485475, "step": 28865 }, { "epoch": 0.2717176470588235, "grad_norm": 0.6199840285154438, "learning_rate": 3.8377860302768615e-06, "loss": 0.02375732809305191, "step": 28870 }, { "epoch": 0.27176470588235296, "grad_norm": 0.5063366296515798, "learning_rate": 3.8374537285358195e-06, "loss": 0.02170650064945221, "step": 28875 }, { "epoch": 0.27181176470588236, "grad_norm": 0.5687428555307773, "learning_rate": 3.8371215130986985e-06, "loss": 0.024792969226837158, "step": 28880 }, { "epoch": 0.27185882352941176, "grad_norm": 0.4950345466813124, "learning_rate": 3.836789383928149e-06, "loss": 0.02984648048877716, "step": 28885 }, { "epoch": 0.27190588235294116, "grad_norm": 0.6773073198153966, "learning_rate": 3.83645734098684e-06, "loss": 0.025516277551651, "step": 28890 }, { "epoch": 0.27195294117647056, "grad_norm": 0.45307579039927376, "learning_rate": 3.8361253842374676e-06, "loss": 0.018110641837120058, "step": 28895 }, { "epoch": 0.272, "grad_norm": 0.8498245404569555, "learning_rate": 3.835793513642748e-06, "loss": 0.02339368164539337, "step": 28900 }, { "epoch": 0.2720470588235294, "grad_norm": 0.6640986787328425, "learning_rate": 3.8354617291654225e-06, "loss": 0.016731572151184083, "step": 28905 }, { "epoch": 0.2720941176470588, "grad_norm": 0.5718581669423233, "learning_rate": 3.83513003076825e-06, "loss": 0.025279784202575685, "step": 28910 }, { "epoch": 0.2721411764705882, "grad_norm": 0.5767497853117183, "learning_rate": 3.834798418414017e-06, "loss": 0.021489655971527098, "step": 28915 }, { "epoch": 0.2721882352941176, "grad_norm": 0.7464031527479614, "learning_rate": 3.834466892065531e-06, "loss": 0.028097784519195555, "step": 28920 }, { "epoch": 0.2722352941176471, "grad_norm": 0.4993595535043489, "learning_rate": 3.834135451685621e-06, "loss": 0.021361231803894043, "step": 28925 }, { "epoch": 0.2722823529411765, "grad_norm": 0.5335648853282552, "learning_rate": 3.8338040972371374e-06, "loss": 0.02325374186038971, "step": 28930 }, { "epoch": 0.2723294117647059, "grad_norm": 0.6421944741868424, "learning_rate": 3.833472828682956e-06, "loss": 0.029786646366119385, "step": 28935 }, { "epoch": 0.2723764705882353, "grad_norm": 0.43383112810623087, "learning_rate": 3.833141645985973e-06, "loss": 0.023506483435630797, "step": 28940 }, { "epoch": 0.27242352941176473, "grad_norm": 0.48922283955769924, "learning_rate": 3.8328105491091106e-06, "loss": 0.02346715033054352, "step": 28945 }, { "epoch": 0.27247058823529413, "grad_norm": 0.5716962435305961, "learning_rate": 3.832479538015307e-06, "loss": 0.01984817534685135, "step": 28950 }, { "epoch": 0.27251764705882353, "grad_norm": 0.3590068459397714, "learning_rate": 3.832148612667529e-06, "loss": 0.02235197275876999, "step": 28955 }, { "epoch": 0.27256470588235293, "grad_norm": 0.6894898589241861, "learning_rate": 3.831817773028761e-06, "loss": 0.02649434208869934, "step": 28960 }, { "epoch": 0.27261176470588233, "grad_norm": 0.6807263441998108, "learning_rate": 3.831487019062013e-06, "loss": 0.027208858728408815, "step": 28965 }, { "epoch": 0.2726588235294118, "grad_norm": 0.5776131240106885, "learning_rate": 3.831156350730317e-06, "loss": 0.02457032650709152, "step": 28970 }, { "epoch": 0.2727058823529412, "grad_norm": 0.6312984279821974, "learning_rate": 3.830825767996726e-06, "loss": 0.023527106642723082, "step": 28975 }, { "epoch": 0.2727529411764706, "grad_norm": 0.633637202595211, "learning_rate": 3.8304952708243155e-06, "loss": 0.023614437878131868, "step": 28980 }, { "epoch": 0.2728, "grad_norm": 0.5034226678007284, "learning_rate": 3.8301648591761844e-06, "loss": 0.02003956139087677, "step": 28985 }, { "epoch": 0.2728470588235294, "grad_norm": 0.650858406404249, "learning_rate": 3.829834533015453e-06, "loss": 0.022889289259910583, "step": 28990 }, { "epoch": 0.27289411764705884, "grad_norm": 0.5727003898470977, "learning_rate": 3.829504292305264e-06, "loss": 0.02202829122543335, "step": 28995 }, { "epoch": 0.27294117647058824, "grad_norm": 0.537058356840777, "learning_rate": 3.829174137008782e-06, "loss": 0.022594377398490906, "step": 29000 }, { "epoch": 0.27298823529411764, "grad_norm": 0.47065065770736886, "learning_rate": 3.8288440670891955e-06, "loss": 0.02367735207080841, "step": 29005 }, { "epoch": 0.27303529411764704, "grad_norm": 0.6175087668495862, "learning_rate": 3.828514082509712e-06, "loss": 0.025653433799743653, "step": 29010 }, { "epoch": 0.2730823529411765, "grad_norm": 0.6583857653482377, "learning_rate": 3.828184183233565e-06, "loss": 0.019578637182712556, "step": 29015 }, { "epoch": 0.2731294117647059, "grad_norm": 0.5584940172808512, "learning_rate": 3.827854369224007e-06, "loss": 0.02411498725414276, "step": 29020 }, { "epoch": 0.2731764705882353, "grad_norm": 0.6795332063408582, "learning_rate": 3.827524640444314e-06, "loss": 0.019293558597564698, "step": 29025 }, { "epoch": 0.2732235294117647, "grad_norm": 0.7120649188617626, "learning_rate": 3.827194996857786e-06, "loss": 0.02698875069618225, "step": 29030 }, { "epoch": 0.2732705882352941, "grad_norm": 0.5457151496107043, "learning_rate": 3.826865438427741e-06, "loss": 0.027174729108810424, "step": 29035 }, { "epoch": 0.27331764705882355, "grad_norm": 0.4531184236715631, "learning_rate": 3.826535965117523e-06, "loss": 0.023887574672698975, "step": 29040 }, { "epoch": 0.27336470588235295, "grad_norm": 1.9680288224897693, "learning_rate": 3.826206576890495e-06, "loss": 0.024634133279323577, "step": 29045 }, { "epoch": 0.27341176470588235, "grad_norm": 0.6508052189236895, "learning_rate": 3.825877273710044e-06, "loss": 0.02375403940677643, "step": 29050 }, { "epoch": 0.27345882352941175, "grad_norm": 0.48265768326693614, "learning_rate": 3.825548055539579e-06, "loss": 0.025733956694602968, "step": 29055 }, { "epoch": 0.27350588235294115, "grad_norm": 0.5218239667221537, "learning_rate": 3.82521892234253e-06, "loss": 0.0262295663356781, "step": 29060 }, { "epoch": 0.2735529411764706, "grad_norm": 0.5841545498020684, "learning_rate": 3.82488987408235e-06, "loss": 0.025300818681716918, "step": 29065 }, { "epoch": 0.2736, "grad_norm": 0.6680294382681664, "learning_rate": 3.824560910722514e-06, "loss": 0.028294366598129273, "step": 29070 }, { "epoch": 0.2736470588235294, "grad_norm": 0.5971573693977134, "learning_rate": 3.824232032226517e-06, "loss": 0.02223997116088867, "step": 29075 }, { "epoch": 0.2736941176470588, "grad_norm": 0.5569860467233413, "learning_rate": 3.823903238557879e-06, "loss": 0.025759357213974, "step": 29080 }, { "epoch": 0.2737411764705882, "grad_norm": 0.5093161096844201, "learning_rate": 3.823574529680141e-06, "loss": 0.022365352511405943, "step": 29085 }, { "epoch": 0.27378823529411767, "grad_norm": 0.6799082682889933, "learning_rate": 3.823245905556863e-06, "loss": 0.02469031363725662, "step": 29090 }, { "epoch": 0.27383529411764707, "grad_norm": 0.4922262303316995, "learning_rate": 3.822917366151632e-06, "loss": 0.0216191828250885, "step": 29095 }, { "epoch": 0.27388235294117647, "grad_norm": 0.6318070633263525, "learning_rate": 3.822588911428054e-06, "loss": 0.020080885291099547, "step": 29100 }, { "epoch": 0.27392941176470587, "grad_norm": 0.573015913053489, "learning_rate": 3.822260541349755e-06, "loss": 0.024099677801132202, "step": 29105 }, { "epoch": 0.2739764705882353, "grad_norm": 0.8166382378177538, "learning_rate": 3.821932255880387e-06, "loss": 0.026861780881881715, "step": 29110 }, { "epoch": 0.2740235294117647, "grad_norm": 0.7823050896048314, "learning_rate": 3.821604054983621e-06, "loss": 0.02476591169834137, "step": 29115 }, { "epoch": 0.2740705882352941, "grad_norm": 0.8553080890244881, "learning_rate": 3.821275938623152e-06, "loss": 0.026889950037002563, "step": 29120 }, { "epoch": 0.2741176470588235, "grad_norm": 0.6197587871884218, "learning_rate": 3.820947906762693e-06, "loss": 0.025443097949028014, "step": 29125 }, { "epoch": 0.2741647058823529, "grad_norm": 0.47892817028389223, "learning_rate": 3.820619959365985e-06, "loss": 0.02081246078014374, "step": 29130 }, { "epoch": 0.2742117647058824, "grad_norm": 0.60638903612556, "learning_rate": 3.820292096396783e-06, "loss": 0.024565437436103822, "step": 29135 }, { "epoch": 0.2742588235294118, "grad_norm": 0.721980433068355, "learning_rate": 3.81996431781887e-06, "loss": 0.019387823343276978, "step": 29140 }, { "epoch": 0.2743058823529412, "grad_norm": 0.5184465618046298, "learning_rate": 3.819636623596049e-06, "loss": 0.017892025411128998, "step": 29145 }, { "epoch": 0.2743529411764706, "grad_norm": 0.5297096234909353, "learning_rate": 3.819309013692144e-06, "loss": 0.023079830408096313, "step": 29150 }, { "epoch": 0.2744, "grad_norm": 0.5091800349802919, "learning_rate": 3.818981488071e-06, "loss": 0.02310725152492523, "step": 29155 }, { "epoch": 0.27444705882352943, "grad_norm": 0.711312513611528, "learning_rate": 3.818654046696486e-06, "loss": 0.025785017013549804, "step": 29160 }, { "epoch": 0.27449411764705883, "grad_norm": 0.5502601931110928, "learning_rate": 3.818326689532492e-06, "loss": 0.021185648441314698, "step": 29165 }, { "epoch": 0.27454117647058823, "grad_norm": 0.5281324670347934, "learning_rate": 3.817999416542928e-06, "loss": 0.020026277005672454, "step": 29170 }, { "epoch": 0.27458823529411763, "grad_norm": 0.6071693599396821, "learning_rate": 3.817672227691727e-06, "loss": 0.0214416965842247, "step": 29175 }, { "epoch": 0.27463529411764703, "grad_norm": 0.7305149697815766, "learning_rate": 3.817345122942843e-06, "loss": 0.02799173891544342, "step": 29180 }, { "epoch": 0.2746823529411765, "grad_norm": 0.6673951927879017, "learning_rate": 3.817018102260252e-06, "loss": 0.023176778852939606, "step": 29185 }, { "epoch": 0.2747294117647059, "grad_norm": 0.3759453892505649, "learning_rate": 3.816691165607953e-06, "loss": 0.018718451261520386, "step": 29190 }, { "epoch": 0.2747764705882353, "grad_norm": 0.7349909915244578, "learning_rate": 3.816364312949964e-06, "loss": 0.026772916316986084, "step": 29195 }, { "epoch": 0.2748235294117647, "grad_norm": 0.5276810895697707, "learning_rate": 3.816037544250325e-06, "loss": 0.029529300332069398, "step": 29200 }, { "epoch": 0.27487058823529414, "grad_norm": 0.619149805008202, "learning_rate": 3.8157108594731e-06, "loss": 0.02126108705997467, "step": 29205 }, { "epoch": 0.27491764705882354, "grad_norm": 0.7479537285698844, "learning_rate": 3.815384258582373e-06, "loss": 0.017795957624912262, "step": 29210 }, { "epoch": 0.27496470588235294, "grad_norm": 0.7279982201067258, "learning_rate": 3.815057741542248e-06, "loss": 0.020099130272865296, "step": 29215 }, { "epoch": 0.27501176470588234, "grad_norm": 0.7861253210102854, "learning_rate": 3.814731308316852e-06, "loss": 0.020352761447429656, "step": 29220 }, { "epoch": 0.27505882352941174, "grad_norm": 0.5852540573821837, "learning_rate": 3.814404958870333e-06, "loss": 0.022900114953517913, "step": 29225 }, { "epoch": 0.2751058823529412, "grad_norm": 0.7915969617353308, "learning_rate": 3.8140786931668627e-06, "loss": 0.028683021664619446, "step": 29230 }, { "epoch": 0.2751529411764706, "grad_norm": 0.42303276805760637, "learning_rate": 3.8137525111706297e-06, "loss": 0.019651374220848082, "step": 29235 }, { "epoch": 0.2752, "grad_norm": 0.5429528721396547, "learning_rate": 3.81342641284585e-06, "loss": 0.020595282316207886, "step": 29240 }, { "epoch": 0.2752470588235294, "grad_norm": 0.6476536118328067, "learning_rate": 3.8131003981567544e-06, "loss": 0.028194189071655273, "step": 29245 }, { "epoch": 0.2752941176470588, "grad_norm": 0.4516827366109577, "learning_rate": 3.8127744670676004e-06, "loss": 0.023029837012290954, "step": 29250 }, { "epoch": 0.27534117647058826, "grad_norm": 0.4825625301644325, "learning_rate": 3.812448619542664e-06, "loss": 0.022894182801246644, "step": 29255 }, { "epoch": 0.27538823529411766, "grad_norm": 0.5096264281026737, "learning_rate": 3.8121228555462447e-06, "loss": 0.020357432961463928, "step": 29260 }, { "epoch": 0.27543529411764706, "grad_norm": 0.7362625238841397, "learning_rate": 3.81179717504266e-06, "loss": 0.021048161387443542, "step": 29265 }, { "epoch": 0.27548235294117646, "grad_norm": 0.6296925685449608, "learning_rate": 3.811471577996253e-06, "loss": 0.02218602001667023, "step": 29270 }, { "epoch": 0.27552941176470586, "grad_norm": 0.7154550559426406, "learning_rate": 3.811146064371384e-06, "loss": 0.024508047103881835, "step": 29275 }, { "epoch": 0.2755764705882353, "grad_norm": 0.5807943067248366, "learning_rate": 3.8108206341324388e-06, "loss": 0.020876657962799073, "step": 29280 }, { "epoch": 0.2756235294117647, "grad_norm": 0.6020930660464874, "learning_rate": 3.81049528724382e-06, "loss": 0.019622853398323058, "step": 29285 }, { "epoch": 0.2756705882352941, "grad_norm": 0.4295260483094508, "learning_rate": 3.810170023669956e-06, "loss": 0.02473735511302948, "step": 29290 }, { "epoch": 0.2757176470588235, "grad_norm": 0.45615189100752, "learning_rate": 3.8098448433752916e-06, "loss": 0.019820889830589293, "step": 29295 }, { "epoch": 0.27576470588235297, "grad_norm": 0.5312328435229524, "learning_rate": 3.809519746324297e-06, "loss": 0.022597414255142213, "step": 29300 }, { "epoch": 0.27581176470588237, "grad_norm": 0.7146390820720735, "learning_rate": 3.8091947324814614e-06, "loss": 0.020710939168930055, "step": 29305 }, { "epoch": 0.27585882352941177, "grad_norm": 0.8943969496584823, "learning_rate": 3.8088698018112963e-06, "loss": 0.02729179859161377, "step": 29310 }, { "epoch": 0.27590588235294117, "grad_norm": 0.55905669888755, "learning_rate": 3.8085449542783336e-06, "loss": 0.020008869469165802, "step": 29315 }, { "epoch": 0.27595294117647057, "grad_norm": 0.5471105800206969, "learning_rate": 3.808220189847128e-06, "loss": 0.018151295185089112, "step": 29320 }, { "epoch": 0.276, "grad_norm": 0.6225907072715409, "learning_rate": 3.8078955084822512e-06, "loss": 0.021456378698349, "step": 29325 }, { "epoch": 0.2760470588235294, "grad_norm": 0.6723854697000962, "learning_rate": 3.8075709101483014e-06, "loss": 0.023600491881370544, "step": 29330 }, { "epoch": 0.2760941176470588, "grad_norm": 0.5433421306062062, "learning_rate": 3.807246394809894e-06, "loss": 0.029858040809631347, "step": 29335 }, { "epoch": 0.2761411764705882, "grad_norm": 0.5206844978535174, "learning_rate": 3.8069219624316685e-06, "loss": 0.02342866361141205, "step": 29340 }, { "epoch": 0.2761882352941176, "grad_norm": 0.6226919539186001, "learning_rate": 3.8065976129782823e-06, "loss": 0.02529640793800354, "step": 29345 }, { "epoch": 0.2762352941176471, "grad_norm": 0.6302661356603745, "learning_rate": 3.806273346414416e-06, "loss": 0.020967434346675872, "step": 29350 }, { "epoch": 0.2762823529411765, "grad_norm": 0.39592868997569486, "learning_rate": 3.8059491627047705e-06, "loss": 0.018611973524093627, "step": 29355 }, { "epoch": 0.2763294117647059, "grad_norm": 0.5983888214023401, "learning_rate": 3.8056250618140683e-06, "loss": 0.020794953405857085, "step": 29360 }, { "epoch": 0.2763764705882353, "grad_norm": 0.612718801863223, "learning_rate": 3.8053010437070526e-06, "loss": 0.020916800200939178, "step": 29365 }, { "epoch": 0.2764235294117647, "grad_norm": 0.5655292584344108, "learning_rate": 3.8049771083484867e-06, "loss": 0.01919042766094208, "step": 29370 }, { "epoch": 0.27647058823529413, "grad_norm": 0.5726938887992388, "learning_rate": 3.8046532557031578e-06, "loss": 0.02032809555530548, "step": 29375 }, { "epoch": 0.27651764705882353, "grad_norm": 0.6744143844589704, "learning_rate": 3.8043294857358703e-06, "loss": 0.020361900329589844, "step": 29380 }, { "epoch": 0.27656470588235293, "grad_norm": 0.6156367288011692, "learning_rate": 3.804005798411452e-06, "loss": 0.0196912944316864, "step": 29385 }, { "epoch": 0.27661176470588233, "grad_norm": 0.5818102666243656, "learning_rate": 3.803682193694751e-06, "loss": 0.02876814007759094, "step": 29390 }, { "epoch": 0.2766588235294118, "grad_norm": 0.5377493719188801, "learning_rate": 3.8033586715506358e-06, "loss": 0.02766188383102417, "step": 29395 }, { "epoch": 0.2767058823529412, "grad_norm": 0.5761915342894787, "learning_rate": 3.803035231943996e-06, "loss": 0.02268124371767044, "step": 29400 }, { "epoch": 0.2767529411764706, "grad_norm": 0.642133492717007, "learning_rate": 3.8027118748397448e-06, "loss": 0.03355751633644104, "step": 29405 }, { "epoch": 0.2768, "grad_norm": 0.4916412486733042, "learning_rate": 3.802388600202811e-06, "loss": 0.022937993705272674, "step": 29410 }, { "epoch": 0.2768470588235294, "grad_norm": 0.5852183795103021, "learning_rate": 3.8020654079981495e-06, "loss": 0.023390784859657288, "step": 29415 }, { "epoch": 0.27689411764705885, "grad_norm": 0.36556374242036227, "learning_rate": 3.801742298190732e-06, "loss": 0.019269467890262605, "step": 29420 }, { "epoch": 0.27694117647058825, "grad_norm": 0.4740539781711474, "learning_rate": 3.801419270745553e-06, "loss": 0.02134244590997696, "step": 29425 }, { "epoch": 0.27698823529411765, "grad_norm": 0.527505222409685, "learning_rate": 3.8010963256276286e-06, "loss": 0.025111407041549683, "step": 29430 }, { "epoch": 0.27703529411764705, "grad_norm": 0.6731415251984091, "learning_rate": 3.8007734628019934e-06, "loss": 0.019844743609428405, "step": 29435 }, { "epoch": 0.27708235294117645, "grad_norm": 0.7120997556811138, "learning_rate": 3.8004506822337043e-06, "loss": 0.02464756965637207, "step": 29440 }, { "epoch": 0.2771294117647059, "grad_norm": 0.6124312052794935, "learning_rate": 3.80012798388784e-06, "loss": 0.026170367002487184, "step": 29445 }, { "epoch": 0.2771764705882353, "grad_norm": 0.893218241406948, "learning_rate": 3.7998053677294967e-06, "loss": 0.02224581837654114, "step": 29450 }, { "epoch": 0.2772235294117647, "grad_norm": 0.689595814840125, "learning_rate": 3.7994828337237953e-06, "loss": 0.024744904041290282, "step": 29455 }, { "epoch": 0.2772705882352941, "grad_norm": 0.7082823186179834, "learning_rate": 3.7991603818358743e-06, "loss": 0.02409539818763733, "step": 29460 }, { "epoch": 0.2773176470588235, "grad_norm": 0.6823194363052828, "learning_rate": 3.7988380120308936e-06, "loss": 0.025763946771621703, "step": 29465 }, { "epoch": 0.27736470588235296, "grad_norm": 0.693390798966827, "learning_rate": 3.798515724274035e-06, "loss": 0.024344088137149812, "step": 29470 }, { "epoch": 0.27741176470588236, "grad_norm": 0.9840188489915632, "learning_rate": 3.7981935185305e-06, "loss": 0.028224945068359375, "step": 29475 }, { "epoch": 0.27745882352941176, "grad_norm": 0.530185765283511, "learning_rate": 3.7978713947655114e-06, "loss": 0.026115959882736205, "step": 29480 }, { "epoch": 0.27750588235294116, "grad_norm": 0.5194986354678188, "learning_rate": 3.7975493529443112e-06, "loss": 0.022333300113677977, "step": 29485 }, { "epoch": 0.2775529411764706, "grad_norm": 0.5871220191996059, "learning_rate": 3.797227393032164e-06, "loss": 0.018241292238235472, "step": 29490 }, { "epoch": 0.2776, "grad_norm": 0.6260248764998165, "learning_rate": 3.796905514994353e-06, "loss": 0.0240763783454895, "step": 29495 }, { "epoch": 0.2776470588235294, "grad_norm": 0.6597443267726194, "learning_rate": 3.796583718796185e-06, "loss": 0.02410881221294403, "step": 29500 }, { "epoch": 0.2776941176470588, "grad_norm": 0.5874074523092231, "learning_rate": 3.796262004402983e-06, "loss": 0.019930623471736908, "step": 29505 }, { "epoch": 0.2777411764705882, "grad_norm": 0.40881054009329915, "learning_rate": 3.795940371780095e-06, "loss": 0.017880776524543764, "step": 29510 }, { "epoch": 0.27778823529411767, "grad_norm": 1.0800019492726292, "learning_rate": 3.7956188208928856e-06, "loss": 0.019436706602573395, "step": 29515 }, { "epoch": 0.27783529411764707, "grad_norm": 0.43848809653773635, "learning_rate": 3.7952973517067433e-06, "loss": 0.021925446391105653, "step": 29520 }, { "epoch": 0.27788235294117647, "grad_norm": 0.5290998385353918, "learning_rate": 3.7949759641870754e-06, "loss": 0.02271001636981964, "step": 29525 }, { "epoch": 0.27792941176470587, "grad_norm": 0.890205125812914, "learning_rate": 3.79465465829931e-06, "loss": 0.029892164468765258, "step": 29530 }, { "epoch": 0.27797647058823527, "grad_norm": 0.7713686764616957, "learning_rate": 3.794333434008895e-06, "loss": 0.020865267515182494, "step": 29535 }, { "epoch": 0.2780235294117647, "grad_norm": 0.7004706293655256, "learning_rate": 3.7940122912813003e-06, "loss": 0.024504199624061584, "step": 29540 }, { "epoch": 0.2780705882352941, "grad_norm": 0.4797857747306019, "learning_rate": 3.7936912300820156e-06, "loss": 0.02248126417398453, "step": 29545 }, { "epoch": 0.2781176470588235, "grad_norm": 0.624231337210071, "learning_rate": 3.7933702503765496e-06, "loss": 0.020940506458282472, "step": 29550 }, { "epoch": 0.2781647058823529, "grad_norm": 0.6722558033909144, "learning_rate": 3.7930493521304336e-06, "loss": 0.02474350929260254, "step": 29555 }, { "epoch": 0.2782117647058824, "grad_norm": 0.7021188325734842, "learning_rate": 3.792728535309218e-06, "loss": 0.019652459025382995, "step": 29560 }, { "epoch": 0.2782588235294118, "grad_norm": 0.505651579794726, "learning_rate": 3.792407799878473e-06, "loss": 0.025113844871520997, "step": 29565 }, { "epoch": 0.2783058823529412, "grad_norm": 0.5150551955236198, "learning_rate": 3.7920871458037935e-06, "loss": 0.022254863381385805, "step": 29570 }, { "epoch": 0.2783529411764706, "grad_norm": 0.6717227147403447, "learning_rate": 3.7917665730507873e-06, "loss": 0.01903931498527527, "step": 29575 }, { "epoch": 0.2784, "grad_norm": 0.5480875109062263, "learning_rate": 3.791446081585089e-06, "loss": 0.02419782280921936, "step": 29580 }, { "epoch": 0.27844705882352944, "grad_norm": 0.7679340873840439, "learning_rate": 3.7911256713723502e-06, "loss": 0.021316924691200258, "step": 29585 }, { "epoch": 0.27849411764705884, "grad_norm": 0.5713205972741445, "learning_rate": 3.790805342378245e-06, "loss": 0.020802223682403566, "step": 29590 }, { "epoch": 0.27854117647058824, "grad_norm": 0.4469882466780764, "learning_rate": 3.7904850945684657e-06, "loss": 0.019519507884979248, "step": 29595 }, { "epoch": 0.27858823529411764, "grad_norm": 0.8999876667635643, "learning_rate": 3.790164927908725e-06, "loss": 0.026263201236724855, "step": 29600 }, { "epoch": 0.27863529411764704, "grad_norm": 0.6786392555174581, "learning_rate": 3.789844842364758e-06, "loss": 0.024979296326637267, "step": 29605 }, { "epoch": 0.2786823529411765, "grad_norm": 0.4976828652526048, "learning_rate": 3.7895248379023185e-06, "loss": 0.024027062952518462, "step": 29610 }, { "epoch": 0.2787294117647059, "grad_norm": 0.5689342901619986, "learning_rate": 3.7892049144871802e-06, "loss": 0.025247108936309815, "step": 29615 }, { "epoch": 0.2787764705882353, "grad_norm": 0.8734848727193081, "learning_rate": 3.7888850720851377e-06, "loss": 0.02838309109210968, "step": 29620 }, { "epoch": 0.2788235294117647, "grad_norm": 0.6400781012515021, "learning_rate": 3.7885653106620056e-06, "loss": 0.02214789092540741, "step": 29625 }, { "epoch": 0.2788705882352941, "grad_norm": 0.8160253798698783, "learning_rate": 3.7882456301836195e-06, "loss": 0.023054897785186768, "step": 29630 }, { "epoch": 0.27891764705882355, "grad_norm": 0.5604622863954467, "learning_rate": 3.787926030615833e-06, "loss": 0.023309773206710814, "step": 29635 }, { "epoch": 0.27896470588235295, "grad_norm": 1.9596407645310174, "learning_rate": 3.7876065119245224e-06, "loss": 0.027319622039794923, "step": 29640 }, { "epoch": 0.27901176470588235, "grad_norm": 0.6917900028333195, "learning_rate": 3.7872870740755833e-06, "loss": 0.023716360330581665, "step": 29645 }, { "epoch": 0.27905882352941175, "grad_norm": 0.5068307229161209, "learning_rate": 3.78696771703493e-06, "loss": 0.017866452038288117, "step": 29650 }, { "epoch": 0.2791058823529412, "grad_norm": 0.5439883596436329, "learning_rate": 3.7866484407684993e-06, "loss": 0.021404018998146056, "step": 29655 }, { "epoch": 0.2791529411764706, "grad_norm": 0.44563673954709676, "learning_rate": 3.786329245242246e-06, "loss": 0.01907108575105667, "step": 29660 }, { "epoch": 0.2792, "grad_norm": 0.6260735537700366, "learning_rate": 3.7860101304221463e-06, "loss": 0.025827810168266296, "step": 29665 }, { "epoch": 0.2792470588235294, "grad_norm": 0.7485024089506019, "learning_rate": 3.7856910962741955e-06, "loss": 0.022737988829612733, "step": 29670 }, { "epoch": 0.2792941176470588, "grad_norm": 0.6229021834297785, "learning_rate": 3.785372142764411e-06, "loss": 0.025263282656669616, "step": 29675 }, { "epoch": 0.27934117647058826, "grad_norm": 0.41780566043971723, "learning_rate": 3.785053269858827e-06, "loss": 0.01878390908241272, "step": 29680 }, { "epoch": 0.27938823529411766, "grad_norm": 0.638712388670458, "learning_rate": 3.784734477523499e-06, "loss": 0.02203998863697052, "step": 29685 }, { "epoch": 0.27943529411764706, "grad_norm": 0.5978624542840296, "learning_rate": 3.784415765724505e-06, "loss": 0.01762903034687042, "step": 29690 }, { "epoch": 0.27948235294117646, "grad_norm": 0.5029485616975736, "learning_rate": 3.7840971344279404e-06, "loss": 0.017721818387508394, "step": 29695 }, { "epoch": 0.27952941176470586, "grad_norm": 0.9473042024155853, "learning_rate": 3.783778583599921e-06, "loss": 0.02773693799972534, "step": 29700 }, { "epoch": 0.2795764705882353, "grad_norm": 0.6397894629132386, "learning_rate": 3.783460113206582e-06, "loss": 0.024665603041648866, "step": 29705 }, { "epoch": 0.2796235294117647, "grad_norm": 0.606633112315991, "learning_rate": 3.783141723214079e-06, "loss": 0.023617511987686156, "step": 29710 }, { "epoch": 0.2796705882352941, "grad_norm": 0.8546864517997057, "learning_rate": 3.7828234135885894e-06, "loss": 0.0323618233203888, "step": 29715 }, { "epoch": 0.2797176470588235, "grad_norm": 0.3928148413061122, "learning_rate": 3.7825051842963077e-06, "loss": 0.02309444546699524, "step": 29720 }, { "epoch": 0.2797647058823529, "grad_norm": 0.6997343715507707, "learning_rate": 3.7821870353034496e-06, "loss": 0.023611807823181154, "step": 29725 }, { "epoch": 0.27981176470588237, "grad_norm": 0.43826675142088767, "learning_rate": 3.781868966576251e-06, "loss": 0.017272329330444335, "step": 29730 }, { "epoch": 0.27985882352941177, "grad_norm": 0.4550278644023631, "learning_rate": 3.7815509780809672e-06, "loss": 0.01990521848201752, "step": 29735 }, { "epoch": 0.27990588235294117, "grad_norm": 0.6580226047893734, "learning_rate": 3.781233069783873e-06, "loss": 0.025341010093688963, "step": 29740 }, { "epoch": 0.27995294117647057, "grad_norm": 0.6859176633706779, "learning_rate": 3.780915241651264e-06, "loss": 0.025469967722892763, "step": 29745 }, { "epoch": 0.28, "grad_norm": 0.6274460816214632, "learning_rate": 3.7805974936494544e-06, "loss": 0.023624494671821594, "step": 29750 }, { "epoch": 0.2800470588235294, "grad_norm": 1.4691269019855362, "learning_rate": 3.78027982574478e-06, "loss": 0.021825477480888367, "step": 29755 }, { "epoch": 0.2800941176470588, "grad_norm": 0.6403121771709978, "learning_rate": 3.7799622379035944e-06, "loss": 0.024220719933509827, "step": 29760 }, { "epoch": 0.2801411764705882, "grad_norm": 0.41645758762438445, "learning_rate": 3.7796447300922724e-06, "loss": 0.02300350069999695, "step": 29765 }, { "epoch": 0.2801882352941176, "grad_norm": 0.5258989468575875, "learning_rate": 3.779327302277208e-06, "loss": 0.029874420166015624, "step": 29770 }, { "epoch": 0.2802352941176471, "grad_norm": 0.719516798251321, "learning_rate": 3.779009954424814e-06, "loss": 0.02408299595117569, "step": 29775 }, { "epoch": 0.2802823529411765, "grad_norm": 0.5146687263780343, "learning_rate": 3.7786926865015255e-06, "loss": 0.021453306078910828, "step": 29780 }, { "epoch": 0.2803294117647059, "grad_norm": 0.7542467592425357, "learning_rate": 3.7783754984737955e-06, "loss": 0.028036993741989136, "step": 29785 }, { "epoch": 0.2803764705882353, "grad_norm": 0.5916087784754102, "learning_rate": 3.7780583903080954e-06, "loss": 0.02239566296339035, "step": 29790 }, { "epoch": 0.2804235294117647, "grad_norm": 0.7062618522522498, "learning_rate": 3.777741361970919e-06, "loss": 0.020875759422779083, "step": 29795 }, { "epoch": 0.28047058823529414, "grad_norm": 0.5248633877426407, "learning_rate": 3.7774244134287795e-06, "loss": 0.019709041714668273, "step": 29800 }, { "epoch": 0.28051764705882354, "grad_norm": 0.4257456244798528, "learning_rate": 3.777107544648208e-06, "loss": 0.01975470781326294, "step": 29805 }, { "epoch": 0.28056470588235294, "grad_norm": 0.6264239478203012, "learning_rate": 3.7767907555957556e-06, "loss": 0.02407606840133667, "step": 29810 }, { "epoch": 0.28061176470588234, "grad_norm": 0.3909748247018848, "learning_rate": 3.776474046237995e-06, "loss": 0.021782073378562927, "step": 29815 }, { "epoch": 0.28065882352941174, "grad_norm": 0.6799972110346737, "learning_rate": 3.776157416541516e-06, "loss": 0.021366968750953674, "step": 29820 }, { "epoch": 0.2807058823529412, "grad_norm": 0.44339516238897214, "learning_rate": 3.7758408664729295e-06, "loss": 0.020591823756694792, "step": 29825 }, { "epoch": 0.2807529411764706, "grad_norm": 0.49467409675934954, "learning_rate": 3.775524395998865e-06, "loss": 0.018224442005157472, "step": 29830 }, { "epoch": 0.2808, "grad_norm": 0.4421893955768803, "learning_rate": 3.7752080050859734e-06, "loss": 0.01873212605714798, "step": 29835 }, { "epoch": 0.2808470588235294, "grad_norm": 0.5564487461064945, "learning_rate": 3.774891693700924e-06, "loss": 0.01745118498802185, "step": 29840 }, { "epoch": 0.28089411764705885, "grad_norm": 0.6478322584511883, "learning_rate": 3.7745754618104035e-06, "loss": 0.018374544382095338, "step": 29845 }, { "epoch": 0.28094117647058825, "grad_norm": 0.5847830409896283, "learning_rate": 3.774259309381123e-06, "loss": 0.021432924270629882, "step": 29850 }, { "epoch": 0.28098823529411765, "grad_norm": 0.739762583198736, "learning_rate": 3.773943236379808e-06, "loss": 0.0287409633398056, "step": 29855 }, { "epoch": 0.28103529411764705, "grad_norm": 0.5896271851869782, "learning_rate": 3.773627242773207e-06, "loss": 0.02037339210510254, "step": 29860 }, { "epoch": 0.28108235294117645, "grad_norm": 0.6686517684371408, "learning_rate": 3.7733113285280865e-06, "loss": 0.01864689588546753, "step": 29865 }, { "epoch": 0.2811294117647059, "grad_norm": 0.6066219405936907, "learning_rate": 3.7729954936112336e-06, "loss": 0.026311063766479494, "step": 29870 }, { "epoch": 0.2811764705882353, "grad_norm": 0.5443329420325754, "learning_rate": 3.772679737989453e-06, "loss": 0.019461026787757872, "step": 29875 }, { "epoch": 0.2812235294117647, "grad_norm": 0.30810215993883516, "learning_rate": 3.7723640616295704e-06, "loss": 0.02276226580142975, "step": 29880 }, { "epoch": 0.2812705882352941, "grad_norm": 0.6085565916550394, "learning_rate": 3.7720484644984303e-06, "loss": 0.026346567273139953, "step": 29885 }, { "epoch": 0.2813176470588235, "grad_norm": 0.597988036464427, "learning_rate": 3.7717329465628967e-06, "loss": 0.025501823425292967, "step": 29890 }, { "epoch": 0.28136470588235296, "grad_norm": 0.5204273889357037, "learning_rate": 3.771417507789853e-06, "loss": 0.021835672855377197, "step": 29895 }, { "epoch": 0.28141176470588236, "grad_norm": 0.5891831581427929, "learning_rate": 3.771102148146203e-06, "loss": 0.02334701418876648, "step": 29900 }, { "epoch": 0.28145882352941176, "grad_norm": 0.6159000126279447, "learning_rate": 3.770786867598867e-06, "loss": 0.020042218267917633, "step": 29905 }, { "epoch": 0.28150588235294116, "grad_norm": 0.8324126650187006, "learning_rate": 3.770471666114788e-06, "loss": 0.02491724044084549, "step": 29910 }, { "epoch": 0.28155294117647056, "grad_norm": 0.8412374149764809, "learning_rate": 3.7701565436609266e-06, "loss": 0.024528980255126953, "step": 29915 }, { "epoch": 0.2816, "grad_norm": 0.5891434405734017, "learning_rate": 3.7698415002042625e-06, "loss": 0.01853509396314621, "step": 29920 }, { "epoch": 0.2816470588235294, "grad_norm": 0.507777527176367, "learning_rate": 3.7695265357117955e-06, "loss": 0.02016048729419708, "step": 29925 }, { "epoch": 0.2816941176470588, "grad_norm": 0.4106272466368397, "learning_rate": 3.7692116501505457e-06, "loss": 0.02042301595211029, "step": 29930 }, { "epoch": 0.2817411764705882, "grad_norm": 0.9876718451984955, "learning_rate": 3.7688968434875485e-06, "loss": 0.028366059064865112, "step": 29935 }, { "epoch": 0.2817882352941177, "grad_norm": 0.6220028911630321, "learning_rate": 3.7685821156898633e-06, "loss": 0.02420234978199005, "step": 29940 }, { "epoch": 0.2818352941176471, "grad_norm": 0.799080835472038, "learning_rate": 3.7682674667245667e-06, "loss": 0.021275727450847624, "step": 29945 }, { "epoch": 0.2818823529411765, "grad_norm": 0.6202890613311844, "learning_rate": 3.7679528965587537e-06, "loss": 0.02022414207458496, "step": 29950 }, { "epoch": 0.2819294117647059, "grad_norm": 0.6376720255448731, "learning_rate": 3.76763840515954e-06, "loss": 0.020200754702091216, "step": 29955 }, { "epoch": 0.2819764705882353, "grad_norm": 0.5192747698700595, "learning_rate": 3.767323992494059e-06, "loss": 0.020807398855686186, "step": 29960 }, { "epoch": 0.28202352941176473, "grad_norm": 0.7079527326107646, "learning_rate": 3.7670096585294657e-06, "loss": 0.021919164061546325, "step": 29965 }, { "epoch": 0.28207058823529413, "grad_norm": 0.6918297700276136, "learning_rate": 3.7666954032329318e-06, "loss": 0.020635750889778138, "step": 29970 }, { "epoch": 0.28211764705882353, "grad_norm": 0.5490870770626807, "learning_rate": 3.766381226571649e-06, "loss": 0.02176416218280792, "step": 29975 }, { "epoch": 0.28216470588235293, "grad_norm": 0.47784792079655475, "learning_rate": 3.7660671285128285e-06, "loss": 0.019165633618831633, "step": 29980 }, { "epoch": 0.28221176470588233, "grad_norm": 0.8428515942013199, "learning_rate": 3.7657531090237005e-06, "loss": 0.019119422137737273, "step": 29985 }, { "epoch": 0.2822588235294118, "grad_norm": 0.615802615042395, "learning_rate": 3.7654391680715148e-06, "loss": 0.018469932675361633, "step": 29990 }, { "epoch": 0.2823058823529412, "grad_norm": 0.394230456744058, "learning_rate": 3.765125305623538e-06, "loss": 0.018269012868404388, "step": 29995 }, { "epoch": 0.2823529411764706, "grad_norm": 0.6182340931179988, "learning_rate": 3.7648115216470604e-06, "loss": 0.02288973480463028, "step": 30000 }, { "epoch": 0.2823529411764706, "eval_loss": 0.022887835279107094, "eval_runtime": 613.3915, "eval_samples_per_second": 110.859, "eval_steps_per_second": 6.929, "step": 30000 }, { "epoch": 0.2824, "grad_norm": 0.4352126265183901, "learning_rate": 3.7644978161093865e-06, "loss": 0.023446245491504668, "step": 30005 }, { "epoch": 0.2824470588235294, "grad_norm": 0.5782855336322501, "learning_rate": 3.7641841889778412e-06, "loss": 0.02387852966785431, "step": 30010 }, { "epoch": 0.28249411764705884, "grad_norm": 0.5407136712538537, "learning_rate": 3.7638706402197715e-06, "loss": 0.023486655950546265, "step": 30015 }, { "epoch": 0.28254117647058824, "grad_norm": 0.6038658587907357, "learning_rate": 3.763557169802539e-06, "loss": 0.024473144114017485, "step": 30020 }, { "epoch": 0.28258823529411764, "grad_norm": 3.875137794711052, "learning_rate": 3.7632437776935282e-06, "loss": 0.022043685615062713, "step": 30025 }, { "epoch": 0.28263529411764704, "grad_norm": 0.6727427250390535, "learning_rate": 3.7629304638601396e-06, "loss": 0.02272123098373413, "step": 30030 }, { "epoch": 0.2826823529411765, "grad_norm": 0.5742336421276398, "learning_rate": 3.7626172282697936e-06, "loss": 0.0207367479801178, "step": 30035 }, { "epoch": 0.2827294117647059, "grad_norm": 0.48927244924182495, "learning_rate": 3.7623040708899315e-06, "loss": 0.0249939888715744, "step": 30040 }, { "epoch": 0.2827764705882353, "grad_norm": 0.5491057176726173, "learning_rate": 3.7619909916880103e-06, "loss": 0.026722192764282227, "step": 30045 }, { "epoch": 0.2828235294117647, "grad_norm": 0.5564288889697223, "learning_rate": 3.761677990631509e-06, "loss": 0.021966195106506346, "step": 30050 }, { "epoch": 0.2828705882352941, "grad_norm": 0.5464785174400096, "learning_rate": 3.7613650676879226e-06, "loss": 0.017971417307853697, "step": 30055 }, { "epoch": 0.28291764705882355, "grad_norm": 0.8196856358685648, "learning_rate": 3.7610522228247687e-06, "loss": 0.02490009069442749, "step": 30060 }, { "epoch": 0.28296470588235295, "grad_norm": 0.6021304475092459, "learning_rate": 3.7607394560095793e-06, "loss": 0.022247296571731568, "step": 30065 }, { "epoch": 0.28301176470588235, "grad_norm": 0.5355766667932281, "learning_rate": 3.76042676720991e-06, "loss": 0.02473783791065216, "step": 30070 }, { "epoch": 0.28305882352941175, "grad_norm": 0.4890972070032543, "learning_rate": 3.7601141563933315e-06, "loss": 0.021315911412239076, "step": 30075 }, { "epoch": 0.28310588235294115, "grad_norm": 0.6925755910261507, "learning_rate": 3.7598016235274348e-06, "loss": 0.024466773867607115, "step": 30080 }, { "epoch": 0.2831529411764706, "grad_norm": 0.9238661533986072, "learning_rate": 3.7594891685798303e-06, "loss": 0.025346994400024414, "step": 30085 }, { "epoch": 0.2832, "grad_norm": 0.5196241903915555, "learning_rate": 3.7591767915181467e-06, "loss": 0.020288768410682678, "step": 30090 }, { "epoch": 0.2832470588235294, "grad_norm": 0.5911823423375153, "learning_rate": 3.758864492310031e-06, "loss": 0.02585168182849884, "step": 30095 }, { "epoch": 0.2832941176470588, "grad_norm": 0.5198831255237675, "learning_rate": 3.758552270923151e-06, "loss": 0.02160395085811615, "step": 30100 }, { "epoch": 0.28334117647058826, "grad_norm": 0.43148356751598466, "learning_rate": 3.7582401273251893e-06, "loss": 0.018558043241500854, "step": 30105 }, { "epoch": 0.28338823529411766, "grad_norm": 0.7265252582367789, "learning_rate": 3.7579280614838524e-06, "loss": 0.02507941424846649, "step": 30110 }, { "epoch": 0.28343529411764706, "grad_norm": 0.7854646253904741, "learning_rate": 3.757616073366863e-06, "loss": 0.03121136426925659, "step": 30115 }, { "epoch": 0.28348235294117646, "grad_norm": 0.5798974355306759, "learning_rate": 3.7573041629419604e-06, "loss": 0.019559212028980255, "step": 30120 }, { "epoch": 0.28352941176470586, "grad_norm": 0.5974612356275294, "learning_rate": 3.7569923301769068e-06, "loss": 0.02476479709148407, "step": 30125 }, { "epoch": 0.2835764705882353, "grad_norm": 0.4819675570545192, "learning_rate": 3.7566805750394797e-06, "loss": 0.025363481044769286, "step": 30130 }, { "epoch": 0.2836235294117647, "grad_norm": 1.52471680827003, "learning_rate": 3.7563688974974776e-06, "loss": 0.02414490282535553, "step": 30135 }, { "epoch": 0.2836705882352941, "grad_norm": 0.7111950914975113, "learning_rate": 3.7560572975187165e-06, "loss": 0.02168884575366974, "step": 30140 }, { "epoch": 0.2837176470588235, "grad_norm": 0.6466234535873173, "learning_rate": 3.7557457750710326e-06, "loss": 0.019731082022190094, "step": 30145 }, { "epoch": 0.2837647058823529, "grad_norm": 0.8555043146908573, "learning_rate": 3.755434330122277e-06, "loss": 0.023548609018325804, "step": 30150 }, { "epoch": 0.2838117647058824, "grad_norm": 0.6798166865291929, "learning_rate": 3.755122962640325e-06, "loss": 0.027599406242370606, "step": 30155 }, { "epoch": 0.2838588235294118, "grad_norm": 0.5941405750364734, "learning_rate": 3.7548116725930654e-06, "loss": 0.023591570556163788, "step": 30160 }, { "epoch": 0.2839058823529412, "grad_norm": 0.5812144142474746, "learning_rate": 3.7545004599484095e-06, "loss": 0.023433613777160644, "step": 30165 }, { "epoch": 0.2839529411764706, "grad_norm": 0.4729526599632944, "learning_rate": 3.754189324674285e-06, "loss": 0.022482556104660035, "step": 30170 }, { "epoch": 0.284, "grad_norm": 0.7756654772592667, "learning_rate": 3.753878266738638e-06, "loss": 0.022611004114151, "step": 30175 }, { "epoch": 0.28404705882352943, "grad_norm": 0.4405324408142577, "learning_rate": 3.753567286109434e-06, "loss": 0.018552938103675844, "step": 30180 }, { "epoch": 0.28409411764705883, "grad_norm": 0.749008448000889, "learning_rate": 3.7532563827546577e-06, "loss": 0.023828893899917603, "step": 30185 }, { "epoch": 0.28414117647058823, "grad_norm": 0.3948021206690796, "learning_rate": 3.7529455566423125e-06, "loss": 0.017108017206192018, "step": 30190 }, { "epoch": 0.28418823529411763, "grad_norm": 0.7874120131488953, "learning_rate": 3.752634807740417e-06, "loss": 0.03064029514789581, "step": 30195 }, { "epoch": 0.2842352941176471, "grad_norm": 0.6401408807319543, "learning_rate": 3.752324136017013e-06, "loss": 0.02106268107891083, "step": 30200 }, { "epoch": 0.2842823529411765, "grad_norm": 0.5185488956729746, "learning_rate": 3.7520135414401583e-06, "loss": 0.02194199860095978, "step": 30205 }, { "epoch": 0.2843294117647059, "grad_norm": 0.6074248101784793, "learning_rate": 3.7517030239779284e-06, "loss": 0.02249226123094559, "step": 30210 }, { "epoch": 0.2843764705882353, "grad_norm": 0.5468881757008168, "learning_rate": 3.7513925835984195e-06, "loss": 0.023336023092269897, "step": 30215 }, { "epoch": 0.2844235294117647, "grad_norm": 0.6452411057934734, "learning_rate": 3.751082220269745e-06, "loss": 0.02931889295578003, "step": 30220 }, { "epoch": 0.28447058823529414, "grad_norm": 0.5944106805423347, "learning_rate": 3.750771933960037e-06, "loss": 0.01837591826915741, "step": 30225 }, { "epoch": 0.28451764705882354, "grad_norm": 0.6932988389595457, "learning_rate": 3.750461724637447e-06, "loss": 0.0257442444562912, "step": 30230 }, { "epoch": 0.28456470588235294, "grad_norm": 0.8116962848579775, "learning_rate": 3.750151592270143e-06, "loss": 0.019020773470401764, "step": 30235 }, { "epoch": 0.28461176470588234, "grad_norm": 0.6945539200900935, "learning_rate": 3.749841536826312e-06, "loss": 0.02247413694858551, "step": 30240 }, { "epoch": 0.28465882352941174, "grad_norm": 0.5075370975042732, "learning_rate": 3.74953155827416e-06, "loss": 0.02260165512561798, "step": 30245 }, { "epoch": 0.2847058823529412, "grad_norm": 0.5628139193970998, "learning_rate": 3.7492216565819123e-06, "loss": 0.01953389048576355, "step": 30250 }, { "epoch": 0.2847529411764706, "grad_norm": 0.4782670455484811, "learning_rate": 3.74891183171781e-06, "loss": 0.020982752740383147, "step": 30255 }, { "epoch": 0.2848, "grad_norm": 0.519207165790102, "learning_rate": 3.7486020836501156e-06, "loss": 0.021249298751354218, "step": 30260 }, { "epoch": 0.2848470588235294, "grad_norm": 0.5698914746216387, "learning_rate": 3.7482924123471075e-06, "loss": 0.02365705370903015, "step": 30265 }, { "epoch": 0.2848941176470588, "grad_norm": 0.6600228612577581, "learning_rate": 3.7479828177770835e-06, "loss": 0.022816890478134157, "step": 30270 }, { "epoch": 0.28494117647058825, "grad_norm": 0.4184505828439888, "learning_rate": 3.7476732999083593e-06, "loss": 0.02319225072860718, "step": 30275 }, { "epoch": 0.28498823529411765, "grad_norm": 0.3348444937064233, "learning_rate": 3.7473638587092693e-06, "loss": 0.01940847486257553, "step": 30280 }, { "epoch": 0.28503529411764705, "grad_norm": 0.6688041083381506, "learning_rate": 3.747054494148167e-06, "loss": 0.019659391045570372, "step": 30285 }, { "epoch": 0.28508235294117645, "grad_norm": 0.5448165859084692, "learning_rate": 3.7467452061934216e-06, "loss": 0.026876264810562135, "step": 30290 }, { "epoch": 0.2851294117647059, "grad_norm": 0.6104808113595582, "learning_rate": 3.7464359948134233e-06, "loss": 0.02069253921508789, "step": 30295 }, { "epoch": 0.2851764705882353, "grad_norm": 0.5112585071014728, "learning_rate": 3.74612685997658e-06, "loss": 0.02037215530872345, "step": 30300 }, { "epoch": 0.2852235294117647, "grad_norm": 0.47260643821494064, "learning_rate": 3.7458178016513165e-06, "loss": 0.020752373337745666, "step": 30305 }, { "epoch": 0.2852705882352941, "grad_norm": 0.47820143862701403, "learning_rate": 3.7455088198060767e-06, "loss": 0.0244311660528183, "step": 30310 }, { "epoch": 0.2853176470588235, "grad_norm": 0.7397184500818834, "learning_rate": 3.7451999144093232e-06, "loss": 0.024721767008304595, "step": 30315 }, { "epoch": 0.28536470588235296, "grad_norm": 0.5427955604545905, "learning_rate": 3.744891085429536e-06, "loss": 0.025823450088500975, "step": 30320 }, { "epoch": 0.28541176470588236, "grad_norm": 0.4881183236362514, "learning_rate": 3.7445823328352143e-06, "loss": 0.017762506008148195, "step": 30325 }, { "epoch": 0.28545882352941176, "grad_norm": 0.7154599288285298, "learning_rate": 3.7442736565948734e-06, "loss": 0.02406720072031021, "step": 30330 }, { "epoch": 0.28550588235294116, "grad_norm": 0.6088063727786577, "learning_rate": 3.743965056677049e-06, "loss": 0.022415950894355774, "step": 30335 }, { "epoch": 0.28555294117647056, "grad_norm": 0.7617272490348941, "learning_rate": 3.7436565330502943e-06, "loss": 0.023685947060585022, "step": 30340 }, { "epoch": 0.2856, "grad_norm": 0.7534028928364105, "learning_rate": 3.74334808568318e-06, "loss": 0.02419494390487671, "step": 30345 }, { "epoch": 0.2856470588235294, "grad_norm": 0.670939366384765, "learning_rate": 3.743039714544295e-06, "loss": 0.024122254550457002, "step": 30350 }, { "epoch": 0.2856941176470588, "grad_norm": 0.4084490260188238, "learning_rate": 3.742731419602248e-06, "loss": 0.023163726925849913, "step": 30355 }, { "epoch": 0.2857411764705882, "grad_norm": 0.5892580997337327, "learning_rate": 3.7424232008256634e-06, "loss": 0.020115964114665985, "step": 30360 }, { "epoch": 0.2857882352941176, "grad_norm": 0.3494942200476906, "learning_rate": 3.742115058183184e-06, "loss": 0.02154475450515747, "step": 30365 }, { "epoch": 0.2858352941176471, "grad_norm": 0.5804758815407989, "learning_rate": 3.741806991643473e-06, "loss": 0.019592873752117157, "step": 30370 }, { "epoch": 0.2858823529411765, "grad_norm": 0.6185388736871924, "learning_rate": 3.7414990011752094e-06, "loss": 0.022509706020355225, "step": 30375 }, { "epoch": 0.2859294117647059, "grad_norm": 0.5641098541020043, "learning_rate": 3.7411910867470906e-06, "loss": 0.022817760705947876, "step": 30380 }, { "epoch": 0.2859764705882353, "grad_norm": 0.6274979335522974, "learning_rate": 3.7408832483278334e-06, "loss": 0.026169225573539734, "step": 30385 }, { "epoch": 0.28602352941176473, "grad_norm": 0.9018736905594872, "learning_rate": 3.74057548588617e-06, "loss": 0.026769840717315675, "step": 30390 }, { "epoch": 0.28607058823529413, "grad_norm": 0.7025780630300147, "learning_rate": 3.740267799390854e-06, "loss": 0.02648269832134247, "step": 30395 }, { "epoch": 0.28611764705882353, "grad_norm": 0.5753031094697884, "learning_rate": 3.739960188810653e-06, "loss": 0.018892386555671693, "step": 30400 }, { "epoch": 0.28616470588235293, "grad_norm": 0.6441605674400032, "learning_rate": 3.739652654114357e-06, "loss": 0.022929567098617553, "step": 30405 }, { "epoch": 0.28621176470588233, "grad_norm": 0.5620836930511395, "learning_rate": 3.73934519527077e-06, "loss": 0.01990344822406769, "step": 30410 }, { "epoch": 0.2862588235294118, "grad_norm": 0.8078840100081538, "learning_rate": 3.739037812248715e-06, "loss": 0.025686466693878175, "step": 30415 }, { "epoch": 0.2863058823529412, "grad_norm": 0.5536342557597558, "learning_rate": 3.7387305050170363e-06, "loss": 0.023348423838615417, "step": 30420 }, { "epoch": 0.2863529411764706, "grad_norm": 0.582798996133706, "learning_rate": 3.738423273544592e-06, "loss": 0.023491214215755462, "step": 30425 }, { "epoch": 0.2864, "grad_norm": 0.41053211314419763, "learning_rate": 3.7381161178002583e-06, "loss": 0.02297411561012268, "step": 30430 }, { "epoch": 0.2864470588235294, "grad_norm": 0.41940652544677687, "learning_rate": 3.7378090377529323e-06, "loss": 0.023491448163986205, "step": 30435 }, { "epoch": 0.28649411764705884, "grad_norm": 0.6346968589934784, "learning_rate": 3.7375020333715263e-06, "loss": 0.028155863285064697, "step": 30440 }, { "epoch": 0.28654117647058824, "grad_norm": 0.32989311538459837, "learning_rate": 3.7371951046249714e-06, "loss": 0.021438878774642945, "step": 30445 }, { "epoch": 0.28658823529411764, "grad_norm": 0.5346027681696186, "learning_rate": 3.7368882514822167e-06, "loss": 0.016692003607749938, "step": 30450 }, { "epoch": 0.28663529411764704, "grad_norm": 0.5432800982432805, "learning_rate": 3.7365814739122286e-06, "loss": 0.018817493319511415, "step": 30455 }, { "epoch": 0.28668235294117644, "grad_norm": 0.6636009114601583, "learning_rate": 3.7362747718839926e-06, "loss": 0.022700557112693788, "step": 30460 }, { "epoch": 0.2867294117647059, "grad_norm": 0.5696363812631703, "learning_rate": 3.7359681453665097e-06, "loss": 0.02348073720932007, "step": 30465 }, { "epoch": 0.2867764705882353, "grad_norm": 0.5217757168662843, "learning_rate": 3.7356615943288016e-06, "loss": 0.022638823091983794, "step": 30470 }, { "epoch": 0.2868235294117647, "grad_norm": 0.511768134238066, "learning_rate": 3.735355118739905e-06, "loss": 0.019307947158813475, "step": 30475 }, { "epoch": 0.2868705882352941, "grad_norm": 0.700085899133926, "learning_rate": 3.7350487185688772e-06, "loss": 0.022788003087043762, "step": 30480 }, { "epoch": 0.28691764705882356, "grad_norm": 0.663698054888364, "learning_rate": 3.7347423937847904e-06, "loss": 0.025170499086380006, "step": 30485 }, { "epoch": 0.28696470588235296, "grad_norm": 0.4504722309720082, "learning_rate": 3.7344361443567356e-06, "loss": 0.02269287556409836, "step": 30490 }, { "epoch": 0.28701176470588236, "grad_norm": 0.5678046518458498, "learning_rate": 3.7341299702538236e-06, "loss": 0.024211686849594117, "step": 30495 }, { "epoch": 0.28705882352941176, "grad_norm": 0.8122275540906406, "learning_rate": 3.73382387144518e-06, "loss": 0.02096415460109711, "step": 30500 }, { "epoch": 0.28710588235294116, "grad_norm": 1.6313907985558045, "learning_rate": 3.733517847899949e-06, "loss": 0.02195557951927185, "step": 30505 }, { "epoch": 0.2871529411764706, "grad_norm": 0.6939652459128021, "learning_rate": 3.733211899587293e-06, "loss": 0.025617879629135133, "step": 30510 }, { "epoch": 0.2872, "grad_norm": 0.6166177041808549, "learning_rate": 3.732906026476393e-06, "loss": 0.022996731102466583, "step": 30515 }, { "epoch": 0.2872470588235294, "grad_norm": 0.6740069249910126, "learning_rate": 3.7326002285364442e-06, "loss": 0.018781065940856934, "step": 30520 }, { "epoch": 0.2872941176470588, "grad_norm": 0.6388531918969006, "learning_rate": 3.7322945057366642e-06, "loss": 0.02057865560054779, "step": 30525 }, { "epoch": 0.2873411764705882, "grad_norm": 0.6911602057170362, "learning_rate": 3.731988858046285e-06, "loss": 0.024228449165821075, "step": 30530 }, { "epoch": 0.28738823529411767, "grad_norm": 0.6753677372759823, "learning_rate": 3.7316832854345563e-06, "loss": 0.027360323071479797, "step": 30535 }, { "epoch": 0.28743529411764707, "grad_norm": 0.4733834419041038, "learning_rate": 3.7313777878707473e-06, "loss": 0.01861831247806549, "step": 30540 }, { "epoch": 0.28748235294117647, "grad_norm": 0.5543987148468785, "learning_rate": 3.7310723653241427e-06, "loss": 0.02311924397945404, "step": 30545 }, { "epoch": 0.28752941176470587, "grad_norm": 0.5219828794545177, "learning_rate": 3.7307670177640458e-06, "loss": 0.02655559778213501, "step": 30550 }, { "epoch": 0.28757647058823527, "grad_norm": 0.6568324232151269, "learning_rate": 3.730461745159779e-06, "loss": 0.02133137732744217, "step": 30555 }, { "epoch": 0.2876235294117647, "grad_norm": 0.5379587326667111, "learning_rate": 3.7301565474806797e-06, "loss": 0.021712958812713623, "step": 30560 }, { "epoch": 0.2876705882352941, "grad_norm": 0.5796380682619675, "learning_rate": 3.7298514246961036e-06, "loss": 0.025412780046463013, "step": 30565 }, { "epoch": 0.2877176470588235, "grad_norm": 0.3466272832358637, "learning_rate": 3.729546376775425e-06, "loss": 0.018705657124519347, "step": 30570 }, { "epoch": 0.2877647058823529, "grad_norm": 0.47018855549084565, "learning_rate": 3.729241403688034e-06, "loss": 0.021159470081329346, "step": 30575 }, { "epoch": 0.2878117647058824, "grad_norm": 0.5617533976576896, "learning_rate": 3.72893650540334e-06, "loss": 0.027503955364227294, "step": 30580 }, { "epoch": 0.2878588235294118, "grad_norm": 0.48088154925024057, "learning_rate": 3.7286316818907693e-06, "loss": 0.022458389401435852, "step": 30585 }, { "epoch": 0.2879058823529412, "grad_norm": 0.5469716579054955, "learning_rate": 3.7283269331197647e-06, "loss": 0.027396818995475768, "step": 30590 }, { "epoch": 0.2879529411764706, "grad_norm": 0.6975859256738499, "learning_rate": 3.728022259059787e-06, "loss": 0.028907161951065064, "step": 30595 }, { "epoch": 0.288, "grad_norm": 0.5343803247590315, "learning_rate": 3.7277176596803166e-06, "loss": 0.017606756091117857, "step": 30600 }, { "epoch": 0.28804705882352943, "grad_norm": 0.6598019768258355, "learning_rate": 3.7274131349508475e-06, "loss": 0.031055921316146852, "step": 30605 }, { "epoch": 0.28809411764705883, "grad_norm": 0.48611302614695634, "learning_rate": 3.727108684840895e-06, "loss": 0.022913777828216554, "step": 30610 }, { "epoch": 0.28814117647058823, "grad_norm": 0.6343303270501797, "learning_rate": 3.7268043093199878e-06, "loss": 0.02438627928495407, "step": 30615 }, { "epoch": 0.28818823529411763, "grad_norm": 0.5438470982348801, "learning_rate": 3.7265000083576756e-06, "loss": 0.01895911991596222, "step": 30620 }, { "epoch": 0.28823529411764703, "grad_norm": 0.6063588323342313, "learning_rate": 3.726195781923524e-06, "loss": 0.020847570896148682, "step": 30625 }, { "epoch": 0.2882823529411765, "grad_norm": 0.42904956219280144, "learning_rate": 3.7258916299871155e-06, "loss": 0.016139887273311615, "step": 30630 }, { "epoch": 0.2883294117647059, "grad_norm": 0.38719686912032725, "learning_rate": 3.725587552518052e-06, "loss": 0.01749749481678009, "step": 30635 }, { "epoch": 0.2883764705882353, "grad_norm": 0.7593368229209534, "learning_rate": 3.7252835494859492e-06, "loss": 0.02166995406150818, "step": 30640 }, { "epoch": 0.2884235294117647, "grad_norm": 0.43974427549057327, "learning_rate": 3.7249796208604433e-06, "loss": 0.02264767587184906, "step": 30645 }, { "epoch": 0.28847058823529415, "grad_norm": 0.8062136853821993, "learning_rate": 3.7246757666111873e-06, "loss": 0.023542693257331847, "step": 30650 }, { "epoch": 0.28851764705882355, "grad_norm": 0.5253236239634059, "learning_rate": 3.72437198670785e-06, "loss": 0.022173395752906798, "step": 30655 }, { "epoch": 0.28856470588235295, "grad_norm": 1.043048651237076, "learning_rate": 3.724068281120119e-06, "loss": 0.024758365750312806, "step": 30660 }, { "epoch": 0.28861176470588235, "grad_norm": 0.4672633071842461, "learning_rate": 3.723764649817699e-06, "loss": 0.020795495808124544, "step": 30665 }, { "epoch": 0.28865882352941175, "grad_norm": 0.672332689328508, "learning_rate": 3.7234610927703124e-06, "loss": 0.02186032384634018, "step": 30670 }, { "epoch": 0.2887058823529412, "grad_norm": 0.6169180563087117, "learning_rate": 3.723157609947697e-06, "loss": 0.021054226160049438, "step": 30675 }, { "epoch": 0.2887529411764706, "grad_norm": 0.6001160517460095, "learning_rate": 3.722854201319609e-06, "loss": 0.02409282624721527, "step": 30680 }, { "epoch": 0.2888, "grad_norm": 1.1652427478605634, "learning_rate": 3.7225508668558226e-06, "loss": 0.018492493033409118, "step": 30685 }, { "epoch": 0.2888470588235294, "grad_norm": 0.5628288661764969, "learning_rate": 3.7222476065261275e-06, "loss": 0.02329913079738617, "step": 30690 }, { "epoch": 0.2888941176470588, "grad_norm": 0.6464385057610178, "learning_rate": 3.7219444203003334e-06, "loss": 0.023275358974933623, "step": 30695 }, { "epoch": 0.28894117647058826, "grad_norm": 0.6129672302904311, "learning_rate": 3.7216413081482642e-06, "loss": 0.025069496035575865, "step": 30700 }, { "epoch": 0.28898823529411766, "grad_norm": 0.7257686279794214, "learning_rate": 3.7213382700397627e-06, "loss": 0.022614774107933045, "step": 30705 }, { "epoch": 0.28903529411764706, "grad_norm": 0.6464809600122595, "learning_rate": 3.7210353059446887e-06, "loss": 0.02517799437046051, "step": 30710 }, { "epoch": 0.28908235294117646, "grad_norm": 0.5256580803505561, "learning_rate": 3.720732415832918e-06, "loss": 0.022393305599689484, "step": 30715 }, { "epoch": 0.28912941176470586, "grad_norm": 0.5961701975154574, "learning_rate": 3.7204295996743457e-06, "loss": 0.02167706787586212, "step": 30720 }, { "epoch": 0.2891764705882353, "grad_norm": 0.7248577942450699, "learning_rate": 3.7201268574388826e-06, "loss": 0.022587178647518157, "step": 30725 }, { "epoch": 0.2892235294117647, "grad_norm": 0.4162567021858486, "learning_rate": 3.719824189096456e-06, "loss": 0.021774956583976747, "step": 30730 }, { "epoch": 0.2892705882352941, "grad_norm": 0.5634278857241338, "learning_rate": 3.7195215946170126e-06, "loss": 0.02059989869594574, "step": 30735 }, { "epoch": 0.2893176470588235, "grad_norm": 0.5982022818112852, "learning_rate": 3.7192190739705138e-06, "loss": 0.02009611129760742, "step": 30740 }, { "epoch": 0.28936470588235297, "grad_norm": 0.48658975805147003, "learning_rate": 3.7189166271269393e-06, "loss": 0.02119911015033722, "step": 30745 }, { "epoch": 0.28941176470588237, "grad_norm": 0.5771369001742713, "learning_rate": 3.718614254056286e-06, "loss": 0.026444214582443237, "step": 30750 }, { "epoch": 0.28945882352941177, "grad_norm": 0.9943901109519054, "learning_rate": 3.718311954728567e-06, "loss": 0.021039842069149016, "step": 30755 }, { "epoch": 0.28950588235294117, "grad_norm": 0.7438883007053646, "learning_rate": 3.718009729113814e-06, "loss": 0.022740545868873595, "step": 30760 }, { "epoch": 0.28955294117647057, "grad_norm": 0.6990723967689737, "learning_rate": 3.717707577182074e-06, "loss": 0.02450827360153198, "step": 30765 }, { "epoch": 0.2896, "grad_norm": 0.43145009685823343, "learning_rate": 3.7174054989034123e-06, "loss": 0.020464134216308594, "step": 30770 }, { "epoch": 0.2896470588235294, "grad_norm": 0.48178795962888665, "learning_rate": 3.71710349424791e-06, "loss": 0.016947340965270997, "step": 30775 }, { "epoch": 0.2896941176470588, "grad_norm": 0.47883393358335025, "learning_rate": 3.7168015631856675e-06, "loss": 0.02203715741634369, "step": 30780 }, { "epoch": 0.2897411764705882, "grad_norm": 0.7064377161629589, "learning_rate": 3.7164997056867997e-06, "loss": 0.01932472735643387, "step": 30785 }, { "epoch": 0.2897882352941176, "grad_norm": 0.5458314217789915, "learning_rate": 3.7161979217214384e-06, "loss": 0.022195100784301758, "step": 30790 }, { "epoch": 0.2898352941176471, "grad_norm": 0.5929321133903421, "learning_rate": 3.7158962112597352e-06, "loss": 0.02171963155269623, "step": 30795 }, { "epoch": 0.2898823529411765, "grad_norm": 0.6469292615275164, "learning_rate": 3.715594574271856e-06, "loss": 0.02203097641468048, "step": 30800 }, { "epoch": 0.2899294117647059, "grad_norm": 0.5809095848057917, "learning_rate": 3.715293010727985e-06, "loss": 0.02683107852935791, "step": 30805 }, { "epoch": 0.2899764705882353, "grad_norm": 0.5001574474350219, "learning_rate": 3.7149915205983226e-06, "loss": 0.024897903203964233, "step": 30810 }, { "epoch": 0.2900235294117647, "grad_norm": 0.4253232023650564, "learning_rate": 3.7146901038530862e-06, "loss": 0.023161736130714417, "step": 30815 }, { "epoch": 0.29007058823529414, "grad_norm": 0.6000472956907363, "learning_rate": 3.71438876046251e-06, "loss": 0.019871726632118225, "step": 30820 }, { "epoch": 0.29011764705882354, "grad_norm": 0.5017085536482149, "learning_rate": 3.7140874903968467e-06, "loss": 0.020125208795070647, "step": 30825 }, { "epoch": 0.29016470588235294, "grad_norm": 0.619865430194329, "learning_rate": 3.7137862936263636e-06, "loss": 0.025600260496139525, "step": 30830 }, { "epoch": 0.29021176470588234, "grad_norm": 0.702347353423794, "learning_rate": 3.7134851701213467e-06, "loss": 0.0244205504655838, "step": 30835 }, { "epoch": 0.2902588235294118, "grad_norm": 0.5772198168006304, "learning_rate": 3.713184119852097e-06, "loss": 0.019878774881362915, "step": 30840 }, { "epoch": 0.2903058823529412, "grad_norm": 0.6571656018629013, "learning_rate": 3.7128831427889334e-06, "loss": 0.02440352886915207, "step": 30845 }, { "epoch": 0.2903529411764706, "grad_norm": 0.6033131679714405, "learning_rate": 3.7125822389021926e-06, "loss": 0.020627978444099426, "step": 30850 }, { "epoch": 0.2904, "grad_norm": 0.6448943995126503, "learning_rate": 3.712281408162226e-06, "loss": 0.024418503046035767, "step": 30855 }, { "epoch": 0.2904470588235294, "grad_norm": 0.6000445749258244, "learning_rate": 3.7119806505394042e-06, "loss": 0.01743769347667694, "step": 30860 }, { "epoch": 0.29049411764705885, "grad_norm": 0.5703037005703787, "learning_rate": 3.7116799660041127e-06, "loss": 0.02205316722393036, "step": 30865 }, { "epoch": 0.29054117647058825, "grad_norm": 0.9831463204703823, "learning_rate": 3.711379354526754e-06, "loss": 0.02227019816637039, "step": 30870 }, { "epoch": 0.29058823529411765, "grad_norm": 0.6379081969636683, "learning_rate": 3.711078816077749e-06, "loss": 0.01672530472278595, "step": 30875 }, { "epoch": 0.29063529411764705, "grad_norm": 0.49769601024912546, "learning_rate": 3.7107783506275334e-06, "loss": 0.021092328429222106, "step": 30880 }, { "epoch": 0.29068235294117645, "grad_norm": 0.497713830494481, "learning_rate": 3.7104779581465606e-06, "loss": 0.025135695934295654, "step": 30885 }, { "epoch": 0.2907294117647059, "grad_norm": 0.2749049769688326, "learning_rate": 3.7101776386053e-06, "loss": 0.021110035479068756, "step": 30890 }, { "epoch": 0.2907764705882353, "grad_norm": 0.6770316457575217, "learning_rate": 3.7098773919742396e-06, "loss": 0.021020613610744476, "step": 30895 }, { "epoch": 0.2908235294117647, "grad_norm": 0.5542970306647274, "learning_rate": 3.7095772182238816e-06, "loss": 0.02517879009246826, "step": 30900 }, { "epoch": 0.2908705882352941, "grad_norm": 0.5920573999910423, "learning_rate": 3.709277117324747e-06, "loss": 0.0252620130777359, "step": 30905 }, { "epoch": 0.2909176470588235, "grad_norm": 0.609669358760018, "learning_rate": 3.708977089247372e-06, "loss": 0.02199782133102417, "step": 30910 }, { "epoch": 0.29096470588235296, "grad_norm": 0.5863994689134447, "learning_rate": 3.708677133962311e-06, "loss": 0.01969825625419617, "step": 30915 }, { "epoch": 0.29101176470588236, "grad_norm": 0.5628795232629669, "learning_rate": 3.7083772514401332e-06, "loss": 0.02721180021762848, "step": 30920 }, { "epoch": 0.29105882352941176, "grad_norm": 0.6073393274856317, "learning_rate": 3.708077441651426e-06, "loss": 0.025174614787101746, "step": 30925 }, { "epoch": 0.29110588235294116, "grad_norm": 0.6383827662525097, "learning_rate": 3.707777704566793e-06, "loss": 0.025345897674560545, "step": 30930 }, { "epoch": 0.2911529411764706, "grad_norm": 0.8204503412586116, "learning_rate": 3.707478040156853e-06, "loss": 0.023753906786441802, "step": 30935 }, { "epoch": 0.2912, "grad_norm": 0.6215648918952782, "learning_rate": 3.707178448392244e-06, "loss": 0.023082372546195985, "step": 30940 }, { "epoch": 0.2912470588235294, "grad_norm": 0.5393291846111086, "learning_rate": 3.7068789292436195e-06, "loss": 0.01814514398574829, "step": 30945 }, { "epoch": 0.2912941176470588, "grad_norm": 0.8372407274610096, "learning_rate": 3.7065794826816486e-06, "loss": 0.025574207305908203, "step": 30950 }, { "epoch": 0.2913411764705882, "grad_norm": 0.59822855560988, "learning_rate": 3.7062801086770184e-06, "loss": 0.023742066323757173, "step": 30955 }, { "epoch": 0.29138823529411767, "grad_norm": 1.3546501866126877, "learning_rate": 3.7059808072004323e-06, "loss": 0.020795202255249022, "step": 30960 }, { "epoch": 0.29143529411764707, "grad_norm": 0.6989944620233265, "learning_rate": 3.7056815782226086e-06, "loss": 0.024006134271621703, "step": 30965 }, { "epoch": 0.29148235294117647, "grad_norm": 0.4910046465443233, "learning_rate": 3.705382421714285e-06, "loss": 0.025182193517684935, "step": 30970 }, { "epoch": 0.29152941176470587, "grad_norm": 0.6511665667568896, "learning_rate": 3.7050833376462124e-06, "loss": 0.022733551263809205, "step": 30975 }, { "epoch": 0.29157647058823527, "grad_norm": 0.5541073093346952, "learning_rate": 3.704784325989162e-06, "loss": 0.023476731777191163, "step": 30980 }, { "epoch": 0.2916235294117647, "grad_norm": 0.7401072824529004, "learning_rate": 3.7044853867139187e-06, "loss": 0.027684777975082397, "step": 30985 }, { "epoch": 0.2916705882352941, "grad_norm": 0.3767735899674133, "learning_rate": 3.7041865197912846e-06, "loss": 0.02359769344329834, "step": 30990 }, { "epoch": 0.2917176470588235, "grad_norm": 0.6755716687282886, "learning_rate": 3.7038877251920783e-06, "loss": 0.019895996153354644, "step": 30995 }, { "epoch": 0.2917647058823529, "grad_norm": 0.4757218587483599, "learning_rate": 3.703589002887136e-06, "loss": 0.022935427725315094, "step": 31000 }, { "epoch": 0.2918117647058823, "grad_norm": 0.6186896815437773, "learning_rate": 3.703290352847308e-06, "loss": 0.02262442111968994, "step": 31005 }, { "epoch": 0.2918588235294118, "grad_norm": 0.30870729140052056, "learning_rate": 3.7029917750434637e-06, "loss": 0.018700030446052552, "step": 31010 }, { "epoch": 0.2919058823529412, "grad_norm": 0.4733370350764321, "learning_rate": 3.702693269446487e-06, "loss": 0.032503950595855716, "step": 31015 }, { "epoch": 0.2919529411764706, "grad_norm": 0.669956819760025, "learning_rate": 3.702394836027278e-06, "loss": 0.015087324380874633, "step": 31020 }, { "epoch": 0.292, "grad_norm": 0.64603886960176, "learning_rate": 3.702096474756756e-06, "loss": 0.022524192929267883, "step": 31025 }, { "epoch": 0.29204705882352944, "grad_norm": 0.516478824020331, "learning_rate": 3.7017981856058534e-06, "loss": 0.023444661498069765, "step": 31030 }, { "epoch": 0.29209411764705884, "grad_norm": 0.4448972662213852, "learning_rate": 3.7014999685455205e-06, "loss": 0.022793099284172058, "step": 31035 }, { "epoch": 0.29214117647058824, "grad_norm": 0.6453071238819769, "learning_rate": 3.7012018235467244e-06, "loss": 0.021462514996528625, "step": 31040 }, { "epoch": 0.29218823529411764, "grad_norm": 0.5929748845111557, "learning_rate": 3.7009037505804487e-06, "loss": 0.021755027770996093, "step": 31045 }, { "epoch": 0.29223529411764704, "grad_norm": 0.3878017158751231, "learning_rate": 3.7006057496176918e-06, "loss": 0.014907664060592652, "step": 31050 }, { "epoch": 0.2922823529411765, "grad_norm": 0.6939335437830401, "learning_rate": 3.7003078206294687e-06, "loss": 0.01960535943508148, "step": 31055 }, { "epoch": 0.2923294117647059, "grad_norm": 2.8572438221338836, "learning_rate": 3.7000099635868125e-06, "loss": 0.019247394800186158, "step": 31060 }, { "epoch": 0.2923764705882353, "grad_norm": 0.7677192143925237, "learning_rate": 3.6997121784607705e-06, "loss": 0.023317056894302367, "step": 31065 }, { "epoch": 0.2924235294117647, "grad_norm": 0.4900944882943087, "learning_rate": 3.6994144652224084e-06, "loss": 0.024096333980560304, "step": 31070 }, { "epoch": 0.2924705882352941, "grad_norm": 0.4985215954092385, "learning_rate": 3.699116823842807e-06, "loss": 0.020955705642700197, "step": 31075 }, { "epoch": 0.29251764705882355, "grad_norm": 0.5159774356415108, "learning_rate": 3.698819254293062e-06, "loss": 0.019084346294403077, "step": 31080 }, { "epoch": 0.29256470588235295, "grad_norm": 0.8481243030771342, "learning_rate": 3.698521756544289e-06, "loss": 0.020128557085990907, "step": 31085 }, { "epoch": 0.29261176470588235, "grad_norm": 0.5322247750626149, "learning_rate": 3.6982243305676162e-06, "loss": 0.022476968169212342, "step": 31090 }, { "epoch": 0.29265882352941175, "grad_norm": 0.535007824595483, "learning_rate": 3.6979269763341903e-06, "loss": 0.023248881101608276, "step": 31095 }, { "epoch": 0.29270588235294115, "grad_norm": 0.8054782833995344, "learning_rate": 3.697629693815173e-06, "loss": 0.02741323709487915, "step": 31100 }, { "epoch": 0.2927529411764706, "grad_norm": 0.5994350318329678, "learning_rate": 3.6973324829817426e-06, "loss": 0.02379283607006073, "step": 31105 }, { "epoch": 0.2928, "grad_norm": 0.6271626528006381, "learning_rate": 3.697035343805094e-06, "loss": 0.021586471796035768, "step": 31110 }, { "epoch": 0.2928470588235294, "grad_norm": 0.6603976819619654, "learning_rate": 3.6967382762564384e-06, "loss": 0.020029342174530028, "step": 31115 }, { "epoch": 0.2928941176470588, "grad_norm": 0.5424909319320085, "learning_rate": 3.6964412803070026e-06, "loss": 0.02043691873550415, "step": 31120 }, { "epoch": 0.29294117647058826, "grad_norm": 0.5015396196747732, "learning_rate": 3.69614435592803e-06, "loss": 0.015322171151638031, "step": 31125 }, { "epoch": 0.29298823529411766, "grad_norm": 0.4648585476808219, "learning_rate": 3.6958475030907794e-06, "loss": 0.01777816712856293, "step": 31130 }, { "epoch": 0.29303529411764706, "grad_norm": 0.545816234801245, "learning_rate": 3.6955507217665272e-06, "loss": 0.023150807619094847, "step": 31135 }, { "epoch": 0.29308235294117646, "grad_norm": 0.6957012589031022, "learning_rate": 3.695254011926564e-06, "loss": 0.02694222629070282, "step": 31140 }, { "epoch": 0.29312941176470586, "grad_norm": 0.4955342703392517, "learning_rate": 3.6949573735421985e-06, "loss": 0.020898565649986267, "step": 31145 }, { "epoch": 0.2931764705882353, "grad_norm": 0.5287393764461935, "learning_rate": 3.6946608065847546e-06, "loss": 0.020172327756881714, "step": 31150 }, { "epoch": 0.2932235294117647, "grad_norm": 0.5127434490790516, "learning_rate": 3.694364311025572e-06, "loss": 0.018581721186637878, "step": 31155 }, { "epoch": 0.2932705882352941, "grad_norm": 0.5239964272275859, "learning_rate": 3.6940678868360064e-06, "loss": 0.025665989518165587, "step": 31160 }, { "epoch": 0.2933176470588235, "grad_norm": 0.7321134508539744, "learning_rate": 3.6937715339874304e-06, "loss": 0.02350442111492157, "step": 31165 }, { "epoch": 0.2933647058823529, "grad_norm": 0.581953345304975, "learning_rate": 3.693475252451233e-06, "loss": 0.01920264959335327, "step": 31170 }, { "epoch": 0.29341176470588237, "grad_norm": 0.5298741935002081, "learning_rate": 3.6931790421988178e-06, "loss": 0.02213127613067627, "step": 31175 }, { "epoch": 0.29345882352941177, "grad_norm": 0.7880264545636205, "learning_rate": 3.6928829032016054e-06, "loss": 0.025855627655982972, "step": 31180 }, { "epoch": 0.29350588235294117, "grad_norm": 0.7598771972966807, "learning_rate": 3.6925868354310324e-06, "loss": 0.03151641488075256, "step": 31185 }, { "epoch": 0.29355294117647057, "grad_norm": 0.7104640135791611, "learning_rate": 3.692290838858551e-06, "loss": 0.023091599345207214, "step": 31190 }, { "epoch": 0.2936, "grad_norm": 0.6397280541238366, "learning_rate": 3.69199491345563e-06, "loss": 0.02087990492582321, "step": 31195 }, { "epoch": 0.2936470588235294, "grad_norm": 0.8267854688108971, "learning_rate": 3.6916990591937535e-06, "loss": 0.02461971640586853, "step": 31200 }, { "epoch": 0.2936941176470588, "grad_norm": 0.4131694305292876, "learning_rate": 3.6914032760444217e-06, "loss": 0.025301653146743774, "step": 31205 }, { "epoch": 0.29374117647058823, "grad_norm": 0.6236355902860297, "learning_rate": 3.691107563979153e-06, "loss": 0.02392014265060425, "step": 31210 }, { "epoch": 0.29378823529411763, "grad_norm": 0.6586022992049774, "learning_rate": 3.6908119229694768e-06, "loss": 0.030904936790466308, "step": 31215 }, { "epoch": 0.2938352941176471, "grad_norm": 0.533694672906133, "learning_rate": 3.6905163529869443e-06, "loss": 0.023374024033546447, "step": 31220 }, { "epoch": 0.2938823529411765, "grad_norm": 0.5396659121390229, "learning_rate": 3.6902208540031174e-06, "loss": 0.021954721212387084, "step": 31225 }, { "epoch": 0.2939294117647059, "grad_norm": 0.5683028159030235, "learning_rate": 3.689925425989579e-06, "loss": 0.023739336431026457, "step": 31230 }, { "epoch": 0.2939764705882353, "grad_norm": 0.7164738114468924, "learning_rate": 3.6896300689179233e-06, "loss": 0.023019346594810485, "step": 31235 }, { "epoch": 0.2940235294117647, "grad_norm": 0.5304781123645527, "learning_rate": 3.6893347827597633e-06, "loss": 0.02572755217552185, "step": 31240 }, { "epoch": 0.29407058823529414, "grad_norm": 0.6962112898929201, "learning_rate": 3.689039567486726e-06, "loss": 0.025964075326919557, "step": 31245 }, { "epoch": 0.29411764705882354, "grad_norm": 0.6171462920007591, "learning_rate": 3.6887444230704573e-06, "loss": 0.019804397225379945, "step": 31250 }, { "epoch": 0.29416470588235294, "grad_norm": 0.553638560542932, "learning_rate": 3.6884493494826155e-06, "loss": 0.017639264464378357, "step": 31255 }, { "epoch": 0.29421176470588234, "grad_norm": 0.5520515358542081, "learning_rate": 3.688154346694876e-06, "loss": 0.021360817551612853, "step": 31260 }, { "epoch": 0.29425882352941174, "grad_norm": 0.7573832764883648, "learning_rate": 3.6878594146789315e-06, "loss": 0.02120283842086792, "step": 31265 }, { "epoch": 0.2943058823529412, "grad_norm": 0.6939627510607599, "learning_rate": 3.6875645534064887e-06, "loss": 0.022359123826026915, "step": 31270 }, { "epoch": 0.2943529411764706, "grad_norm": 0.4501022067100307, "learning_rate": 3.68726976284927e-06, "loss": 0.017680180072784425, "step": 31275 }, { "epoch": 0.2944, "grad_norm": 0.585975946826559, "learning_rate": 3.686975042979016e-06, "loss": 0.019708167016506194, "step": 31280 }, { "epoch": 0.2944470588235294, "grad_norm": 0.5555116103062908, "learning_rate": 3.6866803937674806e-06, "loss": 0.02304462045431137, "step": 31285 }, { "epoch": 0.29449411764705885, "grad_norm": 0.4083874525085797, "learning_rate": 3.686385815186434e-06, "loss": 0.015755993127822877, "step": 31290 }, { "epoch": 0.29454117647058825, "grad_norm": 0.5023388726178418, "learning_rate": 3.6860913072076633e-06, "loss": 0.02077236622571945, "step": 31295 }, { "epoch": 0.29458823529411765, "grad_norm": 0.4536108533569914, "learning_rate": 3.6857968698029706e-06, "loss": 0.01852293759584427, "step": 31300 }, { "epoch": 0.29463529411764705, "grad_norm": 0.4465225223815688, "learning_rate": 3.6855025029441737e-06, "loss": 0.020779478549957275, "step": 31305 }, { "epoch": 0.29468235294117645, "grad_norm": 0.5045848663423574, "learning_rate": 3.685208206603107e-06, "loss": 0.026705098152160645, "step": 31310 }, { "epoch": 0.2947294117647059, "grad_norm": 0.6433382969798542, "learning_rate": 3.684913980751618e-06, "loss": 0.023270033299922943, "step": 31315 }, { "epoch": 0.2947764705882353, "grad_norm": 0.8487108591754944, "learning_rate": 3.6846198253615743e-06, "loss": 0.024960827827453614, "step": 31320 }, { "epoch": 0.2948235294117647, "grad_norm": 0.4489569894695828, "learning_rate": 3.684325740404855e-06, "loss": 0.024033564329147338, "step": 31325 }, { "epoch": 0.2948705882352941, "grad_norm": 0.5998458059460233, "learning_rate": 3.6840317258533577e-06, "loss": 0.02269732058048248, "step": 31330 }, { "epoch": 0.2949176470588235, "grad_norm": 0.5938051430057231, "learning_rate": 3.6837377816789944e-06, "loss": 0.021985328197479247, "step": 31335 }, { "epoch": 0.29496470588235296, "grad_norm": 0.38656819796643643, "learning_rate": 3.683443907853692e-06, "loss": 0.018436552584171297, "step": 31340 }, { "epoch": 0.29501176470588236, "grad_norm": 0.8674947587379317, "learning_rate": 3.683150104349396e-06, "loss": 0.02342973053455353, "step": 31345 }, { "epoch": 0.29505882352941176, "grad_norm": 0.8088402973221168, "learning_rate": 3.6828563711380644e-06, "loss": 0.023543205857276917, "step": 31350 }, { "epoch": 0.29510588235294116, "grad_norm": 0.44506513551343674, "learning_rate": 3.6825627081916725e-06, "loss": 0.02069748640060425, "step": 31355 }, { "epoch": 0.29515294117647056, "grad_norm": 0.7322900099649404, "learning_rate": 3.6822691154822114e-06, "loss": 0.018724068999290466, "step": 31360 }, { "epoch": 0.2952, "grad_norm": 0.6492723364224048, "learning_rate": 3.6819755929816862e-06, "loss": 0.024699409306049348, "step": 31365 }, { "epoch": 0.2952470588235294, "grad_norm": 0.5710244796463309, "learning_rate": 3.6816821406621204e-06, "loss": 0.016509920358657837, "step": 31370 }, { "epoch": 0.2952941176470588, "grad_norm": 0.5810012076492087, "learning_rate": 3.68138875849555e-06, "loss": 0.019532909989356993, "step": 31375 }, { "epoch": 0.2953411764705882, "grad_norm": 0.5876195856877184, "learning_rate": 3.6810954464540287e-06, "loss": 0.024649633467197417, "step": 31380 }, { "epoch": 0.2953882352941177, "grad_norm": 0.7359521403196099, "learning_rate": 3.6808022045096253e-06, "loss": 0.030783814191818238, "step": 31385 }, { "epoch": 0.2954352941176471, "grad_norm": 0.643928221465498, "learning_rate": 3.6805090326344233e-06, "loss": 0.020040690898895264, "step": 31390 }, { "epoch": 0.2954823529411765, "grad_norm": 0.7082150185483989, "learning_rate": 3.680215930800523e-06, "loss": 0.023372840881347657, "step": 31395 }, { "epoch": 0.2955294117647059, "grad_norm": 0.48158678680118877, "learning_rate": 3.67992289898004e-06, "loss": 0.021124032139778138, "step": 31400 }, { "epoch": 0.2955764705882353, "grad_norm": 0.6286164092882937, "learning_rate": 3.679629937145104e-06, "loss": 0.021830129623413085, "step": 31405 }, { "epoch": 0.29562352941176473, "grad_norm": 0.5178988256393338, "learning_rate": 3.679337045267863e-06, "loss": 0.01890803873538971, "step": 31410 }, { "epoch": 0.29567058823529413, "grad_norm": 0.5147565702534448, "learning_rate": 3.6790442233204765e-06, "loss": 0.023666341602802277, "step": 31415 }, { "epoch": 0.29571764705882353, "grad_norm": 0.6112899217355074, "learning_rate": 3.6787514712751244e-06, "loss": 0.017380574345588685, "step": 31420 }, { "epoch": 0.29576470588235293, "grad_norm": 0.4711163945117844, "learning_rate": 3.6784587891039986e-06, "loss": 0.017528891563415527, "step": 31425 }, { "epoch": 0.29581176470588233, "grad_norm": 0.7387493560927039, "learning_rate": 3.678166176779308e-06, "loss": 0.022615084052085878, "step": 31430 }, { "epoch": 0.2958588235294118, "grad_norm": 0.6004191161969614, "learning_rate": 3.677873634273275e-06, "loss": 0.020966637134552, "step": 31435 }, { "epoch": 0.2959058823529412, "grad_norm": 0.6277068945217112, "learning_rate": 3.6775811615581413e-06, "loss": 0.022697058320045472, "step": 31440 }, { "epoch": 0.2959529411764706, "grad_norm": 1.6425870219383643, "learning_rate": 3.6772887586061597e-06, "loss": 0.026303693652153015, "step": 31445 }, { "epoch": 0.296, "grad_norm": 0.5424590985219694, "learning_rate": 3.6769964253896e-06, "loss": 0.015740734338760377, "step": 31450 }, { "epoch": 0.2960470588235294, "grad_norm": 0.696628110662135, "learning_rate": 3.6767041618807504e-06, "loss": 0.023449595272541045, "step": 31455 }, { "epoch": 0.29609411764705884, "grad_norm": 0.47508982629620294, "learning_rate": 3.676411968051909e-06, "loss": 0.022811834514141083, "step": 31460 }, { "epoch": 0.29614117647058824, "grad_norm": 0.46659418602246894, "learning_rate": 3.676119843875395e-06, "loss": 0.022508841753005982, "step": 31465 }, { "epoch": 0.29618823529411764, "grad_norm": 0.4978003097947897, "learning_rate": 3.6758277893235377e-06, "loss": 0.022232791781425475, "step": 31470 }, { "epoch": 0.29623529411764704, "grad_norm": 0.679854589187112, "learning_rate": 3.675535804368686e-06, "loss": 0.02124239057302475, "step": 31475 }, { "epoch": 0.2962823529411765, "grad_norm": 0.6360303951398794, "learning_rate": 3.675243888983202e-06, "loss": 0.02317664325237274, "step": 31480 }, { "epoch": 0.2963294117647059, "grad_norm": 0.5083218376508474, "learning_rate": 3.674952043139464e-06, "loss": 0.02090844660997391, "step": 31485 }, { "epoch": 0.2963764705882353, "grad_norm": 0.46144298074214235, "learning_rate": 3.674660266809866e-06, "loss": 0.017958825826644896, "step": 31490 }, { "epoch": 0.2964235294117647, "grad_norm": 0.4562458142500769, "learning_rate": 3.6743685599668143e-06, "loss": 0.018128186464309692, "step": 31495 }, { "epoch": 0.2964705882352941, "grad_norm": 0.5679466873712594, "learning_rate": 3.6740769225827346e-06, "loss": 0.020140114426612853, "step": 31500 }, { "epoch": 0.29651764705882355, "grad_norm": 0.6224511911926113, "learning_rate": 3.6737853546300666e-06, "loss": 0.02289091646671295, "step": 31505 }, { "epoch": 0.29656470588235295, "grad_norm": 0.5117939381139249, "learning_rate": 3.6734938560812646e-06, "loss": 0.021837329864501952, "step": 31510 }, { "epoch": 0.29661176470588235, "grad_norm": 0.6511429817528727, "learning_rate": 3.6732024269087973e-06, "loss": 0.022513920068740846, "step": 31515 }, { "epoch": 0.29665882352941175, "grad_norm": 0.6230482513493745, "learning_rate": 3.6729110670851514e-06, "loss": 0.023328378796577454, "step": 31520 }, { "epoch": 0.29670588235294115, "grad_norm": 0.5351145908275158, "learning_rate": 3.6726197765828264e-06, "loss": 0.019486558437347413, "step": 31525 }, { "epoch": 0.2967529411764706, "grad_norm": 0.5391429358765543, "learning_rate": 3.6723285553743393e-06, "loss": 0.016622841358184814, "step": 31530 }, { "epoch": 0.2968, "grad_norm": 0.5300352433390401, "learning_rate": 3.67203740343222e-06, "loss": 0.020183804631233215, "step": 31535 }, { "epoch": 0.2968470588235294, "grad_norm": 0.6754669456567636, "learning_rate": 3.6717463207290157e-06, "loss": 0.019982284307479857, "step": 31540 }, { "epoch": 0.2968941176470588, "grad_norm": 0.5957784089528287, "learning_rate": 3.6714553072372854e-06, "loss": 0.01928950548171997, "step": 31545 }, { "epoch": 0.2969411764705882, "grad_norm": 0.6037786162248377, "learning_rate": 3.6711643629296096e-06, "loss": 0.019642744958400727, "step": 31550 }, { "epoch": 0.29698823529411766, "grad_norm": 0.5917596243056885, "learning_rate": 3.6708734877785774e-06, "loss": 0.02304063141345978, "step": 31555 }, { "epoch": 0.29703529411764706, "grad_norm": 0.4951960714462352, "learning_rate": 3.6705826817567962e-06, "loss": 0.023326364159584046, "step": 31560 }, { "epoch": 0.29708235294117646, "grad_norm": 0.8651836631900223, "learning_rate": 3.67029194483689e-06, "loss": 0.022402417659759522, "step": 31565 }, { "epoch": 0.29712941176470586, "grad_norm": 0.44842080888285346, "learning_rate": 3.6700012769914944e-06, "loss": 0.02239389568567276, "step": 31570 }, { "epoch": 0.2971764705882353, "grad_norm": 0.37014297833306364, "learning_rate": 3.6697106781932635e-06, "loss": 0.02315342128276825, "step": 31575 }, { "epoch": 0.2972235294117647, "grad_norm": 0.4774147798126514, "learning_rate": 3.6694201484148634e-06, "loss": 0.02231953740119934, "step": 31580 }, { "epoch": 0.2972705882352941, "grad_norm": 0.7256579233307968, "learning_rate": 3.6691296876289782e-06, "loss": 0.02506871521472931, "step": 31585 }, { "epoch": 0.2973176470588235, "grad_norm": 0.5572661639212767, "learning_rate": 3.6688392958083064e-06, "loss": 0.025216448307037353, "step": 31590 }, { "epoch": 0.2973647058823529, "grad_norm": 0.502549875510147, "learning_rate": 3.668548972925561e-06, "loss": 0.025884556770324706, "step": 31595 }, { "epoch": 0.2974117647058824, "grad_norm": 0.5334191501724923, "learning_rate": 3.6682587189534686e-06, "loss": 0.019165515899658203, "step": 31600 }, { "epoch": 0.2974588235294118, "grad_norm": 0.5805853763941861, "learning_rate": 3.667968533864774e-06, "loss": 0.027314096689224243, "step": 31605 }, { "epoch": 0.2975058823529412, "grad_norm": 0.7535083986690411, "learning_rate": 3.6676784176322356e-06, "loss": 0.025960272550582884, "step": 31610 }, { "epoch": 0.2975529411764706, "grad_norm": 0.3640574436887079, "learning_rate": 3.667388370228628e-06, "loss": 0.018656975030899046, "step": 31615 }, { "epoch": 0.2976, "grad_norm": 0.4915470495373797, "learning_rate": 3.6670983916267385e-06, "loss": 0.018658140301704408, "step": 31620 }, { "epoch": 0.29764705882352943, "grad_norm": 0.3606714013104337, "learning_rate": 3.6668084817993703e-06, "loss": 0.02024209499359131, "step": 31625 }, { "epoch": 0.29769411764705883, "grad_norm": 0.5995052209728973, "learning_rate": 3.666518640719344e-06, "loss": 0.02512986958026886, "step": 31630 }, { "epoch": 0.29774117647058823, "grad_norm": 0.5307430736125511, "learning_rate": 3.6662288683594908e-06, "loss": 0.03185062408447266, "step": 31635 }, { "epoch": 0.29778823529411763, "grad_norm": 0.8184986212629869, "learning_rate": 3.665939164692663e-06, "loss": 0.022568784654140472, "step": 31640 }, { "epoch": 0.29783529411764703, "grad_norm": 0.4950490106814639, "learning_rate": 3.665649529691721e-06, "loss": 0.020747298002243043, "step": 31645 }, { "epoch": 0.2978823529411765, "grad_norm": 0.609283219560974, "learning_rate": 3.665359963329547e-06, "loss": 0.023572388291358947, "step": 31650 }, { "epoch": 0.2979294117647059, "grad_norm": 0.6628489212059337, "learning_rate": 3.6650704655790315e-06, "loss": 0.024069900810718536, "step": 31655 }, { "epoch": 0.2979764705882353, "grad_norm": 0.816533096129082, "learning_rate": 3.6647810364130856e-06, "loss": 0.02223197966814041, "step": 31660 }, { "epoch": 0.2980235294117647, "grad_norm": 0.6519934263656323, "learning_rate": 3.664491675804631e-06, "loss": 0.024771606922149657, "step": 31665 }, { "epoch": 0.29807058823529414, "grad_norm": 0.45599949623374764, "learning_rate": 3.6642023837266093e-06, "loss": 0.022603926062583924, "step": 31670 }, { "epoch": 0.29811764705882354, "grad_norm": 0.6454969109774683, "learning_rate": 3.663913160151973e-06, "loss": 0.023012495040893553, "step": 31675 }, { "epoch": 0.29816470588235294, "grad_norm": 0.7726108922752372, "learning_rate": 3.6636240050536897e-06, "loss": 0.021181483566761018, "step": 31680 }, { "epoch": 0.29821176470588234, "grad_norm": 0.7413193639072776, "learning_rate": 3.6633349184047432e-06, "loss": 0.028934174776077272, "step": 31685 }, { "epoch": 0.29825882352941174, "grad_norm": 0.47840395790939055, "learning_rate": 3.663045900178133e-06, "loss": 0.021878035366535188, "step": 31690 }, { "epoch": 0.2983058823529412, "grad_norm": 0.6208435550286026, "learning_rate": 3.6627569503468716e-06, "loss": 0.025801366567611693, "step": 31695 }, { "epoch": 0.2983529411764706, "grad_norm": 0.4793814464300273, "learning_rate": 3.6624680688839893e-06, "loss": 0.02031407356262207, "step": 31700 }, { "epoch": 0.2984, "grad_norm": 0.6164938813321665, "learning_rate": 3.6621792557625263e-06, "loss": 0.023746414482593535, "step": 31705 }, { "epoch": 0.2984470588235294, "grad_norm": 0.5149869542779392, "learning_rate": 3.6618905109555427e-06, "loss": 0.017744700610637664, "step": 31710 }, { "epoch": 0.2984941176470588, "grad_norm": 0.766374718902467, "learning_rate": 3.661601834436111e-06, "loss": 0.020212483406066895, "step": 31715 }, { "epoch": 0.29854117647058825, "grad_norm": 0.7197220229427933, "learning_rate": 3.6613132261773186e-06, "loss": 0.021171373128890992, "step": 31720 }, { "epoch": 0.29858823529411765, "grad_norm": 0.564475612164059, "learning_rate": 3.661024686152269e-06, "loss": 0.021726194024085998, "step": 31725 }, { "epoch": 0.29863529411764705, "grad_norm": 0.6730234670810955, "learning_rate": 3.660736214334078e-06, "loss": 0.023943057656288146, "step": 31730 }, { "epoch": 0.29868235294117645, "grad_norm": 0.6756122272228741, "learning_rate": 3.66044781069588e-06, "loss": 0.01883634626865387, "step": 31735 }, { "epoch": 0.2987294117647059, "grad_norm": 0.589326007921199, "learning_rate": 3.660159475210821e-06, "loss": 0.024644169211387634, "step": 31740 }, { "epoch": 0.2987764705882353, "grad_norm": 0.6029182109986191, "learning_rate": 3.659871207852063e-06, "loss": 0.02383035570383072, "step": 31745 }, { "epoch": 0.2988235294117647, "grad_norm": 0.5601935996440556, "learning_rate": 3.659583008592783e-06, "loss": 0.01977337896823883, "step": 31750 }, { "epoch": 0.2988705882352941, "grad_norm": 0.4337866891581188, "learning_rate": 3.6592948774061716e-06, "loss": 0.020798565447330476, "step": 31755 }, { "epoch": 0.2989176470588235, "grad_norm": 0.6992470756071922, "learning_rate": 3.659006814265436e-06, "loss": 0.019917580485343932, "step": 31760 }, { "epoch": 0.29896470588235297, "grad_norm": 0.6766515935728498, "learning_rate": 3.658718819143798e-06, "loss": 0.024994322657585145, "step": 31765 }, { "epoch": 0.29901176470588237, "grad_norm": 0.4006004239380805, "learning_rate": 3.6584308920144913e-06, "loss": 0.024454835057258605, "step": 31770 }, { "epoch": 0.29905882352941177, "grad_norm": 0.6060129676837777, "learning_rate": 3.658143032850768e-06, "loss": 0.03129777908325195, "step": 31775 }, { "epoch": 0.29910588235294117, "grad_norm": 0.6164211533878047, "learning_rate": 3.657855241625893e-06, "loss": 0.020795874297618866, "step": 31780 }, { "epoch": 0.29915294117647057, "grad_norm": 0.5934064642589851, "learning_rate": 3.6575675183131456e-06, "loss": 0.027830904722213744, "step": 31785 }, { "epoch": 0.2992, "grad_norm": 0.4505088839340598, "learning_rate": 3.6572798628858213e-06, "loss": 0.0241306871175766, "step": 31790 }, { "epoch": 0.2992470588235294, "grad_norm": 0.6251369412111207, "learning_rate": 3.6569922753172294e-06, "loss": 0.02198697030544281, "step": 31795 }, { "epoch": 0.2992941176470588, "grad_norm": 0.6035468218748512, "learning_rate": 3.6567047555806945e-06, "loss": 0.021259185671806336, "step": 31800 }, { "epoch": 0.2993411764705882, "grad_norm": 0.6884092957048458, "learning_rate": 3.6564173036495536e-06, "loss": 0.026552879810333253, "step": 31805 }, { "epoch": 0.2993882352941176, "grad_norm": 0.544509912528603, "learning_rate": 3.6561299194971624e-06, "loss": 0.02366849482059479, "step": 31810 }, { "epoch": 0.2994352941176471, "grad_norm": 0.6396111011806825, "learning_rate": 3.655842603096887e-06, "loss": 0.021389935910701752, "step": 31815 }, { "epoch": 0.2994823529411765, "grad_norm": 0.68540576494523, "learning_rate": 3.655555354422111e-06, "loss": 0.021234817802906036, "step": 31820 }, { "epoch": 0.2995294117647059, "grad_norm": 0.488384344432615, "learning_rate": 3.6552681734462314e-06, "loss": 0.024468940496444703, "step": 31825 }, { "epoch": 0.2995764705882353, "grad_norm": 0.5135549030975785, "learning_rate": 3.6549810601426616e-06, "loss": 0.021734438836574554, "step": 31830 }, { "epoch": 0.29962352941176473, "grad_norm": 0.5030805967607941, "learning_rate": 3.6546940144848264e-06, "loss": 0.022594329714775086, "step": 31835 }, { "epoch": 0.29967058823529413, "grad_norm": 0.4732935584659636, "learning_rate": 3.654407036446168e-06, "loss": 0.019853928685188295, "step": 31840 }, { "epoch": 0.29971764705882353, "grad_norm": 0.6937683921421214, "learning_rate": 3.6541201260001417e-06, "loss": 0.022060005366802214, "step": 31845 }, { "epoch": 0.29976470588235293, "grad_norm": 0.9173989190400067, "learning_rate": 3.6538332831202187e-06, "loss": 0.02516378164291382, "step": 31850 }, { "epoch": 0.29981176470588233, "grad_norm": 0.7224711844809495, "learning_rate": 3.6535465077798836e-06, "loss": 0.022223997116088866, "step": 31855 }, { "epoch": 0.2998588235294118, "grad_norm": 0.7410993320234991, "learning_rate": 3.6532597999526353e-06, "loss": 0.018551692366600037, "step": 31860 }, { "epoch": 0.2999058823529412, "grad_norm": 0.505617463391024, "learning_rate": 3.6529731596119882e-06, "loss": 0.020715612173080444, "step": 31865 }, { "epoch": 0.2999529411764706, "grad_norm": 0.48339497853575075, "learning_rate": 3.652686586731472e-06, "loss": 0.01978270262479782, "step": 31870 }, { "epoch": 0.3, "grad_norm": 0.4820426978563704, "learning_rate": 3.652400081284629e-06, "loss": 0.017630515992641448, "step": 31875 }, { "epoch": 0.3000470588235294, "grad_norm": 0.605606306892661, "learning_rate": 3.6521136432450168e-06, "loss": 0.018757028877735137, "step": 31880 }, { "epoch": 0.30009411764705884, "grad_norm": 0.5714570598204697, "learning_rate": 3.651827272586207e-06, "loss": 0.025954750180244446, "step": 31885 }, { "epoch": 0.30014117647058824, "grad_norm": 0.6005557501249191, "learning_rate": 3.6515409692817883e-06, "loss": 0.022934071719646454, "step": 31890 }, { "epoch": 0.30018823529411764, "grad_norm": 0.5012465576737672, "learning_rate": 3.6512547333053606e-06, "loss": 0.027161574363708495, "step": 31895 }, { "epoch": 0.30023529411764704, "grad_norm": 0.44870029406214346, "learning_rate": 3.6509685646305387e-06, "loss": 0.01868384927511215, "step": 31900 }, { "epoch": 0.30028235294117644, "grad_norm": 0.38819636052646156, "learning_rate": 3.6506824632309546e-06, "loss": 0.019764941930770875, "step": 31905 }, { "epoch": 0.3003294117647059, "grad_norm": 0.5017254865842214, "learning_rate": 3.650396429080252e-06, "loss": 0.02219236046075821, "step": 31910 }, { "epoch": 0.3003764705882353, "grad_norm": 0.6542995763919828, "learning_rate": 3.6501104621520895e-06, "loss": 0.018498757481575014, "step": 31915 }, { "epoch": 0.3004235294117647, "grad_norm": 0.6960474897508218, "learning_rate": 3.6498245624201416e-06, "loss": 0.023406611382961275, "step": 31920 }, { "epoch": 0.3004705882352941, "grad_norm": 0.8103100595547118, "learning_rate": 3.6495387298580958e-06, "loss": 0.026250141859054565, "step": 31925 }, { "epoch": 0.30051764705882356, "grad_norm": 0.6619696845140912, "learning_rate": 3.649252964439656e-06, "loss": 0.01977858543395996, "step": 31930 }, { "epoch": 0.30056470588235296, "grad_norm": 0.633942582198013, "learning_rate": 3.6489672661385354e-06, "loss": 0.026928579807281493, "step": 31935 }, { "epoch": 0.30061176470588236, "grad_norm": 0.45351953586077004, "learning_rate": 3.6486816349284684e-06, "loss": 0.018746286630630493, "step": 31940 }, { "epoch": 0.30065882352941176, "grad_norm": 0.5371429769751555, "learning_rate": 3.6483960707831993e-06, "loss": 0.02752305269241333, "step": 31945 }, { "epoch": 0.30070588235294116, "grad_norm": 0.5669726972338095, "learning_rate": 3.6481105736764888e-06, "loss": 0.02145606279373169, "step": 31950 }, { "epoch": 0.3007529411764706, "grad_norm": 0.5364932234171257, "learning_rate": 3.6478251435821106e-06, "loss": 0.02142729014158249, "step": 31955 }, { "epoch": 0.3008, "grad_norm": 0.5953893454761467, "learning_rate": 3.6475397804738532e-06, "loss": 0.02121995687484741, "step": 31960 }, { "epoch": 0.3008470588235294, "grad_norm": 0.6674759498920402, "learning_rate": 3.6472544843255197e-06, "loss": 0.021341127157211304, "step": 31965 }, { "epoch": 0.3008941176470588, "grad_norm": 0.6668276203170987, "learning_rate": 3.6469692551109286e-06, "loss": 0.02651374936103821, "step": 31970 }, { "epoch": 0.3009411764705882, "grad_norm": 0.4619262677129915, "learning_rate": 3.6466840928039097e-06, "loss": 0.022857603430747987, "step": 31975 }, { "epoch": 0.30098823529411767, "grad_norm": 0.690457542477044, "learning_rate": 3.6463989973783116e-06, "loss": 0.0244473397731781, "step": 31980 }, { "epoch": 0.30103529411764707, "grad_norm": 0.6996695869947477, "learning_rate": 3.6461139688079923e-06, "loss": 0.024175891280174257, "step": 31985 }, { "epoch": 0.30108235294117647, "grad_norm": 0.638840749692683, "learning_rate": 3.6458290070668277e-06, "loss": 0.019053220748901367, "step": 31990 }, { "epoch": 0.30112941176470587, "grad_norm": 0.5199080659229848, "learning_rate": 3.6455441121287067e-06, "loss": 0.0247697651386261, "step": 31995 }, { "epoch": 0.30117647058823527, "grad_norm": 0.39109219695264025, "learning_rate": 3.6452592839675317e-06, "loss": 0.019567805528640746, "step": 32000 }, { "epoch": 0.3012235294117647, "grad_norm": 0.392257671906015, "learning_rate": 3.6449745225572214e-06, "loss": 0.01825297772884369, "step": 32005 }, { "epoch": 0.3012705882352941, "grad_norm": 0.7798133689586942, "learning_rate": 3.6446898278717062e-06, "loss": 0.024531781673431396, "step": 32010 }, { "epoch": 0.3013176470588235, "grad_norm": 0.5825400832947893, "learning_rate": 3.6444051998849327e-06, "loss": 0.021011731028556822, "step": 32015 }, { "epoch": 0.3013647058823529, "grad_norm": 0.8038709928260555, "learning_rate": 3.6441206385708618e-06, "loss": 0.02239839732646942, "step": 32020 }, { "epoch": 0.3014117647058824, "grad_norm": 0.570518036109102, "learning_rate": 3.643836143903467e-06, "loss": 0.026846516132354736, "step": 32025 }, { "epoch": 0.3014588235294118, "grad_norm": 0.6741889632293057, "learning_rate": 3.643551715856738e-06, "loss": 0.02030748873949051, "step": 32030 }, { "epoch": 0.3015058823529412, "grad_norm": 0.9053239773598031, "learning_rate": 3.6432673544046766e-06, "loss": 0.023413403332233428, "step": 32035 }, { "epoch": 0.3015529411764706, "grad_norm": 0.5096964032877042, "learning_rate": 3.642983059521301e-06, "loss": 0.014957459270954132, "step": 32040 }, { "epoch": 0.3016, "grad_norm": 0.6165316572922618, "learning_rate": 3.6426988311806416e-06, "loss": 0.019034624099731445, "step": 32045 }, { "epoch": 0.30164705882352943, "grad_norm": 0.6427032483519279, "learning_rate": 3.6424146693567442e-06, "loss": 0.021019479632377623, "step": 32050 }, { "epoch": 0.30169411764705883, "grad_norm": 0.5485439061188898, "learning_rate": 3.6421305740236686e-06, "loss": 0.021788853406906127, "step": 32055 }, { "epoch": 0.30174117647058823, "grad_norm": 0.7258911271489, "learning_rate": 3.641846545155488e-06, "loss": 0.024811109900474547, "step": 32060 }, { "epoch": 0.30178823529411763, "grad_norm": 0.5475149984162682, "learning_rate": 3.6415625827262914e-06, "loss": 0.02127101719379425, "step": 32065 }, { "epoch": 0.30183529411764703, "grad_norm": 0.48520365571882756, "learning_rate": 3.64127868671018e-06, "loss": 0.01785534918308258, "step": 32070 }, { "epoch": 0.3018823529411765, "grad_norm": 0.732136662120118, "learning_rate": 3.640994857081271e-06, "loss": 0.024175992608070372, "step": 32075 }, { "epoch": 0.3019294117647059, "grad_norm": 0.5280315228799368, "learning_rate": 3.6407110938136938e-06, "loss": 0.021031050384044646, "step": 32080 }, { "epoch": 0.3019764705882353, "grad_norm": 0.6132289770708299, "learning_rate": 3.640427396881593e-06, "loss": 0.024063196778297425, "step": 32085 }, { "epoch": 0.3020235294117647, "grad_norm": 0.6377236136331703, "learning_rate": 3.6401437662591275e-06, "loss": 0.020352739095687866, "step": 32090 }, { "epoch": 0.3020705882352941, "grad_norm": 0.7600720615954968, "learning_rate": 3.63986020192047e-06, "loss": 0.02268810272216797, "step": 32095 }, { "epoch": 0.30211764705882355, "grad_norm": 0.49802409563382083, "learning_rate": 3.6395767038398066e-06, "loss": 0.017985090613365173, "step": 32100 }, { "epoch": 0.30216470588235295, "grad_norm": 0.44187064808274973, "learning_rate": 3.6392932719913393e-06, "loss": 0.02469591498374939, "step": 32105 }, { "epoch": 0.30221176470588235, "grad_norm": 0.6120156568559149, "learning_rate": 3.6390099063492816e-06, "loss": 0.02114108353853226, "step": 32110 }, { "epoch": 0.30225882352941175, "grad_norm": 0.6347512584014101, "learning_rate": 3.6387266068878635e-06, "loss": 0.026030302047729492, "step": 32115 }, { "epoch": 0.3023058823529412, "grad_norm": 0.5857711427891668, "learning_rate": 3.6384433735813275e-06, "loss": 0.02340046763420105, "step": 32120 }, { "epoch": 0.3023529411764706, "grad_norm": 0.5613291335244561, "learning_rate": 3.6381602064039306e-06, "loss": 0.020632970333099365, "step": 32125 }, { "epoch": 0.3024, "grad_norm": 0.47754193615992435, "learning_rate": 3.6378771053299437e-06, "loss": 0.023968642950057982, "step": 32130 }, { "epoch": 0.3024470588235294, "grad_norm": 0.5467169437375661, "learning_rate": 3.637594070333652e-06, "loss": 0.020060278475284576, "step": 32135 }, { "epoch": 0.3024941176470588, "grad_norm": 0.46615157761723724, "learning_rate": 3.6373111013893546e-06, "loss": 0.015310619771480561, "step": 32140 }, { "epoch": 0.30254117647058826, "grad_norm": 0.7904750516415145, "learning_rate": 3.6370281984713646e-06, "loss": 0.02276729792356491, "step": 32145 }, { "epoch": 0.30258823529411766, "grad_norm": 0.5450440325611945, "learning_rate": 3.6367453615540086e-06, "loss": 0.02257086932659149, "step": 32150 }, { "epoch": 0.30263529411764706, "grad_norm": 0.6517449934417153, "learning_rate": 3.636462590611628e-06, "loss": 0.0276653915643692, "step": 32155 }, { "epoch": 0.30268235294117646, "grad_norm": 0.7781204757695183, "learning_rate": 3.6361798856185778e-06, "loss": 0.021363528072834016, "step": 32160 }, { "epoch": 0.30272941176470586, "grad_norm": 0.8357916970853435, "learning_rate": 3.6358972465492265e-06, "loss": 0.02135913372039795, "step": 32165 }, { "epoch": 0.3027764705882353, "grad_norm": 0.6839849474668569, "learning_rate": 3.6356146733779573e-06, "loss": 0.020143455266952513, "step": 32170 }, { "epoch": 0.3028235294117647, "grad_norm": 1.6089897128205082, "learning_rate": 3.635332166079167e-06, "loss": 0.026401272416114806, "step": 32175 }, { "epoch": 0.3028705882352941, "grad_norm": 0.6412061830388209, "learning_rate": 3.6350497246272655e-06, "loss": 0.019520144164562225, "step": 32180 }, { "epoch": 0.3029176470588235, "grad_norm": 1.1600330297388448, "learning_rate": 3.6347673489966783e-06, "loss": 0.03169259130954742, "step": 32185 }, { "epoch": 0.3029647058823529, "grad_norm": 0.7789876381361583, "learning_rate": 3.6344850391618436e-06, "loss": 0.020308540761470796, "step": 32190 }, { "epoch": 0.30301176470588237, "grad_norm": 0.704750952069859, "learning_rate": 3.6342027950972143e-06, "loss": 0.022031912207603456, "step": 32195 }, { "epoch": 0.30305882352941177, "grad_norm": 0.5681840475041748, "learning_rate": 3.6339206167772555e-06, "loss": 0.02194032371044159, "step": 32200 }, { "epoch": 0.30310588235294117, "grad_norm": 0.40306901854911, "learning_rate": 3.633638504176449e-06, "loss": 0.023893117904663086, "step": 32205 }, { "epoch": 0.30315294117647057, "grad_norm": 1.0461915312531453, "learning_rate": 3.633356457269288e-06, "loss": 0.02929587960243225, "step": 32210 }, { "epoch": 0.3032, "grad_norm": 0.5428691012418223, "learning_rate": 3.63307447603028e-06, "loss": 0.023231345415115356, "step": 32215 }, { "epoch": 0.3032470588235294, "grad_norm": 0.5646790105223325, "learning_rate": 3.6327925604339476e-06, "loss": 0.019458797574043275, "step": 32220 }, { "epoch": 0.3032941176470588, "grad_norm": 0.7065232394823353, "learning_rate": 3.632510710454825e-06, "loss": 0.02456327825784683, "step": 32225 }, { "epoch": 0.3033411764705882, "grad_norm": 0.5652098644983153, "learning_rate": 3.632228926067463e-06, "loss": 0.023708800971508025, "step": 32230 }, { "epoch": 0.3033882352941176, "grad_norm": 0.8722980061857387, "learning_rate": 3.6319472072464253e-06, "loss": 0.021234552562236785, "step": 32235 }, { "epoch": 0.3034352941176471, "grad_norm": 0.92852776666428, "learning_rate": 3.631665553966287e-06, "loss": 0.017111358046531678, "step": 32240 }, { "epoch": 0.3034823529411765, "grad_norm": 0.7126935655034671, "learning_rate": 3.63138396620164e-06, "loss": 0.025995898246765136, "step": 32245 }, { "epoch": 0.3035294117647059, "grad_norm": 0.6226942636575455, "learning_rate": 3.6311024439270898e-06, "loss": 0.02020663022994995, "step": 32250 }, { "epoch": 0.3035764705882353, "grad_norm": 0.5598458937989458, "learning_rate": 3.6308209871172535e-06, "loss": 0.02365700900554657, "step": 32255 }, { "epoch": 0.3036235294117647, "grad_norm": 0.514788295045174, "learning_rate": 3.6305395957467633e-06, "loss": 0.023766477406024934, "step": 32260 }, { "epoch": 0.30367058823529414, "grad_norm": 0.6461961132333566, "learning_rate": 3.630258269790265e-06, "loss": 0.02387705147266388, "step": 32265 }, { "epoch": 0.30371764705882354, "grad_norm": 0.7471362557732135, "learning_rate": 3.62997700922242e-06, "loss": 0.02055760324001312, "step": 32270 }, { "epoch": 0.30376470588235294, "grad_norm": 0.6537226084432644, "learning_rate": 3.6296958140179e-06, "loss": 0.02274824380874634, "step": 32275 }, { "epoch": 0.30381176470588234, "grad_norm": 0.5347233834145337, "learning_rate": 3.6294146841513926e-06, "loss": 0.0248573899269104, "step": 32280 }, { "epoch": 0.3038588235294118, "grad_norm": 0.3939433327367988, "learning_rate": 3.629133619597599e-06, "loss": 0.018938300013542176, "step": 32285 }, { "epoch": 0.3039058823529412, "grad_norm": 0.8079940521717797, "learning_rate": 3.628852620331234e-06, "loss": 0.024587443470954894, "step": 32290 }, { "epoch": 0.3039529411764706, "grad_norm": 0.5404045181464893, "learning_rate": 3.6285716863270245e-06, "loss": 0.024692031741142272, "step": 32295 }, { "epoch": 0.304, "grad_norm": 0.5555216880270712, "learning_rate": 3.6282908175597135e-06, "loss": 0.022430276870727538, "step": 32300 }, { "epoch": 0.3040470588235294, "grad_norm": 0.6347994069210489, "learning_rate": 3.6280100140040576e-06, "loss": 0.025329989194869996, "step": 32305 }, { "epoch": 0.30409411764705885, "grad_norm": 0.5407474557530688, "learning_rate": 3.627729275634825e-06, "loss": 0.020399942994117737, "step": 32310 }, { "epoch": 0.30414117647058825, "grad_norm": 0.4722983259063716, "learning_rate": 3.627448602426798e-06, "loss": 0.023878008127212524, "step": 32315 }, { "epoch": 0.30418823529411765, "grad_norm": 0.616777405213553, "learning_rate": 3.6271679943547746e-06, "loss": 0.020247140526771547, "step": 32320 }, { "epoch": 0.30423529411764705, "grad_norm": 0.3849088046860905, "learning_rate": 3.626887451393565e-06, "loss": 0.022420921921730043, "step": 32325 }, { "epoch": 0.30428235294117645, "grad_norm": 0.9726736058882434, "learning_rate": 3.626606973517992e-06, "loss": 0.020015698671340943, "step": 32330 }, { "epoch": 0.3043294117647059, "grad_norm": 0.6351716571603184, "learning_rate": 3.6263265607028947e-06, "loss": 0.01968201994895935, "step": 32335 }, { "epoch": 0.3043764705882353, "grad_norm": 0.7335583643998271, "learning_rate": 3.626046212923123e-06, "loss": 0.021683543920516968, "step": 32340 }, { "epoch": 0.3044235294117647, "grad_norm": 0.6028718706912597, "learning_rate": 3.6257659301535425e-06, "loss": 0.021718883514404298, "step": 32345 }, { "epoch": 0.3044705882352941, "grad_norm": 0.7553813360561692, "learning_rate": 3.625485712369031e-06, "loss": 0.02099684476852417, "step": 32350 }, { "epoch": 0.3045176470588235, "grad_norm": 0.5276178836160856, "learning_rate": 3.6252055595444812e-06, "loss": 0.02143191695213318, "step": 32355 }, { "epoch": 0.30456470588235296, "grad_norm": 0.801514232248927, "learning_rate": 3.624925471654799e-06, "loss": 0.01917113959789276, "step": 32360 }, { "epoch": 0.30461176470588236, "grad_norm": 0.7889899793553532, "learning_rate": 3.6246454486749024e-06, "loss": 0.020452392101287842, "step": 32365 }, { "epoch": 0.30465882352941176, "grad_norm": 0.47081692965166916, "learning_rate": 3.6243654905797237e-06, "loss": 0.01925741136074066, "step": 32370 }, { "epoch": 0.30470588235294116, "grad_norm": 0.6113809348923127, "learning_rate": 3.624085597344211e-06, "loss": 0.021106040477752684, "step": 32375 }, { "epoch": 0.3047529411764706, "grad_norm": 0.4355839439936852, "learning_rate": 3.623805768943322e-06, "loss": 0.01748671382665634, "step": 32380 }, { "epoch": 0.3048, "grad_norm": 0.5031252280505647, "learning_rate": 3.6235260053520315e-06, "loss": 0.01996424198150635, "step": 32385 }, { "epoch": 0.3048470588235294, "grad_norm": 0.7025000373988389, "learning_rate": 3.623246306545326e-06, "loss": 0.021551668643951416, "step": 32390 }, { "epoch": 0.3048941176470588, "grad_norm": 0.6662652332639324, "learning_rate": 3.622966672498206e-06, "loss": 0.02453851103782654, "step": 32395 }, { "epoch": 0.3049411764705882, "grad_norm": 0.6722036026685362, "learning_rate": 3.622687103185685e-06, "loss": 0.026605406403541566, "step": 32400 }, { "epoch": 0.30498823529411767, "grad_norm": 0.5137325433160121, "learning_rate": 3.6224075985827905e-06, "loss": 0.023607468605041503, "step": 32405 }, { "epoch": 0.30503529411764707, "grad_norm": 0.5673905513863918, "learning_rate": 3.622128158664563e-06, "loss": 0.02354672849178314, "step": 32410 }, { "epoch": 0.30508235294117647, "grad_norm": 0.5771534539483317, "learning_rate": 3.6218487834060577e-06, "loss": 0.024365997314453124, "step": 32415 }, { "epoch": 0.30512941176470587, "grad_norm": 0.6260437710195709, "learning_rate": 3.6215694727823412e-06, "loss": 0.01986076533794403, "step": 32420 }, { "epoch": 0.30517647058823527, "grad_norm": 0.4971085213381237, "learning_rate": 3.621290226768496e-06, "loss": 0.023330669105052947, "step": 32425 }, { "epoch": 0.3052235294117647, "grad_norm": 0.48324769566798437, "learning_rate": 3.621011045339615e-06, "loss": 0.027536433935165406, "step": 32430 }, { "epoch": 0.3052705882352941, "grad_norm": 0.5561372905329801, "learning_rate": 3.6207319284708085e-06, "loss": 0.024840010702610014, "step": 32435 }, { "epoch": 0.3053176470588235, "grad_norm": 0.4079142657348578, "learning_rate": 3.620452876137196e-06, "loss": 0.020005108416080476, "step": 32440 }, { "epoch": 0.3053647058823529, "grad_norm": 0.41564973611403216, "learning_rate": 3.6201738883139145e-06, "loss": 0.02221587151288986, "step": 32445 }, { "epoch": 0.3054117647058823, "grad_norm": 0.3028517106315672, "learning_rate": 3.6198949649761112e-06, "loss": 0.02250908613204956, "step": 32450 }, { "epoch": 0.3054588235294118, "grad_norm": 0.5537387030556455, "learning_rate": 3.6196161060989472e-06, "loss": 0.024467536807060243, "step": 32455 }, { "epoch": 0.3055058823529412, "grad_norm": 0.6301427791745398, "learning_rate": 3.6193373116575993e-06, "loss": 0.021787932515144347, "step": 32460 }, { "epoch": 0.3055529411764706, "grad_norm": 0.6666706362861944, "learning_rate": 3.6190585816272548e-06, "loss": 0.025123819708824158, "step": 32465 }, { "epoch": 0.3056, "grad_norm": 0.5625628812527862, "learning_rate": 3.6187799159831165e-06, "loss": 0.024465298652648924, "step": 32470 }, { "epoch": 0.30564705882352944, "grad_norm": 0.5657485213291926, "learning_rate": 3.618501314700399e-06, "loss": 0.025175708532333373, "step": 32475 }, { "epoch": 0.30569411764705884, "grad_norm": 0.5916345271105212, "learning_rate": 3.618222777754331e-06, "loss": 0.023102733492851257, "step": 32480 }, { "epoch": 0.30574117647058824, "grad_norm": 0.5498067915638182, "learning_rate": 3.6179443051201552e-06, "loss": 0.02035731077194214, "step": 32485 }, { "epoch": 0.30578823529411764, "grad_norm": 0.5958990311748387, "learning_rate": 3.6176658967731264e-06, "loss": 0.023130868375301362, "step": 32490 }, { "epoch": 0.30583529411764704, "grad_norm": 0.5721780358149285, "learning_rate": 3.6173875526885133e-06, "loss": 0.026193925738334657, "step": 32495 }, { "epoch": 0.3058823529411765, "grad_norm": 0.8252091899601304, "learning_rate": 3.6171092728415974e-06, "loss": 0.027664047479629517, "step": 32500 }, { "epoch": 0.3059294117647059, "grad_norm": 0.6338717615660228, "learning_rate": 3.6168310572076744e-06, "loss": 0.01961592584848404, "step": 32505 }, { "epoch": 0.3059764705882353, "grad_norm": 0.3486986028247376, "learning_rate": 3.6165529057620535e-06, "loss": 0.019503629207611083, "step": 32510 }, { "epoch": 0.3060235294117647, "grad_norm": 0.6490784634193821, "learning_rate": 3.6162748184800563e-06, "loss": 0.022294317185878754, "step": 32515 }, { "epoch": 0.3060705882352941, "grad_norm": 0.6126799573403607, "learning_rate": 3.6159967953370167e-06, "loss": 0.02101012021303177, "step": 32520 }, { "epoch": 0.30611764705882355, "grad_norm": 0.7893863275115723, "learning_rate": 3.615718836308285e-06, "loss": 0.021252363920211792, "step": 32525 }, { "epoch": 0.30616470588235295, "grad_norm": 0.7024238176737997, "learning_rate": 3.615440941369221e-06, "loss": 0.020385533571243286, "step": 32530 }, { "epoch": 0.30621176470588235, "grad_norm": 0.6746146149593572, "learning_rate": 3.6151631104952012e-06, "loss": 0.019010014832019806, "step": 32535 }, { "epoch": 0.30625882352941175, "grad_norm": 1.7876333946935794, "learning_rate": 3.614885343661614e-06, "loss": 0.016672033071517944, "step": 32540 }, { "epoch": 0.30630588235294115, "grad_norm": 0.6768937259407812, "learning_rate": 3.614607640843859e-06, "loss": 0.017702588438987733, "step": 32545 }, { "epoch": 0.3063529411764706, "grad_norm": 0.5110153609265652, "learning_rate": 3.6143300020173524e-06, "loss": 0.01725708544254303, "step": 32550 }, { "epoch": 0.3064, "grad_norm": 0.6319006811515452, "learning_rate": 3.6140524271575208e-06, "loss": 0.017295865714550017, "step": 32555 }, { "epoch": 0.3064470588235294, "grad_norm": 0.6949069467153716, "learning_rate": 3.6137749162398066e-06, "loss": 0.023639008402824402, "step": 32560 }, { "epoch": 0.3064941176470588, "grad_norm": 0.7529644041758308, "learning_rate": 3.6134974692396636e-06, "loss": 0.022385561466217042, "step": 32565 }, { "epoch": 0.30654117647058826, "grad_norm": 0.518967353531699, "learning_rate": 3.6132200861325587e-06, "loss": 0.020275941491127013, "step": 32570 }, { "epoch": 0.30658823529411766, "grad_norm": 0.494121386361207, "learning_rate": 3.612942766893973e-06, "loss": 0.018846693634986877, "step": 32575 }, { "epoch": 0.30663529411764706, "grad_norm": 0.6312418465748221, "learning_rate": 3.612665511499401e-06, "loss": 0.021502488851547243, "step": 32580 }, { "epoch": 0.30668235294117646, "grad_norm": 0.5404893872362279, "learning_rate": 3.612388319924349e-06, "loss": 0.026266184449195863, "step": 32585 }, { "epoch": 0.30672941176470586, "grad_norm": 0.7088207984013782, "learning_rate": 3.6121111921443357e-06, "loss": 0.025349447131156923, "step": 32590 }, { "epoch": 0.3067764705882353, "grad_norm": 0.7400763919621094, "learning_rate": 3.6118341281348968e-06, "loss": 0.02153606414794922, "step": 32595 }, { "epoch": 0.3068235294117647, "grad_norm": 0.3735476693437813, "learning_rate": 3.6115571278715773e-06, "loss": 0.01864216923713684, "step": 32600 }, { "epoch": 0.3068705882352941, "grad_norm": 0.6816276592799618, "learning_rate": 3.6112801913299374e-06, "loss": 0.026764604449272155, "step": 32605 }, { "epoch": 0.3069176470588235, "grad_norm": 0.3754825358747778, "learning_rate": 3.6110033184855496e-06, "loss": 0.019930119812488555, "step": 32610 }, { "epoch": 0.3069647058823529, "grad_norm": 0.7615956912573792, "learning_rate": 3.6107265093139993e-06, "loss": 0.025914299488067626, "step": 32615 }, { "epoch": 0.3070117647058824, "grad_norm": 0.5774167638866451, "learning_rate": 3.6104497637908856e-06, "loss": 0.02068350315093994, "step": 32620 }, { "epoch": 0.3070588235294118, "grad_norm": 0.5369756636107688, "learning_rate": 3.6101730818918205e-06, "loss": 0.021015989780426025, "step": 32625 }, { "epoch": 0.3071058823529412, "grad_norm": 0.7978539953650224, "learning_rate": 3.6098964635924287e-06, "loss": 0.03001604676246643, "step": 32630 }, { "epoch": 0.3071529411764706, "grad_norm": 0.6493571872189859, "learning_rate": 3.609619908868349e-06, "loss": 0.01813690662384033, "step": 32635 }, { "epoch": 0.3072, "grad_norm": 0.5944335467197702, "learning_rate": 3.6093434176952313e-06, "loss": 0.02816084921360016, "step": 32640 }, { "epoch": 0.30724705882352943, "grad_norm": 0.3959029863271617, "learning_rate": 3.6090669900487414e-06, "loss": 0.025151267647743225, "step": 32645 }, { "epoch": 0.30729411764705883, "grad_norm": 0.42586269760123296, "learning_rate": 3.6087906259045552e-06, "loss": 0.022105297446250914, "step": 32650 }, { "epoch": 0.30734117647058823, "grad_norm": 0.48633988300638914, "learning_rate": 3.6085143252383636e-06, "loss": 0.02243908941745758, "step": 32655 }, { "epoch": 0.30738823529411763, "grad_norm": 0.47847974777981506, "learning_rate": 3.6082380880258698e-06, "loss": 0.01742241680622101, "step": 32660 }, { "epoch": 0.3074352941176471, "grad_norm": 0.5679660053523213, "learning_rate": 3.6079619142427907e-06, "loss": 0.01724977195262909, "step": 32665 }, { "epoch": 0.3074823529411765, "grad_norm": 0.4844886242283738, "learning_rate": 3.607685803864854e-06, "loss": 0.02070653289556503, "step": 32670 }, { "epoch": 0.3075294117647059, "grad_norm": 0.735722754530459, "learning_rate": 3.607409756867805e-06, "loss": 0.024699409306049348, "step": 32675 }, { "epoch": 0.3075764705882353, "grad_norm": 0.6292228503861148, "learning_rate": 3.607133773227396e-06, "loss": 0.021492895483970643, "step": 32680 }, { "epoch": 0.3076235294117647, "grad_norm": 0.49287323979583053, "learning_rate": 3.6068578529193972e-06, "loss": 0.01980857253074646, "step": 32685 }, { "epoch": 0.30767058823529414, "grad_norm": 0.49916709137314375, "learning_rate": 3.6065819959195884e-06, "loss": 0.02350029945373535, "step": 32690 }, { "epoch": 0.30771764705882354, "grad_norm": 0.5370471115875541, "learning_rate": 3.6063062022037656e-06, "loss": 0.024785833060741426, "step": 32695 }, { "epoch": 0.30776470588235294, "grad_norm": 0.5573417433290397, "learning_rate": 3.606030471747734e-06, "loss": 0.026929089426994325, "step": 32700 }, { "epoch": 0.30781176470588234, "grad_norm": 1.0712553772439646, "learning_rate": 3.605754804527316e-06, "loss": 0.027889692783355714, "step": 32705 }, { "epoch": 0.30785882352941174, "grad_norm": 0.5743316497584078, "learning_rate": 3.605479200518343e-06, "loss": 0.02011553943157196, "step": 32710 }, { "epoch": 0.3079058823529412, "grad_norm": 0.6115904812978292, "learning_rate": 3.6052036596966618e-06, "loss": 0.021671631932258607, "step": 32715 }, { "epoch": 0.3079529411764706, "grad_norm": 0.4367440576574606, "learning_rate": 3.6049281820381304e-06, "loss": 0.022224003076553346, "step": 32720 }, { "epoch": 0.308, "grad_norm": 0.43795611924197747, "learning_rate": 3.604652767518622e-06, "loss": 0.01883590966463089, "step": 32725 }, { "epoch": 0.3080470588235294, "grad_norm": 0.5208518860444045, "learning_rate": 3.604377416114021e-06, "loss": 0.025319784879684448, "step": 32730 }, { "epoch": 0.3080941176470588, "grad_norm": 0.4176480200233574, "learning_rate": 3.6041021278002243e-06, "loss": 0.017824719846248626, "step": 32735 }, { "epoch": 0.30814117647058825, "grad_norm": 0.5318881239556714, "learning_rate": 3.603826902553143e-06, "loss": 0.018518415093421937, "step": 32740 }, { "epoch": 0.30818823529411765, "grad_norm": 0.4565480283794255, "learning_rate": 3.6035517403487e-06, "loss": 0.0212859183549881, "step": 32745 }, { "epoch": 0.30823529411764705, "grad_norm": 0.5291988115489324, "learning_rate": 3.603276641162832e-06, "loss": 0.019927236437797546, "step": 32750 }, { "epoch": 0.30828235294117645, "grad_norm": 0.6026062788904535, "learning_rate": 3.6030016049714887e-06, "loss": 0.018297338485717775, "step": 32755 }, { "epoch": 0.3083294117647059, "grad_norm": 0.607920808769555, "learning_rate": 3.6027266317506317e-06, "loss": 0.02171064615249634, "step": 32760 }, { "epoch": 0.3083764705882353, "grad_norm": 0.5209344832442457, "learning_rate": 3.602451721476235e-06, "loss": 0.01984359323978424, "step": 32765 }, { "epoch": 0.3084235294117647, "grad_norm": 0.509278982893549, "learning_rate": 3.602176874124287e-06, "loss": 0.022770896553993225, "step": 32770 }, { "epoch": 0.3084705882352941, "grad_norm": 0.6359043124876087, "learning_rate": 3.601902089670788e-06, "loss": 0.017917245626449585, "step": 32775 }, { "epoch": 0.3085176470588235, "grad_norm": 0.8917710915360149, "learning_rate": 3.601627368091751e-06, "loss": 0.023907047510147095, "step": 32780 }, { "epoch": 0.30856470588235296, "grad_norm": 0.3815973140149107, "learning_rate": 3.6013527093632025e-06, "loss": 0.017684876918792725, "step": 32785 }, { "epoch": 0.30861176470588236, "grad_norm": 0.5608668839873285, "learning_rate": 3.601078113461181e-06, "loss": 0.018919980525970458, "step": 32790 }, { "epoch": 0.30865882352941176, "grad_norm": 0.4611683957804261, "learning_rate": 3.600803580361739e-06, "loss": 0.02429248094558716, "step": 32795 }, { "epoch": 0.30870588235294116, "grad_norm": 0.529462174383578, "learning_rate": 3.60052911004094e-06, "loss": 0.024785354733467102, "step": 32800 }, { "epoch": 0.30875294117647056, "grad_norm": 1.0181273396962685, "learning_rate": 3.6002547024748618e-06, "loss": 0.030973908305168153, "step": 32805 }, { "epoch": 0.3088, "grad_norm": 0.4709085837443725, "learning_rate": 3.599980357639594e-06, "loss": 0.024231454730033873, "step": 32810 }, { "epoch": 0.3088470588235294, "grad_norm": 0.5439395717471797, "learning_rate": 3.59970607551124e-06, "loss": 0.023393121361732484, "step": 32815 }, { "epoch": 0.3088941176470588, "grad_norm": 0.6055788974573327, "learning_rate": 3.599431856065914e-06, "loss": 0.02110520452260971, "step": 32820 }, { "epoch": 0.3089411764705882, "grad_norm": 0.4390396633918706, "learning_rate": 3.599157699279746e-06, "loss": 0.02173939496278763, "step": 32825 }, { "epoch": 0.3089882352941177, "grad_norm": 0.6315900474634635, "learning_rate": 3.598883605128875e-06, "loss": 0.0198258176445961, "step": 32830 }, { "epoch": 0.3090352941176471, "grad_norm": 0.4632452925246689, "learning_rate": 3.5986095735894554e-06, "loss": 0.021735405921936034, "step": 32835 }, { "epoch": 0.3090823529411765, "grad_norm": 0.7009939406914324, "learning_rate": 3.5983356046376544e-06, "loss": 0.028655388951301576, "step": 32840 }, { "epoch": 0.3091294117647059, "grad_norm": 0.5012748473986568, "learning_rate": 3.5980616982496503e-06, "loss": 0.023978886008262635, "step": 32845 }, { "epoch": 0.3091764705882353, "grad_norm": 0.4673157231057263, "learning_rate": 3.597787854401635e-06, "loss": 0.020672743022441865, "step": 32850 }, { "epoch": 0.30922352941176473, "grad_norm": 0.8237098304201278, "learning_rate": 3.597514073069813e-06, "loss": 0.020211343467235566, "step": 32855 }, { "epoch": 0.30927058823529413, "grad_norm": 0.5316874773454077, "learning_rate": 3.5972403542304003e-06, "loss": 0.019913774728775025, "step": 32860 }, { "epoch": 0.30931764705882353, "grad_norm": 0.6513964757487714, "learning_rate": 3.5969666978596286e-06, "loss": 0.019832393527030943, "step": 32865 }, { "epoch": 0.30936470588235293, "grad_norm": 0.6578431944197853, "learning_rate": 3.596693103933739e-06, "loss": 0.024713081121444703, "step": 32870 }, { "epoch": 0.30941176470588233, "grad_norm": 0.623254805801549, "learning_rate": 3.5964195724289873e-06, "loss": 0.0199002742767334, "step": 32875 }, { "epoch": 0.3094588235294118, "grad_norm": 0.4826050024104156, "learning_rate": 3.59614610332164e-06, "loss": 0.01959882378578186, "step": 32880 }, { "epoch": 0.3095058823529412, "grad_norm": 0.6678920061345686, "learning_rate": 3.595872696587979e-06, "loss": 0.02076745927333832, "step": 32885 }, { "epoch": 0.3095529411764706, "grad_norm": 5.518525900478496, "learning_rate": 3.5955993522042964e-06, "loss": 0.023621290922164917, "step": 32890 }, { "epoch": 0.3096, "grad_norm": 0.5680149113470484, "learning_rate": 3.5953260701468984e-06, "loss": 0.017950977385044097, "step": 32895 }, { "epoch": 0.3096470588235294, "grad_norm": 0.836048766853156, "learning_rate": 3.595052850392102e-06, "loss": 0.03180727958679199, "step": 32900 }, { "epoch": 0.30969411764705884, "grad_norm": 0.8221516724520695, "learning_rate": 3.594779692916239e-06, "loss": 0.022286951541900635, "step": 32905 }, { "epoch": 0.30974117647058824, "grad_norm": 0.4727350128750369, "learning_rate": 3.5945065976956522e-06, "loss": 0.019392500817775726, "step": 32910 }, { "epoch": 0.30978823529411764, "grad_norm": 0.5720460925630733, "learning_rate": 3.5942335647066983e-06, "loss": 0.021907603740692137, "step": 32915 }, { "epoch": 0.30983529411764704, "grad_norm": 0.9695258419259041, "learning_rate": 3.5939605939257442e-06, "loss": 0.021102511882781984, "step": 32920 }, { "epoch": 0.3098823529411765, "grad_norm": 0.6557502733197068, "learning_rate": 3.5936876853291736e-06, "loss": 0.023154979944229125, "step": 32925 }, { "epoch": 0.3099294117647059, "grad_norm": 0.6176143340870106, "learning_rate": 3.5934148388933775e-06, "loss": 0.019651851058006285, "step": 32930 }, { "epoch": 0.3099764705882353, "grad_norm": 0.6150997005422172, "learning_rate": 3.5931420545947643e-06, "loss": 0.018064548075199128, "step": 32935 }, { "epoch": 0.3100235294117647, "grad_norm": 0.5748545594350996, "learning_rate": 3.5928693324097506e-06, "loss": 0.020587369799613953, "step": 32940 }, { "epoch": 0.3100705882352941, "grad_norm": 1.0033050497315072, "learning_rate": 3.592596672314769e-06, "loss": 0.019155722856521607, "step": 32945 }, { "epoch": 0.31011764705882355, "grad_norm": 0.6519061661850246, "learning_rate": 3.592324074286263e-06, "loss": 0.02071240544319153, "step": 32950 }, { "epoch": 0.31016470588235295, "grad_norm": 0.5638889573234919, "learning_rate": 3.5920515383006887e-06, "loss": 0.0235548734664917, "step": 32955 }, { "epoch": 0.31021176470588235, "grad_norm": 0.44890118729638373, "learning_rate": 3.591779064334514e-06, "loss": 0.023189714550971983, "step": 32960 }, { "epoch": 0.31025882352941175, "grad_norm": 0.6049842090968621, "learning_rate": 3.5915066523642223e-06, "loss": 0.018107807636260985, "step": 32965 }, { "epoch": 0.31030588235294115, "grad_norm": 0.6136962452107172, "learning_rate": 3.5912343023663055e-06, "loss": 0.024529334902763367, "step": 32970 }, { "epoch": 0.3103529411764706, "grad_norm": 0.3706242892433496, "learning_rate": 3.5909620143172707e-06, "loss": 0.02259775847196579, "step": 32975 }, { "epoch": 0.3104, "grad_norm": 0.4183649890070336, "learning_rate": 3.590689788193636e-06, "loss": 0.021593642234802247, "step": 32980 }, { "epoch": 0.3104470588235294, "grad_norm": 0.7211289290033465, "learning_rate": 3.5904176239719322e-06, "loss": 0.028795069456100462, "step": 32985 }, { "epoch": 0.3104941176470588, "grad_norm": 0.462485068612771, "learning_rate": 3.590145521628705e-06, "loss": 0.019150134921073914, "step": 32990 }, { "epoch": 0.3105411764705882, "grad_norm": 0.6123719623202026, "learning_rate": 3.589873481140508e-06, "loss": 0.026046162843704222, "step": 32995 }, { "epoch": 0.31058823529411766, "grad_norm": 0.5046530794918958, "learning_rate": 3.5896015024839104e-06, "loss": 0.022463999688625336, "step": 33000 }, { "epoch": 0.31063529411764707, "grad_norm": 0.7489225107582436, "learning_rate": 3.5893295856354942e-06, "loss": 0.027396661043167115, "step": 33005 }, { "epoch": 0.31068235294117647, "grad_norm": 0.34704651470563563, "learning_rate": 3.589057730571851e-06, "loss": 0.02077782303094864, "step": 33010 }, { "epoch": 0.31072941176470587, "grad_norm": 0.5967379141114877, "learning_rate": 3.5887859372695877e-06, "loss": 0.017857103049755095, "step": 33015 }, { "epoch": 0.3107764705882353, "grad_norm": 0.5717082903132454, "learning_rate": 3.588514205705322e-06, "loss": 0.01999658942222595, "step": 33020 }, { "epoch": 0.3108235294117647, "grad_norm": 0.5939163373120442, "learning_rate": 3.5882425358556846e-06, "loss": 0.020925365388393402, "step": 33025 }, { "epoch": 0.3108705882352941, "grad_norm": 0.3969917491830044, "learning_rate": 3.587970927697318e-06, "loss": 0.01950763463973999, "step": 33030 }, { "epoch": 0.3109176470588235, "grad_norm": 0.4639325748633742, "learning_rate": 3.587699381206878e-06, "loss": 0.017903798818588258, "step": 33035 }, { "epoch": 0.3109647058823529, "grad_norm": 0.6170814386524984, "learning_rate": 3.5874278963610314e-06, "loss": 0.026663821935653687, "step": 33040 }, { "epoch": 0.3110117647058824, "grad_norm": 0.6795226208977032, "learning_rate": 3.5871564731364592e-06, "loss": 0.022851945459842683, "step": 33045 }, { "epoch": 0.3110588235294118, "grad_norm": 0.5796104172371304, "learning_rate": 3.586885111509853e-06, "loss": 0.018845759332180023, "step": 33050 }, { "epoch": 0.3111058823529412, "grad_norm": 0.5287961076869553, "learning_rate": 3.5866138114579174e-06, "loss": 0.020880603790283205, "step": 33055 }, { "epoch": 0.3111529411764706, "grad_norm": 0.544420970974496, "learning_rate": 3.586342572957371e-06, "loss": 0.027606019377708436, "step": 33060 }, { "epoch": 0.3112, "grad_norm": 0.5070778424883022, "learning_rate": 3.586071395984941e-06, "loss": 0.01764085441827774, "step": 33065 }, { "epoch": 0.31124705882352943, "grad_norm": 0.8031031755419494, "learning_rate": 3.5858002805173696e-06, "loss": 0.023173749446868896, "step": 33070 }, { "epoch": 0.31129411764705883, "grad_norm": 0.6652722522818527, "learning_rate": 3.5855292265314117e-06, "loss": 0.020018354058265686, "step": 33075 }, { "epoch": 0.31134117647058823, "grad_norm": 0.7447058160854347, "learning_rate": 3.585258234003832e-06, "loss": 0.022042790055274965, "step": 33080 }, { "epoch": 0.31138823529411763, "grad_norm": 0.49914090614320233, "learning_rate": 3.5849873029114107e-06, "loss": 0.022204461693763732, "step": 33085 }, { "epoch": 0.31143529411764703, "grad_norm": 0.5165454190837511, "learning_rate": 3.5847164332309376e-06, "loss": 0.02065320611000061, "step": 33090 }, { "epoch": 0.3114823529411765, "grad_norm": 0.5811391463682821, "learning_rate": 3.5844456249392167e-06, "loss": 0.025825053453445435, "step": 33095 }, { "epoch": 0.3115294117647059, "grad_norm": 0.41414753050882597, "learning_rate": 3.584174878013062e-06, "loss": 0.023341017961502075, "step": 33100 }, { "epoch": 0.3115764705882353, "grad_norm": 0.3974597090488642, "learning_rate": 3.583904192429302e-06, "loss": 0.017779289186000823, "step": 33105 }, { "epoch": 0.3116235294117647, "grad_norm": 0.58012350319704, "learning_rate": 3.583633568164776e-06, "loss": 0.031830206513404846, "step": 33110 }, { "epoch": 0.31167058823529414, "grad_norm": 0.49580441206027265, "learning_rate": 3.5833630051963374e-06, "loss": 0.018247607350349426, "step": 33115 }, { "epoch": 0.31171764705882354, "grad_norm": 0.4867133461731465, "learning_rate": 3.583092503500849e-06, "loss": 0.01607757806777954, "step": 33120 }, { "epoch": 0.31176470588235294, "grad_norm": 0.5887184947893862, "learning_rate": 3.5828220630551883e-06, "loss": 0.020114174485206603, "step": 33125 }, { "epoch": 0.31181176470588234, "grad_norm": 0.8623067658669971, "learning_rate": 3.582551683836244e-06, "loss": 0.027072817087173462, "step": 33130 }, { "epoch": 0.31185882352941174, "grad_norm": 1.1227500638549759, "learning_rate": 3.5822813658209164e-06, "loss": 0.023090837895870207, "step": 33135 }, { "epoch": 0.3119058823529412, "grad_norm": 0.6412502403375347, "learning_rate": 3.58201110898612e-06, "loss": 0.0216171532869339, "step": 33140 }, { "epoch": 0.3119529411764706, "grad_norm": 0.6966527314901895, "learning_rate": 3.5817409133087783e-06, "loss": 0.02166754603385925, "step": 33145 }, { "epoch": 0.312, "grad_norm": 0.9034653459945081, "learning_rate": 3.5814707787658305e-06, "loss": 0.02690749168395996, "step": 33150 }, { "epoch": 0.3120470588235294, "grad_norm": 0.6303299783243976, "learning_rate": 3.5812007053342253e-06, "loss": 0.022367982566356658, "step": 33155 }, { "epoch": 0.3120941176470588, "grad_norm": 0.6048591796922497, "learning_rate": 3.580930692990925e-06, "loss": 0.019593600928783417, "step": 33160 }, { "epoch": 0.31214117647058826, "grad_norm": 0.4263265324599612, "learning_rate": 3.5806607417129037e-06, "loss": 0.018148890137672423, "step": 33165 }, { "epoch": 0.31218823529411766, "grad_norm": 0.5174093081388009, "learning_rate": 3.580390851477148e-06, "loss": 0.017404147982597352, "step": 33170 }, { "epoch": 0.31223529411764706, "grad_norm": 0.5693228428978838, "learning_rate": 3.5801210222606554e-06, "loss": 0.020965036749839783, "step": 33175 }, { "epoch": 0.31228235294117646, "grad_norm": 0.5086524790642242, "learning_rate": 3.5798512540404367e-06, "loss": 0.02203517556190491, "step": 33180 }, { "epoch": 0.31232941176470586, "grad_norm": 0.41866641657286124, "learning_rate": 3.5795815467935146e-06, "loss": 0.021600356698036192, "step": 33185 }, { "epoch": 0.3123764705882353, "grad_norm": 0.48931596327632143, "learning_rate": 3.5793119004969235e-06, "loss": 0.024323147535324097, "step": 33190 }, { "epoch": 0.3124235294117647, "grad_norm": 0.7296685262238176, "learning_rate": 3.5790423151277116e-06, "loss": 0.01791633665561676, "step": 33195 }, { "epoch": 0.3124705882352941, "grad_norm": 0.7521723504330695, "learning_rate": 3.578772790662936e-06, "loss": 0.029124253988265993, "step": 33200 }, { "epoch": 0.3125176470588235, "grad_norm": 0.660410006805587, "learning_rate": 3.578503327079668e-06, "loss": 0.024266190826892853, "step": 33205 }, { "epoch": 0.31256470588235297, "grad_norm": 0.4794749970716243, "learning_rate": 3.5782339243549912e-06, "loss": 0.01953517198562622, "step": 33210 }, { "epoch": 0.31261176470588237, "grad_norm": 0.7085911644557986, "learning_rate": 3.577964582466001e-06, "loss": 0.02123730182647705, "step": 33215 }, { "epoch": 0.31265882352941177, "grad_norm": 1.0968884655277196, "learning_rate": 3.577695301389805e-06, "loss": 0.02281075865030289, "step": 33220 }, { "epoch": 0.31270588235294117, "grad_norm": 0.7568660674845172, "learning_rate": 3.577426081103521e-06, "loss": 0.024131561815738677, "step": 33225 }, { "epoch": 0.31275294117647057, "grad_norm": 0.6552260977319923, "learning_rate": 3.577156921584281e-06, "loss": 0.025684604048728944, "step": 33230 }, { "epoch": 0.3128, "grad_norm": 0.537512170457495, "learning_rate": 3.576887822809229e-06, "loss": 0.02156960666179657, "step": 33235 }, { "epoch": 0.3128470588235294, "grad_norm": 0.3967552615664324, "learning_rate": 3.5766187847555197e-06, "loss": 0.02264358550310135, "step": 33240 }, { "epoch": 0.3128941176470588, "grad_norm": 0.49721978077503265, "learning_rate": 3.5763498074003212e-06, "loss": 0.019068937003612518, "step": 33245 }, { "epoch": 0.3129411764705882, "grad_norm": 0.4139906226738791, "learning_rate": 3.576080890720812e-06, "loss": 0.02034142017364502, "step": 33250 }, { "epoch": 0.3129882352941176, "grad_norm": 0.47576723203952165, "learning_rate": 3.575812034694185e-06, "loss": 0.024669256806373597, "step": 33255 }, { "epoch": 0.3130352941176471, "grad_norm": 0.5392328831059052, "learning_rate": 3.5755432392976425e-06, "loss": 0.020881855487823488, "step": 33260 }, { "epoch": 0.3130823529411765, "grad_norm": 0.49066487272367026, "learning_rate": 3.5752745045084e-06, "loss": 0.03054651916027069, "step": 33265 }, { "epoch": 0.3131294117647059, "grad_norm": 0.7824806860405402, "learning_rate": 3.5750058303036857e-06, "loss": 0.021558044850826262, "step": 33270 }, { "epoch": 0.3131764705882353, "grad_norm": 0.47213483444738524, "learning_rate": 3.574737216660738e-06, "loss": 0.021610382199287414, "step": 33275 }, { "epoch": 0.3132235294117647, "grad_norm": 0.5096774068275695, "learning_rate": 3.574468663556809e-06, "loss": 0.020573779940605164, "step": 33280 }, { "epoch": 0.31327058823529413, "grad_norm": 0.6084195385424157, "learning_rate": 3.5742001709691632e-06, "loss": 0.01978103518486023, "step": 33285 }, { "epoch": 0.31331764705882353, "grad_norm": 0.8807337700371185, "learning_rate": 3.5739317388750735e-06, "loss": 0.020599104464054108, "step": 33290 }, { "epoch": 0.31336470588235293, "grad_norm": 0.8733474620763249, "learning_rate": 3.573663367251829e-06, "loss": 0.02180211842060089, "step": 33295 }, { "epoch": 0.31341176470588233, "grad_norm": 0.5823201045317767, "learning_rate": 3.573395056076728e-06, "loss": 0.027330148220062255, "step": 33300 }, { "epoch": 0.3134588235294118, "grad_norm": 0.44376399634192637, "learning_rate": 3.5731268053270816e-06, "loss": 0.023776575922966003, "step": 33305 }, { "epoch": 0.3135058823529412, "grad_norm": 0.5740633041592642, "learning_rate": 3.5728586149802138e-06, "loss": 0.017848211526870727, "step": 33310 }, { "epoch": 0.3135529411764706, "grad_norm": 0.5062634416917916, "learning_rate": 3.572590485013458e-06, "loss": 0.017631101608276366, "step": 33315 }, { "epoch": 0.3136, "grad_norm": 0.5325343980280473, "learning_rate": 3.5723224154041624e-06, "loss": 0.02532340884208679, "step": 33320 }, { "epoch": 0.3136470588235294, "grad_norm": 0.5886571707412135, "learning_rate": 3.572054406129686e-06, "loss": 0.021789346635341645, "step": 33325 }, { "epoch": 0.31369411764705885, "grad_norm": 0.9182034836967164, "learning_rate": 3.571786457167397e-06, "loss": 0.025580662488937377, "step": 33330 }, { "epoch": 0.31374117647058825, "grad_norm": 0.48465857619488684, "learning_rate": 3.5715185684946817e-06, "loss": 0.018315747380256653, "step": 33335 }, { "epoch": 0.31378823529411765, "grad_norm": 0.5511833074955155, "learning_rate": 3.5712507400889313e-06, "loss": 0.02244718074798584, "step": 33340 }, { "epoch": 0.31383529411764705, "grad_norm": 0.32362812936897284, "learning_rate": 3.570982971927554e-06, "loss": 0.015700146555900574, "step": 33345 }, { "epoch": 0.31388235294117645, "grad_norm": 0.6053633731168969, "learning_rate": 3.5707152639879673e-06, "loss": 0.020975276827812195, "step": 33350 }, { "epoch": 0.3139294117647059, "grad_norm": 0.4517876846666702, "learning_rate": 3.570447616247601e-06, "loss": 0.02137269824743271, "step": 33355 }, { "epoch": 0.3139764705882353, "grad_norm": 0.38815625886552935, "learning_rate": 3.5701800286838966e-06, "loss": 0.01861232966184616, "step": 33360 }, { "epoch": 0.3140235294117647, "grad_norm": 0.43431501721758, "learning_rate": 3.5699125012743085e-06, "loss": 0.04318981170654297, "step": 33365 }, { "epoch": 0.3140705882352941, "grad_norm": 0.5392957884947034, "learning_rate": 3.5696450339963016e-06, "loss": 0.021171325445175172, "step": 33370 }, { "epoch": 0.31411764705882356, "grad_norm": 0.48487214700095627, "learning_rate": 3.5693776268273534e-06, "loss": 0.025026118755340575, "step": 33375 }, { "epoch": 0.31416470588235296, "grad_norm": 0.9300924478913053, "learning_rate": 3.5691102797449536e-06, "loss": 0.02700181007385254, "step": 33380 }, { "epoch": 0.31421176470588236, "grad_norm": 0.7132375783552433, "learning_rate": 3.5688429927266015e-06, "loss": 0.02165449261665344, "step": 33385 }, { "epoch": 0.31425882352941176, "grad_norm": 1.0641434883466334, "learning_rate": 3.5685757657498114e-06, "loss": 0.019864626228809357, "step": 33390 }, { "epoch": 0.31430588235294116, "grad_norm": 0.6929127166173833, "learning_rate": 3.5683085987921074e-06, "loss": 0.018164192140102387, "step": 33395 }, { "epoch": 0.3143529411764706, "grad_norm": 0.42449617844906085, "learning_rate": 3.568041491831026e-06, "loss": 0.019212520122528075, "step": 33400 }, { "epoch": 0.3144, "grad_norm": 0.5716638438864747, "learning_rate": 3.5677744448441132e-06, "loss": 0.020199319720268248, "step": 33405 }, { "epoch": 0.3144470588235294, "grad_norm": 0.5684881209301363, "learning_rate": 3.5675074578089313e-06, "loss": 0.018746130168437958, "step": 33410 }, { "epoch": 0.3144941176470588, "grad_norm": 0.46240476531872815, "learning_rate": 3.5672405307030515e-06, "loss": 0.01945009231567383, "step": 33415 }, { "epoch": 0.3145411764705882, "grad_norm": 0.5122223765424678, "learning_rate": 3.566973663504056e-06, "loss": 0.023233914375305177, "step": 33420 }, { "epoch": 0.31458823529411767, "grad_norm": 0.6800746900094575, "learning_rate": 3.5667068561895397e-06, "loss": 0.024144795536994935, "step": 33425 }, { "epoch": 0.31463529411764707, "grad_norm": 0.5959698323648133, "learning_rate": 3.56644010873711e-06, "loss": 0.02008182406425476, "step": 33430 }, { "epoch": 0.31468235294117647, "grad_norm": 0.44597436331754453, "learning_rate": 3.5661734211243867e-06, "loss": 0.019658389687538146, "step": 33435 }, { "epoch": 0.31472941176470587, "grad_norm": 0.5974221337421818, "learning_rate": 3.5659067933289978e-06, "loss": 0.020675820112228394, "step": 33440 }, { "epoch": 0.31477647058823527, "grad_norm": 0.5957703425785225, "learning_rate": 3.5656402253285856e-06, "loss": 0.019487982988357543, "step": 33445 }, { "epoch": 0.3148235294117647, "grad_norm": 0.664315463924913, "learning_rate": 3.565373717100804e-06, "loss": 0.018120729923248292, "step": 33450 }, { "epoch": 0.3148705882352941, "grad_norm": 0.9629739459501843, "learning_rate": 3.565107268623319e-06, "loss": 0.026574546098709108, "step": 33455 }, { "epoch": 0.3149176470588235, "grad_norm": 0.6130402609263229, "learning_rate": 3.564840879873806e-06, "loss": 0.019193612039089203, "step": 33460 }, { "epoch": 0.3149647058823529, "grad_norm": 0.6449739187704698, "learning_rate": 3.5645745508299554e-06, "loss": 0.02266208827495575, "step": 33465 }, { "epoch": 0.3150117647058824, "grad_norm": 0.6458373090820915, "learning_rate": 3.564308281469466e-06, "loss": 0.019323718547821046, "step": 33470 }, { "epoch": 0.3150588235294118, "grad_norm": 0.5718864046597444, "learning_rate": 3.5640420717700516e-06, "loss": 0.02298728972673416, "step": 33475 }, { "epoch": 0.3151058823529412, "grad_norm": 0.5078578210519596, "learning_rate": 3.563775921709433e-06, "loss": 0.02363606244325638, "step": 33480 }, { "epoch": 0.3151529411764706, "grad_norm": 0.5635726655086497, "learning_rate": 3.5635098312653484e-06, "loss": 0.020532163977622985, "step": 33485 }, { "epoch": 0.3152, "grad_norm": 0.8668137249576086, "learning_rate": 3.563243800415542e-06, "loss": 0.022290021181106567, "step": 33490 }, { "epoch": 0.31524705882352944, "grad_norm": 0.9169931327488237, "learning_rate": 3.562977829137775e-06, "loss": 0.026471176743507387, "step": 33495 }, { "epoch": 0.31529411764705884, "grad_norm": 1.2719894131240452, "learning_rate": 3.562711917409816e-06, "loss": 0.024140635132789613, "step": 33500 }, { "epoch": 0.31534117647058824, "grad_norm": 0.5174195152235379, "learning_rate": 3.562446065209446e-06, "loss": 0.01940726339817047, "step": 33505 }, { "epoch": 0.31538823529411764, "grad_norm": 0.8353243605832115, "learning_rate": 3.56218027251446e-06, "loss": 0.023952464759349822, "step": 33510 }, { "epoch": 0.31543529411764704, "grad_norm": 0.8392198298290265, "learning_rate": 3.5619145393026627e-06, "loss": 0.0191007599234581, "step": 33515 }, { "epoch": 0.3154823529411765, "grad_norm": 0.676728998572337, "learning_rate": 3.56164886555187e-06, "loss": 0.022168779373168947, "step": 33520 }, { "epoch": 0.3155294117647059, "grad_norm": 0.5894089583775227, "learning_rate": 3.56138325123991e-06, "loss": 0.024037857353687287, "step": 33525 }, { "epoch": 0.3155764705882353, "grad_norm": 0.5019014342601908, "learning_rate": 3.561117696344622e-06, "loss": 0.031148749589920043, "step": 33530 }, { "epoch": 0.3156235294117647, "grad_norm": 0.5531559321164617, "learning_rate": 3.5608522008438586e-06, "loss": 0.029822897911071778, "step": 33535 }, { "epoch": 0.3156705882352941, "grad_norm": 0.992081721412206, "learning_rate": 3.5605867647154812e-06, "loss": 0.023596666753292084, "step": 33540 }, { "epoch": 0.31571764705882355, "grad_norm": 0.5480895092407453, "learning_rate": 3.5603213879373654e-06, "loss": 0.02094578295946121, "step": 33545 }, { "epoch": 0.31576470588235295, "grad_norm": 0.5376426058855177, "learning_rate": 3.560056070487396e-06, "loss": 0.023612667620182038, "step": 33550 }, { "epoch": 0.31581176470588235, "grad_norm": 0.4526178809111849, "learning_rate": 3.5597908123434718e-06, "loss": 0.02302628755569458, "step": 33555 }, { "epoch": 0.31585882352941175, "grad_norm": 0.9936426283883278, "learning_rate": 3.5595256134835e-06, "loss": 0.03046838045120239, "step": 33560 }, { "epoch": 0.3159058823529412, "grad_norm": 0.5921959248581239, "learning_rate": 3.559260473885402e-06, "loss": 0.019347232580184937, "step": 33565 }, { "epoch": 0.3159529411764706, "grad_norm": 0.9690349964297325, "learning_rate": 3.5589953935271106e-06, "loss": 0.027856606245040893, "step": 33570 }, { "epoch": 0.316, "grad_norm": 0.636100100794725, "learning_rate": 3.558730372386568e-06, "loss": 0.022859206795692442, "step": 33575 }, { "epoch": 0.3160470588235294, "grad_norm": 0.4887144150221197, "learning_rate": 3.5584654104417294e-06, "loss": 0.017935116589069367, "step": 33580 }, { "epoch": 0.3160941176470588, "grad_norm": 0.5566410066120281, "learning_rate": 3.5582005076705616e-06, "loss": 0.02048593759536743, "step": 33585 }, { "epoch": 0.31614117647058826, "grad_norm": 0.6054374827920689, "learning_rate": 3.5579356640510427e-06, "loss": 0.017552779614925386, "step": 33590 }, { "epoch": 0.31618823529411766, "grad_norm": 0.5389599847421901, "learning_rate": 3.557670879561162e-06, "loss": 0.02531256675720215, "step": 33595 }, { "epoch": 0.31623529411764706, "grad_norm": 0.5494006464318196, "learning_rate": 3.5574061541789206e-06, "loss": 0.023996226489543915, "step": 33600 }, { "epoch": 0.31628235294117646, "grad_norm": 0.7019373579195107, "learning_rate": 3.5571414878823297e-06, "loss": 0.02037666440010071, "step": 33605 }, { "epoch": 0.31632941176470586, "grad_norm": 0.5111039540551957, "learning_rate": 3.556876880649415e-06, "loss": 0.02079533040523529, "step": 33610 }, { "epoch": 0.3163764705882353, "grad_norm": 0.5187253579927574, "learning_rate": 3.55661233245821e-06, "loss": 0.025540223717689513, "step": 33615 }, { "epoch": 0.3164235294117647, "grad_norm": 0.6357670408107716, "learning_rate": 3.556347843286762e-06, "loss": 0.018423911929130555, "step": 33620 }, { "epoch": 0.3164705882352941, "grad_norm": 0.6768588827127562, "learning_rate": 3.556083413113129e-06, "loss": 0.02542162239551544, "step": 33625 }, { "epoch": 0.3165176470588235, "grad_norm": 0.6333600008955496, "learning_rate": 3.555819041915381e-06, "loss": 0.02223527729511261, "step": 33630 }, { "epoch": 0.3165647058823529, "grad_norm": 0.3926376150448869, "learning_rate": 3.555554729671599e-06, "loss": 0.024081598222255706, "step": 33635 }, { "epoch": 0.31661176470588237, "grad_norm": 0.52249750060561, "learning_rate": 3.555290476359875e-06, "loss": 0.024564284086227416, "step": 33640 }, { "epoch": 0.31665882352941177, "grad_norm": 0.4027845519907784, "learning_rate": 3.555026281958312e-06, "loss": 0.019633084535598755, "step": 33645 }, { "epoch": 0.31670588235294117, "grad_norm": 0.32620223645019925, "learning_rate": 3.5547621464450264e-06, "loss": 0.020490947365760803, "step": 33650 }, { "epoch": 0.31675294117647057, "grad_norm": 0.41278013141186093, "learning_rate": 3.5544980697981436e-06, "loss": 0.0184119313955307, "step": 33655 }, { "epoch": 0.3168, "grad_norm": 0.5143765072939286, "learning_rate": 3.5542340519958026e-06, "loss": 0.01989283561706543, "step": 33660 }, { "epoch": 0.3168470588235294, "grad_norm": 0.4737804520883669, "learning_rate": 3.553970093016152e-06, "loss": 0.016235591471195222, "step": 33665 }, { "epoch": 0.3168941176470588, "grad_norm": 0.6197106240236895, "learning_rate": 3.5537061928373518e-06, "loss": 0.020611663162708283, "step": 33670 }, { "epoch": 0.3169411764705882, "grad_norm": 0.6314054551294322, "learning_rate": 3.5534423514375752e-06, "loss": 0.021124809980392456, "step": 33675 }, { "epoch": 0.3169882352941176, "grad_norm": 0.43559202358919036, "learning_rate": 3.5531785687950043e-06, "loss": 0.020919115841388704, "step": 33680 }, { "epoch": 0.3170352941176471, "grad_norm": 0.6157024638050617, "learning_rate": 3.552914844887835e-06, "loss": 0.02087705135345459, "step": 33685 }, { "epoch": 0.3170823529411765, "grad_norm": 0.5574018330716354, "learning_rate": 3.5526511796942713e-06, "loss": 0.019803440570831297, "step": 33690 }, { "epoch": 0.3171294117647059, "grad_norm": 0.6608054036007593, "learning_rate": 3.552387573192533e-06, "loss": 0.028376320004463197, "step": 33695 }, { "epoch": 0.3171764705882353, "grad_norm": 0.5046936731022759, "learning_rate": 3.5521240253608467e-06, "loss": 0.02479294240474701, "step": 33700 }, { "epoch": 0.3172235294117647, "grad_norm": 0.49857250290499305, "learning_rate": 3.5518605361774532e-06, "loss": 0.020353242754936218, "step": 33705 }, { "epoch": 0.31727058823529414, "grad_norm": 0.568416820061012, "learning_rate": 3.5515971056206032e-06, "loss": 0.026983001828193666, "step": 33710 }, { "epoch": 0.31731764705882354, "grad_norm": 0.6505691725732728, "learning_rate": 3.5513337336685598e-06, "loss": 0.02019060552120209, "step": 33715 }, { "epoch": 0.31736470588235294, "grad_norm": 0.564929921274, "learning_rate": 3.551070420299596e-06, "loss": 0.019782087206840514, "step": 33720 }, { "epoch": 0.31741176470588234, "grad_norm": 0.5690970022536129, "learning_rate": 3.5508071654919974e-06, "loss": 0.02280597239732742, "step": 33725 }, { "epoch": 0.31745882352941174, "grad_norm": 0.41387515149366766, "learning_rate": 3.550543969224061e-06, "loss": 0.022019979357719422, "step": 33730 }, { "epoch": 0.3175058823529412, "grad_norm": 0.5688755798358346, "learning_rate": 3.5502808314740917e-06, "loss": 0.02293192744255066, "step": 33735 }, { "epoch": 0.3175529411764706, "grad_norm": 0.5487818570309038, "learning_rate": 3.550017752220411e-06, "loss": 0.0226372629404068, "step": 33740 }, { "epoch": 0.3176, "grad_norm": 0.5275602549199659, "learning_rate": 3.549754731441348e-06, "loss": 0.01789671927690506, "step": 33745 }, { "epoch": 0.3176470588235294, "grad_norm": 0.4797734346423732, "learning_rate": 3.5494917691152437e-06, "loss": 0.020012477040290834, "step": 33750 }, { "epoch": 0.31769411764705885, "grad_norm": 0.44781660623753394, "learning_rate": 3.549228865220451e-06, "loss": 0.02284264862537384, "step": 33755 }, { "epoch": 0.31774117647058825, "grad_norm": 0.5300061495183824, "learning_rate": 3.5489660197353338e-06, "loss": 0.02704018950462341, "step": 33760 }, { "epoch": 0.31778823529411765, "grad_norm": 0.7510904964289983, "learning_rate": 3.548703232638267e-06, "loss": 0.024933406710624696, "step": 33765 }, { "epoch": 0.31783529411764705, "grad_norm": 0.6960767904071381, "learning_rate": 3.5484405039076363e-06, "loss": 0.021027863025665283, "step": 33770 }, { "epoch": 0.31788235294117645, "grad_norm": 0.628576709785833, "learning_rate": 3.5481778335218393e-06, "loss": 0.02379482239484787, "step": 33775 }, { "epoch": 0.3179294117647059, "grad_norm": 0.46023351541933977, "learning_rate": 3.547915221459285e-06, "loss": 0.02239498496055603, "step": 33780 }, { "epoch": 0.3179764705882353, "grad_norm": 0.3948840772438702, "learning_rate": 3.5476526676983925e-06, "loss": 0.01930946111679077, "step": 33785 }, { "epoch": 0.3180235294117647, "grad_norm": 0.7801677589613791, "learning_rate": 3.5473901722175922e-06, "loss": 0.020502910017967224, "step": 33790 }, { "epoch": 0.3180705882352941, "grad_norm": 0.5898919895148556, "learning_rate": 3.547127734995328e-06, "loss": 0.02450893521308899, "step": 33795 }, { "epoch": 0.3181176470588235, "grad_norm": 0.6987570854859708, "learning_rate": 3.5468653560100517e-06, "loss": 0.023519334197044373, "step": 33800 }, { "epoch": 0.31816470588235296, "grad_norm": 0.5321686649805969, "learning_rate": 3.5466030352402284e-06, "loss": 0.021706312894821167, "step": 33805 }, { "epoch": 0.31821176470588236, "grad_norm": 0.4688902406615993, "learning_rate": 3.5463407726643334e-06, "loss": 0.018337355554103853, "step": 33810 }, { "epoch": 0.31825882352941176, "grad_norm": 0.6819251550339723, "learning_rate": 3.5460785682608527e-06, "loss": 0.02338533103466034, "step": 33815 }, { "epoch": 0.31830588235294116, "grad_norm": 0.5364548716563884, "learning_rate": 3.5458164220082853e-06, "loss": 0.021239827573299407, "step": 33820 }, { "epoch": 0.31835294117647056, "grad_norm": 0.6680170507834107, "learning_rate": 3.5455543338851395e-06, "loss": 0.0230252742767334, "step": 33825 }, { "epoch": 0.3184, "grad_norm": 0.525916413794038, "learning_rate": 3.5452923038699353e-06, "loss": 0.021594594419002532, "step": 33830 }, { "epoch": 0.3184470588235294, "grad_norm": 0.7959768590181988, "learning_rate": 3.5450303319412043e-06, "loss": 0.019079709053039552, "step": 33835 }, { "epoch": 0.3184941176470588, "grad_norm": 0.606978252394176, "learning_rate": 3.544768418077489e-06, "loss": 0.023292019963264465, "step": 33840 }, { "epoch": 0.3185411764705882, "grad_norm": 0.6001543309765531, "learning_rate": 3.5445065622573417e-06, "loss": 0.018707041442394257, "step": 33845 }, { "epoch": 0.31858823529411767, "grad_norm": 0.4497076240948196, "learning_rate": 3.5442447644593274e-06, "loss": 0.026357650756835938, "step": 33850 }, { "epoch": 0.31863529411764707, "grad_norm": 0.5660134252512173, "learning_rate": 3.543983024662023e-06, "loss": 0.021496105194091796, "step": 33855 }, { "epoch": 0.31868235294117647, "grad_norm": 0.5980495905913065, "learning_rate": 3.543721342844012e-06, "loss": 0.01957317888736725, "step": 33860 }, { "epoch": 0.31872941176470587, "grad_norm": 0.5472951365131905, "learning_rate": 3.5434597189838947e-06, "loss": 0.01869651973247528, "step": 33865 }, { "epoch": 0.3187764705882353, "grad_norm": 0.7655586657752939, "learning_rate": 3.5431981530602797e-06, "loss": 0.0331084817647934, "step": 33870 }, { "epoch": 0.31882352941176473, "grad_norm": 0.4953195301615439, "learning_rate": 3.542936645051786e-06, "loss": 0.017304690182209016, "step": 33875 }, { "epoch": 0.31887058823529413, "grad_norm": 0.47465749271294777, "learning_rate": 3.542675194937044e-06, "loss": 0.020706772804260254, "step": 33880 }, { "epoch": 0.31891764705882353, "grad_norm": 0.45500863003785424, "learning_rate": 3.5424138026946962e-06, "loss": 0.018466244637966155, "step": 33885 }, { "epoch": 0.31896470588235293, "grad_norm": 0.9868195996274474, "learning_rate": 3.542152468303396e-06, "loss": 0.023697569966316223, "step": 33890 }, { "epoch": 0.31901176470588233, "grad_norm": 0.6865762537551502, "learning_rate": 3.5418911917418063e-06, "loss": 0.01834302246570587, "step": 33895 }, { "epoch": 0.3190588235294118, "grad_norm": 0.43624213818281093, "learning_rate": 3.5416299729886035e-06, "loss": 0.01874624490737915, "step": 33900 }, { "epoch": 0.3191058823529412, "grad_norm": 0.4196331002884787, "learning_rate": 3.5413688120224727e-06, "loss": 0.01951223611831665, "step": 33905 }, { "epoch": 0.3191529411764706, "grad_norm": 0.6836919069703721, "learning_rate": 3.5411077088221097e-06, "loss": 0.021269023418426514, "step": 33910 }, { "epoch": 0.3192, "grad_norm": 0.755706903655584, "learning_rate": 3.540846663366225e-06, "loss": 0.024675263464450835, "step": 33915 }, { "epoch": 0.31924705882352944, "grad_norm": 0.437448867101354, "learning_rate": 3.5405856756335343e-06, "loss": 0.023754677176475524, "step": 33920 }, { "epoch": 0.31929411764705884, "grad_norm": 0.5478899319977825, "learning_rate": 3.54032474560277e-06, "loss": 0.018229837715625762, "step": 33925 }, { "epoch": 0.31934117647058824, "grad_norm": 0.6460283239491271, "learning_rate": 3.5400638732526732e-06, "loss": 0.01971112787723541, "step": 33930 }, { "epoch": 0.31938823529411764, "grad_norm": 0.6093554696358178, "learning_rate": 3.539803058561994e-06, "loss": 0.02274352014064789, "step": 33935 }, { "epoch": 0.31943529411764704, "grad_norm": 0.5150758343713804, "learning_rate": 3.539542301509496e-06, "loss": 0.021294498443603517, "step": 33940 }, { "epoch": 0.3194823529411765, "grad_norm": 0.5547950892622199, "learning_rate": 3.539281602073953e-06, "loss": 0.029087048768997193, "step": 33945 }, { "epoch": 0.3195294117647059, "grad_norm": 0.6092041202837755, "learning_rate": 3.5390209602341497e-06, "loss": 0.02189643830060959, "step": 33950 }, { "epoch": 0.3195764705882353, "grad_norm": 0.7040655803359175, "learning_rate": 3.5387603759688814e-06, "loss": 0.018944299221038817, "step": 33955 }, { "epoch": 0.3196235294117647, "grad_norm": 0.45688952322233956, "learning_rate": 3.5384998492569553e-06, "loss": 0.019012594223022462, "step": 33960 }, { "epoch": 0.3196705882352941, "grad_norm": 1.912549582045515, "learning_rate": 3.5382393800771892e-06, "loss": 0.01686747372150421, "step": 33965 }, { "epoch": 0.31971764705882355, "grad_norm": 0.7627529272371848, "learning_rate": 3.5379789684084094e-06, "loss": 0.020373800396919252, "step": 33970 }, { "epoch": 0.31976470588235295, "grad_norm": 0.9377079855998182, "learning_rate": 3.537718614229458e-06, "loss": 0.02481164038181305, "step": 33975 }, { "epoch": 0.31981176470588235, "grad_norm": 0.6141951459782539, "learning_rate": 3.5374583175191824e-06, "loss": 0.02208973467350006, "step": 33980 }, { "epoch": 0.31985882352941175, "grad_norm": 0.5168262581306706, "learning_rate": 3.5371980782564462e-06, "loss": 0.024435707926750184, "step": 33985 }, { "epoch": 0.31990588235294115, "grad_norm": 0.42727031130649257, "learning_rate": 3.5369378964201197e-06, "loss": 0.022958889603614807, "step": 33990 }, { "epoch": 0.3199529411764706, "grad_norm": 0.5812140824411939, "learning_rate": 3.536677771989086e-06, "loss": 0.024948710203170778, "step": 33995 }, { "epoch": 0.32, "grad_norm": 0.40024033242430757, "learning_rate": 3.5364177049422394e-06, "loss": 0.017555391788482665, "step": 34000 }, { "epoch": 0.3200470588235294, "grad_norm": 0.5184275832810619, "learning_rate": 3.536157695258485e-06, "loss": 0.018335530161857606, "step": 34005 }, { "epoch": 0.3200941176470588, "grad_norm": 0.5545095243038405, "learning_rate": 3.535897742916736e-06, "loss": 0.02115238308906555, "step": 34010 }, { "epoch": 0.32014117647058826, "grad_norm": 0.44828672828017085, "learning_rate": 3.535637847895921e-06, "loss": 0.018778708577156068, "step": 34015 }, { "epoch": 0.32018823529411766, "grad_norm": 0.7337381096405285, "learning_rate": 3.5353780101749755e-06, "loss": 0.020838981866836546, "step": 34020 }, { "epoch": 0.32023529411764706, "grad_norm": 0.475863831676581, "learning_rate": 3.535118229732849e-06, "loss": 0.01871952712535858, "step": 34025 }, { "epoch": 0.32028235294117646, "grad_norm": 0.5248891884214784, "learning_rate": 3.5348585065485e-06, "loss": 0.020864444971084594, "step": 34030 }, { "epoch": 0.32032941176470586, "grad_norm": 0.6677279739821786, "learning_rate": 3.534598840600897e-06, "loss": 0.019691364467144014, "step": 34035 }, { "epoch": 0.3203764705882353, "grad_norm": 0.8230116064451615, "learning_rate": 3.53433923186902e-06, "loss": 0.023743635416030882, "step": 34040 }, { "epoch": 0.3204235294117647, "grad_norm": 1.596455311584755, "learning_rate": 3.5340796803318626e-06, "loss": 0.023869800567626952, "step": 34045 }, { "epoch": 0.3204705882352941, "grad_norm": 0.6706737612488415, "learning_rate": 3.533820185968424e-06, "loss": 0.02690568268299103, "step": 34050 }, { "epoch": 0.3205176470588235, "grad_norm": 0.46536511804083286, "learning_rate": 3.5335607487577195e-06, "loss": 0.018410295248031616, "step": 34055 }, { "epoch": 0.3205647058823529, "grad_norm": 0.6456356030333956, "learning_rate": 3.5333013686787715e-06, "loss": 0.02188837081193924, "step": 34060 }, { "epoch": 0.3206117647058824, "grad_norm": 0.6133017859834542, "learning_rate": 3.5330420457106147e-06, "loss": 0.01887475997209549, "step": 34065 }, { "epoch": 0.3206588235294118, "grad_norm": 0.5088340400427143, "learning_rate": 3.5327827798322935e-06, "loss": 0.02605355381965637, "step": 34070 }, { "epoch": 0.3207058823529412, "grad_norm": 0.6831039705074664, "learning_rate": 3.5325235710228645e-06, "loss": 0.019355842471122743, "step": 34075 }, { "epoch": 0.3207529411764706, "grad_norm": 0.7090799763895405, "learning_rate": 3.5322644192613946e-06, "loss": 0.021093136072158812, "step": 34080 }, { "epoch": 0.3208, "grad_norm": 0.648215147925301, "learning_rate": 3.5320053245269604e-06, "loss": 0.02546754479408264, "step": 34085 }, { "epoch": 0.32084705882352943, "grad_norm": 0.6780726628951579, "learning_rate": 3.531746286798651e-06, "loss": 0.023643293976783754, "step": 34090 }, { "epoch": 0.32089411764705883, "grad_norm": 0.8335772458768117, "learning_rate": 3.5314873060555644e-06, "loss": 0.02406841367483139, "step": 34095 }, { "epoch": 0.32094117647058823, "grad_norm": 0.6212895898018224, "learning_rate": 3.531228382276811e-06, "loss": 0.01843252032995224, "step": 34100 }, { "epoch": 0.32098823529411763, "grad_norm": 1.0482832398738384, "learning_rate": 3.530969515441509e-06, "loss": 0.027814722061157225, "step": 34105 }, { "epoch": 0.3210352941176471, "grad_norm": 0.5879385167531492, "learning_rate": 3.530710705528793e-06, "loss": 0.020191000401973726, "step": 34110 }, { "epoch": 0.3210823529411765, "grad_norm": 0.40207865848916463, "learning_rate": 3.5304519525178015e-06, "loss": 0.024705398082733154, "step": 34115 }, { "epoch": 0.3211294117647059, "grad_norm": 0.6365611834839436, "learning_rate": 3.5301932563876893e-06, "loss": 0.02417839616537094, "step": 34120 }, { "epoch": 0.3211764705882353, "grad_norm": 0.45847347750264783, "learning_rate": 3.5299346171176185e-06, "loss": 0.018714481592178346, "step": 34125 }, { "epoch": 0.3212235294117647, "grad_norm": 0.5829129660437158, "learning_rate": 3.5296760346867632e-06, "loss": 0.020438317954540253, "step": 34130 }, { "epoch": 0.32127058823529414, "grad_norm": 0.6299441030760863, "learning_rate": 3.5294175090743076e-06, "loss": 0.023324364423751832, "step": 34135 }, { "epoch": 0.32131764705882354, "grad_norm": 0.553899982347729, "learning_rate": 3.5291590402594467e-06, "loss": 0.02205684781074524, "step": 34140 }, { "epoch": 0.32136470588235294, "grad_norm": 0.5988635288961672, "learning_rate": 3.528900628221386e-06, "loss": 0.019813673198223115, "step": 34145 }, { "epoch": 0.32141176470588234, "grad_norm": 0.4676051652134228, "learning_rate": 3.5286422729393435e-06, "loss": 0.026554948091506957, "step": 34150 }, { "epoch": 0.32145882352941174, "grad_norm": 0.5921368364135401, "learning_rate": 3.528383974392545e-06, "loss": 0.029837828874588013, "step": 34155 }, { "epoch": 0.3215058823529412, "grad_norm": 0.4191438640541292, "learning_rate": 3.5281257325602296e-06, "loss": 0.018539074063301086, "step": 34160 }, { "epoch": 0.3215529411764706, "grad_norm": 0.27597327013438544, "learning_rate": 3.5278675474216435e-06, "loss": 0.017192529141902925, "step": 34165 }, { "epoch": 0.3216, "grad_norm": 0.5379879274359627, "learning_rate": 3.5276094189560478e-06, "loss": 0.02002364844083786, "step": 34170 }, { "epoch": 0.3216470588235294, "grad_norm": 0.6041033234743631, "learning_rate": 3.5273513471427116e-06, "loss": 0.023167741298675538, "step": 34175 }, { "epoch": 0.3216941176470588, "grad_norm": 0.6600995494544658, "learning_rate": 3.5270933319609154e-06, "loss": 0.023405852913856506, "step": 34180 }, { "epoch": 0.32174117647058825, "grad_norm": 0.6394237588907897, "learning_rate": 3.526835373389949e-06, "loss": 0.0231770783662796, "step": 34185 }, { "epoch": 0.32178823529411765, "grad_norm": 0.5717060614667301, "learning_rate": 3.5265774714091143e-06, "loss": 0.027857667207717894, "step": 34190 }, { "epoch": 0.32183529411764705, "grad_norm": 0.49451155236549266, "learning_rate": 3.5263196259977243e-06, "loss": 0.02197968363761902, "step": 34195 }, { "epoch": 0.32188235294117645, "grad_norm": 0.6147452542469795, "learning_rate": 3.5260618371351013e-06, "loss": 0.02269526422023773, "step": 34200 }, { "epoch": 0.3219294117647059, "grad_norm": 0.5363617503432399, "learning_rate": 3.525804104800578e-06, "loss": 0.02315688133239746, "step": 34205 }, { "epoch": 0.3219764705882353, "grad_norm": 0.5286064505646221, "learning_rate": 3.5255464289734986e-06, "loss": 0.01920783072710037, "step": 34210 }, { "epoch": 0.3220235294117647, "grad_norm": 0.3988158732246992, "learning_rate": 3.5252888096332184e-06, "loss": 0.018728189170360565, "step": 34215 }, { "epoch": 0.3220705882352941, "grad_norm": 0.6282073498257839, "learning_rate": 3.5250312467591e-06, "loss": 0.02682938575744629, "step": 34220 }, { "epoch": 0.3221176470588235, "grad_norm": 0.672057985718158, "learning_rate": 3.524773740330521e-06, "loss": 0.028919577598571777, "step": 34225 }, { "epoch": 0.32216470588235296, "grad_norm": 0.9981610945399713, "learning_rate": 3.5245162903268665e-06, "loss": 0.015730416774749754, "step": 34230 }, { "epoch": 0.32221176470588236, "grad_norm": 0.7819905542159519, "learning_rate": 3.524258896727534e-06, "loss": 0.021476319432258605, "step": 34235 }, { "epoch": 0.32225882352941176, "grad_norm": 0.598754036811739, "learning_rate": 3.5240015595119292e-06, "loss": 0.016057462990283967, "step": 34240 }, { "epoch": 0.32230588235294116, "grad_norm": 0.6985386823111329, "learning_rate": 3.523744278659471e-06, "loss": 0.017594705522060394, "step": 34245 }, { "epoch": 0.32235294117647056, "grad_norm": 0.5676019781824658, "learning_rate": 3.5234870541495865e-06, "loss": 0.021122546494007112, "step": 34250 }, { "epoch": 0.3224, "grad_norm": 0.5019628040058378, "learning_rate": 3.523229885961716e-06, "loss": 0.02344392240047455, "step": 34255 }, { "epoch": 0.3224470588235294, "grad_norm": 0.618602359289596, "learning_rate": 3.522972774075307e-06, "loss": 0.026111331582069398, "step": 34260 }, { "epoch": 0.3224941176470588, "grad_norm": 0.5031080253086408, "learning_rate": 3.5227157184698185e-06, "loss": 0.02435125708580017, "step": 34265 }, { "epoch": 0.3225411764705882, "grad_norm": 0.5272248861430515, "learning_rate": 3.5224587191247235e-06, "loss": 0.023454515635967253, "step": 34270 }, { "epoch": 0.3225882352941176, "grad_norm": 0.5122513307914451, "learning_rate": 3.522201776019501e-06, "loss": 0.025995194911956787, "step": 34275 }, { "epoch": 0.3226352941176471, "grad_norm": 0.5339621598814444, "learning_rate": 3.5219448891336417e-06, "loss": 0.026309102773666382, "step": 34280 }, { "epoch": 0.3226823529411765, "grad_norm": 0.49061072716483867, "learning_rate": 3.5216880584466474e-06, "loss": 0.018721061944961547, "step": 34285 }, { "epoch": 0.3227294117647059, "grad_norm": 0.6586416955820062, "learning_rate": 3.5214312839380318e-06, "loss": 0.022057971358299254, "step": 34290 }, { "epoch": 0.3227764705882353, "grad_norm": 0.381959395960889, "learning_rate": 3.5211745655873142e-06, "loss": 0.019985245168209077, "step": 34295 }, { "epoch": 0.32282352941176473, "grad_norm": 0.4545756678790458, "learning_rate": 3.5209179033740306e-06, "loss": 0.018509328365325928, "step": 34300 }, { "epoch": 0.32287058823529413, "grad_norm": 0.49466726347165973, "learning_rate": 3.5206612972777227e-06, "loss": 0.02153298258781433, "step": 34305 }, { "epoch": 0.32291764705882353, "grad_norm": 0.4280750301594922, "learning_rate": 3.5204047472779447e-06, "loss": 0.017083409428596496, "step": 34310 }, { "epoch": 0.32296470588235293, "grad_norm": 0.8006674762021141, "learning_rate": 3.520148253354261e-06, "loss": 0.021720410883426668, "step": 34315 }, { "epoch": 0.32301176470588233, "grad_norm": 0.5276913666667659, "learning_rate": 3.519891815486246e-06, "loss": 0.018674635887145997, "step": 34320 }, { "epoch": 0.3230588235294118, "grad_norm": 0.45756993951535513, "learning_rate": 3.5196354336534853e-06, "loss": 0.022055205702781678, "step": 34325 }, { "epoch": 0.3231058823529412, "grad_norm": 0.5111209192816437, "learning_rate": 3.5193791078355745e-06, "loss": 0.017384530603885652, "step": 34330 }, { "epoch": 0.3231529411764706, "grad_norm": 0.5968871781398046, "learning_rate": 3.5191228380121184e-06, "loss": 0.023194268345832825, "step": 34335 }, { "epoch": 0.3232, "grad_norm": 0.4591557661655391, "learning_rate": 3.5188666241627345e-06, "loss": 0.021422001719474792, "step": 34340 }, { "epoch": 0.3232470588235294, "grad_norm": 0.5816416270334142, "learning_rate": 3.518610466267049e-06, "loss": 0.02168167233467102, "step": 34345 }, { "epoch": 0.32329411764705884, "grad_norm": 0.45541055335637287, "learning_rate": 3.518354364304698e-06, "loss": 0.019474689662456513, "step": 34350 }, { "epoch": 0.32334117647058824, "grad_norm": 0.4962460551779979, "learning_rate": 3.5180983182553307e-06, "loss": 0.027206462621688843, "step": 34355 }, { "epoch": 0.32338823529411764, "grad_norm": 0.5952690770866691, "learning_rate": 3.517842328098604e-06, "loss": 0.02257194072008133, "step": 34360 }, { "epoch": 0.32343529411764704, "grad_norm": 0.4087394753288221, "learning_rate": 3.5175863938141864e-06, "loss": 0.01989719271659851, "step": 34365 }, { "epoch": 0.32348235294117644, "grad_norm": 0.8345758088651369, "learning_rate": 3.5173305153817557e-06, "loss": 0.023378607630729676, "step": 34370 }, { "epoch": 0.3235294117647059, "grad_norm": 0.6942319543183922, "learning_rate": 3.517074692781001e-06, "loss": 0.019795504212379456, "step": 34375 }, { "epoch": 0.3235764705882353, "grad_norm": 0.672651201288344, "learning_rate": 3.5168189259916213e-06, "loss": 0.026530247926712037, "step": 34380 }, { "epoch": 0.3236235294117647, "grad_norm": 0.5100300252272101, "learning_rate": 3.5165632149933275e-06, "loss": 0.021790644526481627, "step": 34385 }, { "epoch": 0.3236705882352941, "grad_norm": 0.7087096031537319, "learning_rate": 3.516307559765838e-06, "loss": 0.017704638838768005, "step": 34390 }, { "epoch": 0.32371764705882355, "grad_norm": 0.7916584375313153, "learning_rate": 3.516051960288884e-06, "loss": 0.024207328259944916, "step": 34395 }, { "epoch": 0.32376470588235295, "grad_norm": 0.693945660336383, "learning_rate": 3.515796416542205e-06, "loss": 0.017381033301353453, "step": 34400 }, { "epoch": 0.32381176470588235, "grad_norm": 0.7687620361021437, "learning_rate": 3.515540928505552e-06, "loss": 0.023038479685783386, "step": 34405 }, { "epoch": 0.32385882352941175, "grad_norm": 0.5214743261300416, "learning_rate": 3.5152854961586865e-06, "loss": 0.023052647709846497, "step": 34410 }, { "epoch": 0.32390588235294115, "grad_norm": 1.0579762883845425, "learning_rate": 3.515030119481379e-06, "loss": 0.022553619742393494, "step": 34415 }, { "epoch": 0.3239529411764706, "grad_norm": 0.7106123090098935, "learning_rate": 3.5147747984534126e-06, "loss": 0.021176284551620482, "step": 34420 }, { "epoch": 0.324, "grad_norm": 0.6265438576380005, "learning_rate": 3.514519533054579e-06, "loss": 0.02182827889919281, "step": 34425 }, { "epoch": 0.3240470588235294, "grad_norm": 0.552880719188199, "learning_rate": 3.514264323264679e-06, "loss": 0.022613808512687683, "step": 34430 }, { "epoch": 0.3240941176470588, "grad_norm": 0.45608913878056606, "learning_rate": 3.514009169063526e-06, "loss": 0.022749817371368407, "step": 34435 }, { "epoch": 0.3241411764705882, "grad_norm": 0.7079627814406834, "learning_rate": 3.513754070430943e-06, "loss": 0.021522608399391175, "step": 34440 }, { "epoch": 0.32418823529411767, "grad_norm": 0.58286873101656, "learning_rate": 3.513499027346763e-06, "loss": 0.02106103003025055, "step": 34445 }, { "epoch": 0.32423529411764707, "grad_norm": 0.6143322481403197, "learning_rate": 3.5132440397908284e-06, "loss": 0.01928810924291611, "step": 34450 }, { "epoch": 0.32428235294117647, "grad_norm": 0.49392503980312014, "learning_rate": 3.5129891077429932e-06, "loss": 0.019459787011146545, "step": 34455 }, { "epoch": 0.32432941176470587, "grad_norm": 0.7362306844565186, "learning_rate": 3.5127342311831226e-06, "loss": 0.025205686688423157, "step": 34460 }, { "epoch": 0.3243764705882353, "grad_norm": 0.5267605984443361, "learning_rate": 3.5124794100910877e-06, "loss": 0.02385745644569397, "step": 34465 }, { "epoch": 0.3244235294117647, "grad_norm": 0.5462533874654965, "learning_rate": 3.5122246444467744e-06, "loss": 0.019258111715316772, "step": 34470 }, { "epoch": 0.3244705882352941, "grad_norm": 0.35736834595594985, "learning_rate": 3.5119699342300774e-06, "loss": 0.01745385080575943, "step": 34475 }, { "epoch": 0.3245176470588235, "grad_norm": 0.4516115896288949, "learning_rate": 3.5117152794209e-06, "loss": 0.02477289140224457, "step": 34480 }, { "epoch": 0.3245647058823529, "grad_norm": 0.5329428991409655, "learning_rate": 3.511460679999157e-06, "loss": 0.022707179188728333, "step": 34485 }, { "epoch": 0.3246117647058824, "grad_norm": 0.6371773826025072, "learning_rate": 3.5112061359447745e-06, "loss": 0.019624596834182738, "step": 34490 }, { "epoch": 0.3246588235294118, "grad_norm": 0.5051777219684697, "learning_rate": 3.510951647237687e-06, "loss": 0.021881569921970368, "step": 34495 }, { "epoch": 0.3247058823529412, "grad_norm": 0.45850781180075384, "learning_rate": 3.5106972138578403e-06, "loss": 0.016018152236938477, "step": 34500 }, { "epoch": 0.3247529411764706, "grad_norm": 0.607518880249296, "learning_rate": 3.510442835785189e-06, "loss": 0.020276957750320436, "step": 34505 }, { "epoch": 0.3248, "grad_norm": 0.502982412366792, "learning_rate": 3.5101885129996993e-06, "loss": 0.019371329247951506, "step": 34510 }, { "epoch": 0.32484705882352943, "grad_norm": 0.5521959093625309, "learning_rate": 3.509934245481347e-06, "loss": 0.01900689899921417, "step": 34515 }, { "epoch": 0.32489411764705883, "grad_norm": 0.4862880613003685, "learning_rate": 3.5096800332101183e-06, "loss": 0.02096288651227951, "step": 34520 }, { "epoch": 0.32494117647058823, "grad_norm": 0.6232987955634057, "learning_rate": 3.5094258761660092e-06, "loss": 0.01888470947742462, "step": 34525 }, { "epoch": 0.32498823529411763, "grad_norm": 0.5096394376798692, "learning_rate": 3.509171774329025e-06, "loss": 0.023317751288414002, "step": 34530 }, { "epoch": 0.32503529411764703, "grad_norm": 0.6120665329846359, "learning_rate": 3.5089177276791836e-06, "loss": 0.02546640336513519, "step": 34535 }, { "epoch": 0.3250823529411765, "grad_norm": 0.5032875967802671, "learning_rate": 3.5086637361965105e-06, "loss": 0.021145492792129517, "step": 34540 }, { "epoch": 0.3251294117647059, "grad_norm": 0.3967859194885601, "learning_rate": 3.5084097998610427e-06, "loss": 0.018869104981422424, "step": 34545 }, { "epoch": 0.3251764705882353, "grad_norm": 0.6423107820245008, "learning_rate": 3.508155918652827e-06, "loss": 0.02322083115577698, "step": 34550 }, { "epoch": 0.3252235294117647, "grad_norm": 0.6492020423770971, "learning_rate": 3.50790209255192e-06, "loss": 0.024496200680732726, "step": 34555 }, { "epoch": 0.32527058823529414, "grad_norm": 0.5762125128940306, "learning_rate": 3.5076483215383893e-06, "loss": 0.02718568444252014, "step": 34560 }, { "epoch": 0.32531764705882354, "grad_norm": 0.5567679382286679, "learning_rate": 3.507394605592311e-06, "loss": 0.026124289631843566, "step": 34565 }, { "epoch": 0.32536470588235294, "grad_norm": 0.40819174946423775, "learning_rate": 3.5071409446937726e-06, "loss": 0.020629243552684785, "step": 34570 }, { "epoch": 0.32541176470588234, "grad_norm": 0.6835146592438115, "learning_rate": 3.506887338822872e-06, "loss": 0.02337593734264374, "step": 34575 }, { "epoch": 0.32545882352941174, "grad_norm": 0.31212155939596575, "learning_rate": 3.5066337879597145e-06, "loss": 0.018363985419273376, "step": 34580 }, { "epoch": 0.3255058823529412, "grad_norm": 0.5887975262067437, "learning_rate": 3.5063802920844205e-06, "loss": 0.019956380128860474, "step": 34585 }, { "epoch": 0.3255529411764706, "grad_norm": 0.7130758189140091, "learning_rate": 3.506126851177114e-06, "loss": 0.022209402918815613, "step": 34590 }, { "epoch": 0.3256, "grad_norm": 0.5055974459784303, "learning_rate": 3.5058734652179356e-06, "loss": 0.026389425992965697, "step": 34595 }, { "epoch": 0.3256470588235294, "grad_norm": 0.5402997749001301, "learning_rate": 3.5056201341870303e-06, "loss": 0.02492443323135376, "step": 34600 }, { "epoch": 0.3256941176470588, "grad_norm": 0.4515306112320672, "learning_rate": 3.5053668580645577e-06, "loss": 0.027010589838027954, "step": 34605 }, { "epoch": 0.32574117647058826, "grad_norm": 0.5858038677859212, "learning_rate": 3.5051136368306835e-06, "loss": 0.019087690114974975, "step": 34610 }, { "epoch": 0.32578823529411766, "grad_norm": 0.598678484383618, "learning_rate": 3.5048604704655866e-06, "loss": 0.023294723033905028, "step": 34615 }, { "epoch": 0.32583529411764706, "grad_norm": 0.3872924223908508, "learning_rate": 3.504607358949455e-06, "loss": 0.018640395998954774, "step": 34620 }, { "epoch": 0.32588235294117646, "grad_norm": 0.6632913513379748, "learning_rate": 3.5043543022624844e-06, "loss": 0.026130950450897215, "step": 34625 }, { "epoch": 0.32592941176470586, "grad_norm": 0.7969877254183645, "learning_rate": 3.5041013003848847e-06, "loss": 0.02302561402320862, "step": 34630 }, { "epoch": 0.3259764705882353, "grad_norm": 0.6273964677523399, "learning_rate": 3.5038483532968713e-06, "loss": 0.024535053968429567, "step": 34635 }, { "epoch": 0.3260235294117647, "grad_norm": 0.5470098090767894, "learning_rate": 3.5035954609786744e-06, "loss": 0.02427527606487274, "step": 34640 }, { "epoch": 0.3260705882352941, "grad_norm": 0.5891657732158516, "learning_rate": 3.503342623410529e-06, "loss": 0.023018956184387207, "step": 34645 }, { "epoch": 0.3261176470588235, "grad_norm": 0.7811380482495616, "learning_rate": 3.5030898405726844e-06, "loss": 0.02864103317260742, "step": 34650 }, { "epoch": 0.32616470588235297, "grad_norm": 0.4646041793900667, "learning_rate": 3.5028371124453976e-06, "loss": 0.029047197103500365, "step": 34655 }, { "epoch": 0.32621176470588237, "grad_norm": 0.52102668004241, "learning_rate": 3.502584439008937e-06, "loss": 0.026831912994384765, "step": 34660 }, { "epoch": 0.32625882352941177, "grad_norm": 0.42600616371130245, "learning_rate": 3.5023318202435793e-06, "loss": 0.017162859439849854, "step": 34665 }, { "epoch": 0.32630588235294117, "grad_norm": 0.5201354231950849, "learning_rate": 3.5020792561296117e-06, "loss": 0.01954202800989151, "step": 34670 }, { "epoch": 0.32635294117647057, "grad_norm": 0.5621203081899222, "learning_rate": 3.501826746647332e-06, "loss": 0.022650396823883055, "step": 34675 }, { "epoch": 0.3264, "grad_norm": 0.627831361424907, "learning_rate": 3.5015742917770478e-06, "loss": 0.023749500513076782, "step": 34680 }, { "epoch": 0.3264470588235294, "grad_norm": 0.40705484538686265, "learning_rate": 3.501321891499077e-06, "loss": 0.017508864402770996, "step": 34685 }, { "epoch": 0.3264941176470588, "grad_norm": 0.5216479313719029, "learning_rate": 3.501069545793745e-06, "loss": 0.027084040641784667, "step": 34690 }, { "epoch": 0.3265411764705882, "grad_norm": 0.7236347147487237, "learning_rate": 3.5008172546413904e-06, "loss": 0.02099277973175049, "step": 34695 }, { "epoch": 0.3265882352941176, "grad_norm": 0.38944488530881527, "learning_rate": 3.5005650180223603e-06, "loss": 0.01647166758775711, "step": 34700 }, { "epoch": 0.3266352941176471, "grad_norm": 0.46660719292468017, "learning_rate": 3.500312835917011e-06, "loss": 0.02182219922542572, "step": 34705 }, { "epoch": 0.3266823529411765, "grad_norm": 0.5186650142289753, "learning_rate": 3.5000607083057094e-06, "loss": 0.019575148820877075, "step": 34710 }, { "epoch": 0.3267294117647059, "grad_norm": 0.5631212268108156, "learning_rate": 3.4998086351688327e-06, "loss": 0.022528937458992003, "step": 34715 }, { "epoch": 0.3267764705882353, "grad_norm": 0.40304808140445797, "learning_rate": 3.499556616486768e-06, "loss": 0.019439259171485902, "step": 34720 }, { "epoch": 0.3268235294117647, "grad_norm": 0.4529502084148167, "learning_rate": 3.499304652239911e-06, "loss": 0.01956833600997925, "step": 34725 }, { "epoch": 0.32687058823529413, "grad_norm": 0.6618438339238774, "learning_rate": 3.499052742408668e-06, "loss": 0.02295074164867401, "step": 34730 }, { "epoch": 0.32691764705882354, "grad_norm": 0.4579054946018038, "learning_rate": 3.4988008869734567e-06, "loss": 0.024999135732650758, "step": 34735 }, { "epoch": 0.32696470588235294, "grad_norm": 0.531854795990262, "learning_rate": 3.498549085914702e-06, "loss": 0.019861763715744017, "step": 34740 }, { "epoch": 0.32701176470588234, "grad_norm": 0.5339318360916856, "learning_rate": 3.49829733921284e-06, "loss": 0.021097156405448913, "step": 34745 }, { "epoch": 0.3270588235294118, "grad_norm": 0.6168960996653309, "learning_rate": 3.498045646848317e-06, "loss": 0.019890877604484557, "step": 34750 }, { "epoch": 0.3271058823529412, "grad_norm": 0.534294576037517, "learning_rate": 3.4977940088015884e-06, "loss": 0.01874067783355713, "step": 34755 }, { "epoch": 0.3271529411764706, "grad_norm": 0.4090835263944643, "learning_rate": 3.497542425053121e-06, "loss": 0.020809704065322877, "step": 34760 }, { "epoch": 0.3272, "grad_norm": 0.5249405513777384, "learning_rate": 3.4972908955833885e-06, "loss": 0.02192918658256531, "step": 34765 }, { "epoch": 0.3272470588235294, "grad_norm": 0.6807933630056298, "learning_rate": 3.4970394203728774e-06, "loss": 0.020653486251831055, "step": 34770 }, { "epoch": 0.32729411764705885, "grad_norm": 0.6236742385266119, "learning_rate": 3.496787999402081e-06, "loss": 0.02364514470100403, "step": 34775 }, { "epoch": 0.32734117647058825, "grad_norm": 1.0179222389112175, "learning_rate": 3.4965366326515066e-06, "loss": 0.02002483606338501, "step": 34780 }, { "epoch": 0.32738823529411765, "grad_norm": 0.7624483771538243, "learning_rate": 3.4962853201016672e-06, "loss": 0.022679357230663298, "step": 34785 }, { "epoch": 0.32743529411764705, "grad_norm": 0.5788348153691261, "learning_rate": 3.4960340617330887e-06, "loss": 0.02576553225517273, "step": 34790 }, { "epoch": 0.32748235294117645, "grad_norm": 0.4371368455892259, "learning_rate": 3.4957828575263035e-06, "loss": 0.02100345343351364, "step": 34795 }, { "epoch": 0.3275294117647059, "grad_norm": 0.45395713833642815, "learning_rate": 3.495531707461857e-06, "loss": 0.021601146459579466, "step": 34800 }, { "epoch": 0.3275764705882353, "grad_norm": 0.6608490134306308, "learning_rate": 3.4952806115203026e-06, "loss": 0.019201412796974182, "step": 34805 }, { "epoch": 0.3276235294117647, "grad_norm": 0.5946640396794473, "learning_rate": 3.4950295696822046e-06, "loss": 0.02028650641441345, "step": 34810 }, { "epoch": 0.3276705882352941, "grad_norm": 0.5040967701883431, "learning_rate": 3.4947785819281356e-06, "loss": 0.023463504016399385, "step": 34815 }, { "epoch": 0.3277176470588235, "grad_norm": 0.4117164259785328, "learning_rate": 3.4945276482386787e-06, "loss": 0.01989678144454956, "step": 34820 }, { "epoch": 0.32776470588235296, "grad_norm": 0.803026428233821, "learning_rate": 3.4942767685944267e-06, "loss": 0.023217347264289857, "step": 34825 }, { "epoch": 0.32781176470588236, "grad_norm": 0.43964090501945047, "learning_rate": 3.4940259429759838e-06, "loss": 0.020132812857627868, "step": 34830 }, { "epoch": 0.32785882352941176, "grad_norm": 0.5825178363652747, "learning_rate": 3.4937751713639605e-06, "loss": 0.022456344962120057, "step": 34835 }, { "epoch": 0.32790588235294116, "grad_norm": 0.7319332037948318, "learning_rate": 3.49352445373898e-06, "loss": 0.02071947604417801, "step": 34840 }, { "epoch": 0.3279529411764706, "grad_norm": 0.46484367839375274, "learning_rate": 3.493273790081675e-06, "loss": 0.024230197072029114, "step": 34845 }, { "epoch": 0.328, "grad_norm": 0.6548331767411752, "learning_rate": 3.4930231803726857e-06, "loss": 0.018556150794029235, "step": 34850 }, { "epoch": 0.3280470588235294, "grad_norm": 0.7190041925123065, "learning_rate": 3.492772624592663e-06, "loss": 0.02151113301515579, "step": 34855 }, { "epoch": 0.3280941176470588, "grad_norm": 0.48260886758436256, "learning_rate": 3.4925221227222692e-06, "loss": 0.018942616879940033, "step": 34860 }, { "epoch": 0.3281411764705882, "grad_norm": 0.6690632787765318, "learning_rate": 3.4922716747421753e-06, "loss": 0.027808332443237306, "step": 34865 }, { "epoch": 0.32818823529411767, "grad_norm": 0.5969285610314495, "learning_rate": 3.4920212806330613e-06, "loss": 0.027639690041542053, "step": 34870 }, { "epoch": 0.32823529411764707, "grad_norm": 0.5152710397268838, "learning_rate": 3.491770940375616e-06, "loss": 0.019528047740459444, "step": 34875 }, { "epoch": 0.32828235294117647, "grad_norm": 0.6287958761855407, "learning_rate": 3.491520653950542e-06, "loss": 0.017995645105838776, "step": 34880 }, { "epoch": 0.32832941176470587, "grad_norm": 0.6077793522247686, "learning_rate": 3.491270421338546e-06, "loss": 0.020592695474624632, "step": 34885 }, { "epoch": 0.32837647058823527, "grad_norm": 0.6055318148569784, "learning_rate": 3.4910202425203493e-06, "loss": 0.022012192010879516, "step": 34890 }, { "epoch": 0.3284235294117647, "grad_norm": 0.6703276132318737, "learning_rate": 3.4907701174766797e-06, "loss": 0.021612006425857543, "step": 34895 }, { "epoch": 0.3284705882352941, "grad_norm": 0.5243054526108936, "learning_rate": 3.490520046188276e-06, "loss": 0.02765944004058838, "step": 34900 }, { "epoch": 0.3285176470588235, "grad_norm": 0.5820430849503104, "learning_rate": 3.4902700286358867e-06, "loss": 0.023157736659049986, "step": 34905 }, { "epoch": 0.3285647058823529, "grad_norm": 0.5378565064930976, "learning_rate": 3.4900200648002687e-06, "loss": 0.022458434104919434, "step": 34910 }, { "epoch": 0.3286117647058823, "grad_norm": 0.8204234817664773, "learning_rate": 3.489770154662191e-06, "loss": 0.01983034610748291, "step": 34915 }, { "epoch": 0.3286588235294118, "grad_norm": 0.5109960851730175, "learning_rate": 3.4895202982024294e-06, "loss": 0.021044078469276428, "step": 34920 }, { "epoch": 0.3287058823529412, "grad_norm": 0.6082780984683782, "learning_rate": 3.489270495401771e-06, "loss": 0.01902327537536621, "step": 34925 }, { "epoch": 0.3287529411764706, "grad_norm": 0.5462458138679552, "learning_rate": 3.489020746241013e-06, "loss": 0.019665053486824034, "step": 34930 }, { "epoch": 0.3288, "grad_norm": 0.48215741253765887, "learning_rate": 3.48877105070096e-06, "loss": 0.023888060450553895, "step": 34935 }, { "epoch": 0.32884705882352944, "grad_norm": 0.431863171166551, "learning_rate": 3.488521408762429e-06, "loss": 0.018759576976299285, "step": 34940 }, { "epoch": 0.32889411764705884, "grad_norm": 1.351760897132769, "learning_rate": 3.488271820406244e-06, "loss": 0.01823505461215973, "step": 34945 }, { "epoch": 0.32894117647058824, "grad_norm": 0.5700007055345007, "learning_rate": 3.488022285613242e-06, "loss": 0.026168900728225707, "step": 34950 }, { "epoch": 0.32898823529411764, "grad_norm": 0.6981546633995317, "learning_rate": 3.487772804364264e-06, "loss": 0.021993517875671387, "step": 34955 }, { "epoch": 0.32903529411764704, "grad_norm": 0.6017512532420582, "learning_rate": 3.4875233766401673e-06, "loss": 0.02087467610836029, "step": 34960 }, { "epoch": 0.3290823529411765, "grad_norm": 0.44079236672185534, "learning_rate": 3.487274002421813e-06, "loss": 0.016685789823532103, "step": 34965 }, { "epoch": 0.3291294117647059, "grad_norm": 0.5195572705879941, "learning_rate": 3.4870246816900768e-06, "loss": 0.020814383029937746, "step": 34970 }, { "epoch": 0.3291764705882353, "grad_norm": 0.4708920434221267, "learning_rate": 3.486775414425839e-06, "loss": 0.018530870974063873, "step": 34975 }, { "epoch": 0.3292235294117647, "grad_norm": 0.4848775061069281, "learning_rate": 3.4865262006099935e-06, "loss": 0.017795202136039735, "step": 34980 }, { "epoch": 0.3292705882352941, "grad_norm": 0.5618337413550539, "learning_rate": 3.4862770402234415e-06, "loss": 0.019312840700149537, "step": 34985 }, { "epoch": 0.32931764705882355, "grad_norm": 0.8440680203817057, "learning_rate": 3.486027933247094e-06, "loss": 0.026473766565322875, "step": 34990 }, { "epoch": 0.32936470588235295, "grad_norm": 0.5061021321160312, "learning_rate": 3.4857788796618735e-06, "loss": 0.022602739930152892, "step": 34995 }, { "epoch": 0.32941176470588235, "grad_norm": 0.588908398514784, "learning_rate": 3.4855298794487085e-06, "loss": 0.025517746806144714, "step": 35000 }, { "epoch": 0.32945882352941175, "grad_norm": 0.5313691536315339, "learning_rate": 3.485280932588541e-06, "loss": 0.0259145587682724, "step": 35005 }, { "epoch": 0.3295058823529412, "grad_norm": 0.5225936118141975, "learning_rate": 3.4850320390623196e-06, "loss": 0.0178339421749115, "step": 35010 }, { "epoch": 0.3295529411764706, "grad_norm": 0.6126480515192657, "learning_rate": 3.4847831988510033e-06, "loss": 0.023578931391239167, "step": 35015 }, { "epoch": 0.3296, "grad_norm": 0.5073529791723048, "learning_rate": 3.4845344119355613e-06, "loss": 0.02093341052532196, "step": 35020 }, { "epoch": 0.3296470588235294, "grad_norm": 0.5666334258966612, "learning_rate": 3.484285678296971e-06, "loss": 0.017591646313667296, "step": 35025 }, { "epoch": 0.3296941176470588, "grad_norm": 0.6268497164152214, "learning_rate": 3.48403699791622e-06, "loss": 0.02436627894639969, "step": 35030 }, { "epoch": 0.32974117647058826, "grad_norm": 0.5196570982439899, "learning_rate": 3.4837883707743062e-06, "loss": 0.017469456791877745, "step": 35035 }, { "epoch": 0.32978823529411766, "grad_norm": 0.6638653611196405, "learning_rate": 3.483539796852236e-06, "loss": 0.0239724338054657, "step": 35040 }, { "epoch": 0.32983529411764706, "grad_norm": 0.6001974731249997, "learning_rate": 3.4832912761310257e-06, "loss": 0.023508577048778533, "step": 35045 }, { "epoch": 0.32988235294117646, "grad_norm": 0.4306125403854129, "learning_rate": 3.4830428085916993e-06, "loss": 0.019949591159820555, "step": 35050 }, { "epoch": 0.32992941176470586, "grad_norm": 0.5941598980205366, "learning_rate": 3.4827943942152943e-06, "loss": 0.024602605402469634, "step": 35055 }, { "epoch": 0.3299764705882353, "grad_norm": 0.4744796836408819, "learning_rate": 3.4825460329828524e-06, "loss": 0.021643692255020143, "step": 35060 }, { "epoch": 0.3300235294117647, "grad_norm": 0.8090675531048439, "learning_rate": 3.482297724875431e-06, "loss": 0.02263961285352707, "step": 35065 }, { "epoch": 0.3300705882352941, "grad_norm": 0.695924061297231, "learning_rate": 3.48204946987409e-06, "loss": 0.018129366636276244, "step": 35070 }, { "epoch": 0.3301176470588235, "grad_norm": 0.418816967884815, "learning_rate": 3.481801267959905e-06, "loss": 0.02077055871486664, "step": 35075 }, { "epoch": 0.3301647058823529, "grad_norm": 0.6900338802335607, "learning_rate": 3.4815531191139574e-06, "loss": 0.024896463751792906, "step": 35080 }, { "epoch": 0.33021176470588237, "grad_norm": 0.6633643140990532, "learning_rate": 3.481305023317338e-06, "loss": 0.019560593366622924, "step": 35085 }, { "epoch": 0.33025882352941177, "grad_norm": 0.5650663359154245, "learning_rate": 3.481056980551149e-06, "loss": 0.0257457435131073, "step": 35090 }, { "epoch": 0.33030588235294117, "grad_norm": 0.35415145543320986, "learning_rate": 3.4808089907965015e-06, "loss": 0.01854771375656128, "step": 35095 }, { "epoch": 0.33035294117647057, "grad_norm": 0.8685302932030445, "learning_rate": 3.4805610540345146e-06, "loss": 0.024172022938728333, "step": 35100 }, { "epoch": 0.3304, "grad_norm": 0.8129734158766231, "learning_rate": 3.4803131702463177e-06, "loss": 0.020380863547325136, "step": 35105 }, { "epoch": 0.3304470588235294, "grad_norm": 0.6003267770304828, "learning_rate": 3.48006533941305e-06, "loss": 0.016708841919898985, "step": 35110 }, { "epoch": 0.3304941176470588, "grad_norm": 0.5055355871065952, "learning_rate": 3.47981756151586e-06, "loss": 0.021797871589660643, "step": 35115 }, { "epoch": 0.3305411764705882, "grad_norm": 0.5704684615045826, "learning_rate": 3.4795698365359054e-06, "loss": 0.018038457632064818, "step": 35120 }, { "epoch": 0.3305882352941176, "grad_norm": 0.8893569207865694, "learning_rate": 3.4793221644543525e-06, "loss": 0.019037798047065735, "step": 35125 }, { "epoch": 0.3306352941176471, "grad_norm": 0.6964874540117889, "learning_rate": 3.4790745452523783e-06, "loss": 0.020628389716148377, "step": 35130 }, { "epoch": 0.3306823529411765, "grad_norm": 0.7456396234859918, "learning_rate": 3.4788269789111685e-06, "loss": 0.020519062876701355, "step": 35135 }, { "epoch": 0.3307294117647059, "grad_norm": 0.7128317335501753, "learning_rate": 3.478579465411918e-06, "loss": 0.018128812313079834, "step": 35140 }, { "epoch": 0.3307764705882353, "grad_norm": 0.6292765963481254, "learning_rate": 3.4783320047358313e-06, "loss": 0.02440630495548248, "step": 35145 }, { "epoch": 0.3308235294117647, "grad_norm": 0.39473500017053026, "learning_rate": 3.4780845968641238e-06, "loss": 0.016143906116485595, "step": 35150 }, { "epoch": 0.33087058823529414, "grad_norm": 1.2239282292818754, "learning_rate": 3.4778372417780166e-06, "loss": 0.02135486900806427, "step": 35155 }, { "epoch": 0.33091764705882354, "grad_norm": 0.6183922013965941, "learning_rate": 3.4775899394587433e-06, "loss": 0.023336435854434966, "step": 35160 }, { "epoch": 0.33096470588235294, "grad_norm": 0.8597739322818102, "learning_rate": 3.477342689887545e-06, "loss": 0.02119479775428772, "step": 35165 }, { "epoch": 0.33101176470588234, "grad_norm": 0.6502691711180022, "learning_rate": 3.477095493045674e-06, "loss": 0.021019524335861205, "step": 35170 }, { "epoch": 0.33105882352941174, "grad_norm": 0.461020768725234, "learning_rate": 3.4768483489143906e-06, "loss": 0.01674734950065613, "step": 35175 }, { "epoch": 0.3311058823529412, "grad_norm": 0.5549569138536216, "learning_rate": 3.4766012574749645e-06, "loss": 0.021687206625938416, "step": 35180 }, { "epoch": 0.3311529411764706, "grad_norm": 0.7683960348965393, "learning_rate": 3.4763542187086765e-06, "loss": 0.027086156606674194, "step": 35185 }, { "epoch": 0.3312, "grad_norm": 0.5849436184733612, "learning_rate": 3.4761072325968114e-06, "loss": 0.022010010480880738, "step": 35190 }, { "epoch": 0.3312470588235294, "grad_norm": 0.404860950116888, "learning_rate": 3.475860299120671e-06, "loss": 0.01893412172794342, "step": 35195 }, { "epoch": 0.33129411764705885, "grad_norm": 0.6377547071953278, "learning_rate": 3.4756134182615604e-06, "loss": 0.0170664981007576, "step": 35200 }, { "epoch": 0.33134117647058825, "grad_norm": 0.836467986712126, "learning_rate": 3.4753665900007966e-06, "loss": 0.023427322506904602, "step": 35205 }, { "epoch": 0.33138823529411765, "grad_norm": 0.4972150438528056, "learning_rate": 3.4751198143197046e-06, "loss": 0.023814156651496887, "step": 35210 }, { "epoch": 0.33143529411764705, "grad_norm": 0.5804522712096215, "learning_rate": 3.4748730911996205e-06, "loss": 0.018573623895645142, "step": 35215 }, { "epoch": 0.33148235294117645, "grad_norm": 0.7511882781987659, "learning_rate": 3.474626420621888e-06, "loss": 0.021296805143356322, "step": 35220 }, { "epoch": 0.3315294117647059, "grad_norm": 0.558281381316869, "learning_rate": 3.4743798025678606e-06, "loss": 0.023706275224685668, "step": 35225 }, { "epoch": 0.3315764705882353, "grad_norm": 0.7975394852298887, "learning_rate": 3.474133237018901e-06, "loss": 0.023294207453727723, "step": 35230 }, { "epoch": 0.3316235294117647, "grad_norm": 0.6534026092019181, "learning_rate": 3.4738867239563816e-06, "loss": 0.026456457376480103, "step": 35235 }, { "epoch": 0.3316705882352941, "grad_norm": 0.5247572244530913, "learning_rate": 3.473640263361683e-06, "loss": 0.02044712007045746, "step": 35240 }, { "epoch": 0.3317176470588235, "grad_norm": 0.27884518317888785, "learning_rate": 3.473393855216197e-06, "loss": 0.017810368537902833, "step": 35245 }, { "epoch": 0.33176470588235296, "grad_norm": 0.5098176728951891, "learning_rate": 3.4731474995013235e-06, "loss": 0.021791957318782806, "step": 35250 }, { "epoch": 0.33181176470588236, "grad_norm": 0.4659496682720771, "learning_rate": 3.4729011961984695e-06, "loss": 0.016191934049129487, "step": 35255 }, { "epoch": 0.33185882352941176, "grad_norm": 0.6950834749646723, "learning_rate": 3.4726549452890552e-06, "loss": 0.018864554166793824, "step": 35260 }, { "epoch": 0.33190588235294116, "grad_norm": 0.4621928140180388, "learning_rate": 3.472408746754507e-06, "loss": 0.019699376821517945, "step": 35265 }, { "epoch": 0.33195294117647056, "grad_norm": 0.45387113665667217, "learning_rate": 3.4721626005762626e-06, "loss": 0.019400019943714143, "step": 35270 }, { "epoch": 0.332, "grad_norm": 0.49180879156648183, "learning_rate": 3.4719165067357664e-06, "loss": 0.01810055673122406, "step": 35275 }, { "epoch": 0.3320470588235294, "grad_norm": 0.513501719080157, "learning_rate": 3.4716704652144756e-06, "loss": 0.02207043468952179, "step": 35280 }, { "epoch": 0.3320941176470588, "grad_norm": 0.5844110277359309, "learning_rate": 3.4714244759938524e-06, "loss": 0.01797584891319275, "step": 35285 }, { "epoch": 0.3321411764705882, "grad_norm": 0.5104072931309686, "learning_rate": 3.4711785390553715e-06, "loss": 0.0220621794462204, "step": 35290 }, { "epoch": 0.3321882352941177, "grad_norm": 0.5132647337480725, "learning_rate": 3.4709326543805153e-06, "loss": 0.021121492981910704, "step": 35295 }, { "epoch": 0.3322352941176471, "grad_norm": 0.7215924699344531, "learning_rate": 3.4706868219507767e-06, "loss": 0.021875756978988647, "step": 35300 }, { "epoch": 0.3322823529411765, "grad_norm": 0.48336886009720137, "learning_rate": 3.4704410417476543e-06, "loss": 0.022497564554214478, "step": 35305 }, { "epoch": 0.3323294117647059, "grad_norm": 0.3242986962211116, "learning_rate": 3.4701953137526613e-06, "loss": 0.01780809313058853, "step": 35310 }, { "epoch": 0.3323764705882353, "grad_norm": 0.7054642505815576, "learning_rate": 3.469949637947314e-06, "loss": 0.019540446996688842, "step": 35315 }, { "epoch": 0.33242352941176473, "grad_norm": 0.5433320213112689, "learning_rate": 3.4697040143131434e-06, "loss": 0.020436799526214598, "step": 35320 }, { "epoch": 0.33247058823529413, "grad_norm": 0.5280497617186415, "learning_rate": 3.4694584428316858e-06, "loss": 0.02714652419090271, "step": 35325 }, { "epoch": 0.33251764705882353, "grad_norm": 0.3781920954023763, "learning_rate": 3.4692129234844886e-06, "loss": 0.020361661911010742, "step": 35330 }, { "epoch": 0.33256470588235293, "grad_norm": 0.5126348786265845, "learning_rate": 3.468967456253109e-06, "loss": 0.014580672979354859, "step": 35335 }, { "epoch": 0.33261176470588233, "grad_norm": 0.7315527518821879, "learning_rate": 3.4687220411191087e-06, "loss": 0.02526921033859253, "step": 35340 }, { "epoch": 0.3326588235294118, "grad_norm": 0.6787328950666519, "learning_rate": 3.4684766780640656e-06, "loss": 0.02250814139842987, "step": 35345 }, { "epoch": 0.3327058823529412, "grad_norm": 0.531636466171398, "learning_rate": 3.4682313670695605e-06, "loss": 0.020574066042900085, "step": 35350 }, { "epoch": 0.3327529411764706, "grad_norm": 0.6175753870564418, "learning_rate": 3.467986108117187e-06, "loss": 0.02557206153869629, "step": 35355 }, { "epoch": 0.3328, "grad_norm": 0.4550599386858737, "learning_rate": 3.4677409011885473e-06, "loss": 0.016964063048362732, "step": 35360 }, { "epoch": 0.3328470588235294, "grad_norm": 0.47816097811585867, "learning_rate": 3.4674957462652508e-06, "loss": 0.018879060447216035, "step": 35365 }, { "epoch": 0.33289411764705884, "grad_norm": 0.8137562003703994, "learning_rate": 3.4672506433289177e-06, "loss": 0.025736305117607116, "step": 35370 }, { "epoch": 0.33294117647058824, "grad_norm": 0.7855109347179464, "learning_rate": 3.467005592361177e-06, "loss": 0.01788974702358246, "step": 35375 }, { "epoch": 0.33298823529411764, "grad_norm": 0.6406016136209535, "learning_rate": 3.4667605933436663e-06, "loss": 0.019209636747837065, "step": 35380 }, { "epoch": 0.33303529411764704, "grad_norm": 0.49611969428952063, "learning_rate": 3.466515646258034e-06, "loss": 0.01788342893123627, "step": 35385 }, { "epoch": 0.3330823529411765, "grad_norm": 0.40463727912907105, "learning_rate": 3.4662707510859345e-06, "loss": 0.018382656574249267, "step": 35390 }, { "epoch": 0.3331294117647059, "grad_norm": 0.8322208977597803, "learning_rate": 3.466025907809034e-06, "loss": 0.018586951494216918, "step": 35395 }, { "epoch": 0.3331764705882353, "grad_norm": 0.6199923801415435, "learning_rate": 3.4657811164090064e-06, "loss": 0.0185503825545311, "step": 35400 }, { "epoch": 0.3332235294117647, "grad_norm": 0.7181688512759593, "learning_rate": 3.465536376867535e-06, "loss": 0.02030515521764755, "step": 35405 }, { "epoch": 0.3332705882352941, "grad_norm": 0.676018587108831, "learning_rate": 3.4652916891663123e-06, "loss": 0.020880559086799623, "step": 35410 }, { "epoch": 0.33331764705882355, "grad_norm": 0.4873035174026406, "learning_rate": 3.4650470532870393e-06, "loss": 0.023126479983329774, "step": 35415 }, { "epoch": 0.33336470588235295, "grad_norm": 0.8657514825529563, "learning_rate": 3.4648024692114273e-06, "loss": 0.024335302412509918, "step": 35420 }, { "epoch": 0.33341176470588235, "grad_norm": 0.6841611517947455, "learning_rate": 3.464557936921196e-06, "loss": 0.02032536268234253, "step": 35425 }, { "epoch": 0.33345882352941175, "grad_norm": 0.545491603321258, "learning_rate": 3.4643134563980726e-06, "loss": 0.022058701515197753, "step": 35430 }, { "epoch": 0.33350588235294115, "grad_norm": 0.6061459805582615, "learning_rate": 3.464069027623795e-06, "loss": 0.021876516938209533, "step": 35435 }, { "epoch": 0.3335529411764706, "grad_norm": 0.7143981065586036, "learning_rate": 3.4638246505801105e-06, "loss": 0.02585269808769226, "step": 35440 }, { "epoch": 0.3336, "grad_norm": 0.43809556507770475, "learning_rate": 3.463580325248774e-06, "loss": 0.020984190702438354, "step": 35445 }, { "epoch": 0.3336470588235294, "grad_norm": 0.4942047254523481, "learning_rate": 3.4633360516115505e-06, "loss": 0.017092412710189818, "step": 35450 }, { "epoch": 0.3336941176470588, "grad_norm": 0.5302135923551742, "learning_rate": 3.4630918296502132e-06, "loss": 0.019426068663597106, "step": 35455 }, { "epoch": 0.3337411764705882, "grad_norm": 0.7050657870733968, "learning_rate": 3.4628476593465456e-06, "loss": 0.02745371460914612, "step": 35460 }, { "epoch": 0.33378823529411766, "grad_norm": 0.6254287858915679, "learning_rate": 3.462603540682338e-06, "loss": 0.023608297109603882, "step": 35465 }, { "epoch": 0.33383529411764706, "grad_norm": 0.5837232870800948, "learning_rate": 3.4623594736393916e-06, "loss": 0.020873242616653444, "step": 35470 }, { "epoch": 0.33388235294117646, "grad_norm": 0.7354329061954555, "learning_rate": 3.462115458199516e-06, "loss": 0.022362899780273438, "step": 35475 }, { "epoch": 0.33392941176470586, "grad_norm": 0.6571695951406309, "learning_rate": 3.4618714943445298e-06, "loss": 0.020172494649887084, "step": 35480 }, { "epoch": 0.3339764705882353, "grad_norm": 0.4957729997117038, "learning_rate": 3.4616275820562594e-06, "loss": 0.01731084883213043, "step": 35485 }, { "epoch": 0.3340235294117647, "grad_norm": 0.5463071869869615, "learning_rate": 3.461383721316543e-06, "loss": 0.019610324501991273, "step": 35490 }, { "epoch": 0.3340705882352941, "grad_norm": 0.4465664830629694, "learning_rate": 3.4611399121072248e-06, "loss": 0.02024614214897156, "step": 35495 }, { "epoch": 0.3341176470588235, "grad_norm": 0.4421241864868387, "learning_rate": 3.46089615441016e-06, "loss": 0.01869831681251526, "step": 35500 }, { "epoch": 0.3341647058823529, "grad_norm": 0.8152333219317119, "learning_rate": 3.4606524482072106e-06, "loss": 0.026834017038345336, "step": 35505 }, { "epoch": 0.3342117647058824, "grad_norm": 0.5795481596890963, "learning_rate": 3.46040879348025e-06, "loss": 0.02301696836948395, "step": 35510 }, { "epoch": 0.3342588235294118, "grad_norm": 0.4624576537366909, "learning_rate": 3.460165190211159e-06, "loss": 0.01716093122959137, "step": 35515 }, { "epoch": 0.3343058823529412, "grad_norm": 0.4910790379826899, "learning_rate": 3.4599216383818272e-06, "loss": 0.020345224440097807, "step": 35520 }, { "epoch": 0.3343529411764706, "grad_norm": 0.37938728212649336, "learning_rate": 3.459678137974154e-06, "loss": 0.017924878001213073, "step": 35525 }, { "epoch": 0.3344, "grad_norm": 1.0170525436084443, "learning_rate": 3.459434688970048e-06, "loss": 0.0246551513671875, "step": 35530 }, { "epoch": 0.33444705882352943, "grad_norm": 0.5571136260469689, "learning_rate": 3.4591912913514248e-06, "loss": 0.01961504518985748, "step": 35535 }, { "epoch": 0.33449411764705883, "grad_norm": 0.5787433469871156, "learning_rate": 3.4589479451002106e-06, "loss": 0.023076543211936952, "step": 35540 }, { "epoch": 0.33454117647058823, "grad_norm": 0.5898563657464195, "learning_rate": 3.4587046501983405e-06, "loss": 0.03097520172595978, "step": 35545 }, { "epoch": 0.33458823529411763, "grad_norm": 0.7738215850859501, "learning_rate": 3.4584614066277577e-06, "loss": 0.017568868398666383, "step": 35550 }, { "epoch": 0.3346352941176471, "grad_norm": 0.5426398246300512, "learning_rate": 3.458218214370415e-06, "loss": 0.02067866027355194, "step": 35555 }, { "epoch": 0.3346823529411765, "grad_norm": 0.7566461069403698, "learning_rate": 3.4579750734082723e-06, "loss": 0.018436935544013978, "step": 35560 }, { "epoch": 0.3347294117647059, "grad_norm": 0.45177060176780365, "learning_rate": 3.4577319837233014e-06, "loss": 0.019526316225528716, "step": 35565 }, { "epoch": 0.3347764705882353, "grad_norm": 0.8234973367703082, "learning_rate": 3.457488945297481e-06, "loss": 0.020731401443481446, "step": 35570 }, { "epoch": 0.3348235294117647, "grad_norm": 0.5260704075035606, "learning_rate": 3.4572459581127983e-06, "loss": 0.024816995859146117, "step": 35575 }, { "epoch": 0.33487058823529414, "grad_norm": 0.8176527105333095, "learning_rate": 3.4570030221512507e-06, "loss": 0.030220043659210206, "step": 35580 }, { "epoch": 0.33491764705882354, "grad_norm": 0.8250978657528717, "learning_rate": 3.456760137394843e-06, "loss": 0.020950856804847717, "step": 35585 }, { "epoch": 0.33496470588235294, "grad_norm": 0.5395411788877733, "learning_rate": 3.4565173038255916e-06, "loss": 0.019212336838245393, "step": 35590 }, { "epoch": 0.33501176470588234, "grad_norm": 0.6106814376694895, "learning_rate": 3.4562745214255172e-06, "loss": 0.02142534852027893, "step": 35595 }, { "epoch": 0.33505882352941174, "grad_norm": 0.4369172318923349, "learning_rate": 3.456031790176654e-06, "loss": 0.019260288774967195, "step": 35600 }, { "epoch": 0.3351058823529412, "grad_norm": 0.44813736189434117, "learning_rate": 3.4557891100610424e-06, "loss": 0.019312742352485656, "step": 35605 }, { "epoch": 0.3351529411764706, "grad_norm": 0.613263769611569, "learning_rate": 3.4555464810607324e-06, "loss": 0.020907557010650633, "step": 35610 }, { "epoch": 0.3352, "grad_norm": 0.6488573447669993, "learning_rate": 3.4553039031577824e-06, "loss": 0.026284217834472656, "step": 35615 }, { "epoch": 0.3352470588235294, "grad_norm": 0.5342566933307713, "learning_rate": 3.4550613763342594e-06, "loss": 0.02367706596851349, "step": 35620 }, { "epoch": 0.3352941176470588, "grad_norm": 0.49839393719357694, "learning_rate": 3.45481890057224e-06, "loss": 0.018699710071086884, "step": 35625 }, { "epoch": 0.33534117647058825, "grad_norm": 0.47371725224131656, "learning_rate": 3.45457647585381e-06, "loss": 0.018625715374946596, "step": 35630 }, { "epoch": 0.33538823529411765, "grad_norm": 0.5260800066998803, "learning_rate": 3.4543341021610616e-06, "loss": 0.015211385488510133, "step": 35635 }, { "epoch": 0.33543529411764705, "grad_norm": 0.4571030797753168, "learning_rate": 3.4540917794760986e-06, "loss": 0.017204976081848143, "step": 35640 }, { "epoch": 0.33548235294117645, "grad_norm": 0.41540361491294076, "learning_rate": 3.453849507781033e-06, "loss": 0.018150395154953, "step": 35645 }, { "epoch": 0.3355294117647059, "grad_norm": 0.5908802751973455, "learning_rate": 3.4536072870579838e-06, "loss": 0.024373024702072144, "step": 35650 }, { "epoch": 0.3355764705882353, "grad_norm": 0.7206499359368026, "learning_rate": 3.4533651172890807e-06, "loss": 0.023617568612098693, "step": 35655 }, { "epoch": 0.3356235294117647, "grad_norm": 0.5903151082475815, "learning_rate": 3.453122998456461e-06, "loss": 0.021606388688087463, "step": 35660 }, { "epoch": 0.3356705882352941, "grad_norm": 0.7095522732239075, "learning_rate": 3.4528809305422723e-06, "loss": 0.02043571025133133, "step": 35665 }, { "epoch": 0.3357176470588235, "grad_norm": 0.5238117345186553, "learning_rate": 3.4526389135286686e-06, "loss": 0.020376305282115936, "step": 35670 }, { "epoch": 0.33576470588235297, "grad_norm": 0.493644071736351, "learning_rate": 3.452396947397815e-06, "loss": 0.021907398104667665, "step": 35675 }, { "epoch": 0.33581176470588237, "grad_norm": 0.6362528032082476, "learning_rate": 3.4521550321318836e-06, "loss": 0.01589394360780716, "step": 35680 }, { "epoch": 0.33585882352941177, "grad_norm": 0.5777558471361, "learning_rate": 3.4519131677130556e-06, "loss": 0.018457072973251342, "step": 35685 }, { "epoch": 0.33590588235294117, "grad_norm": 0.6536998861871455, "learning_rate": 3.4516713541235224e-06, "loss": 0.019715717434883116, "step": 35690 }, { "epoch": 0.33595294117647057, "grad_norm": 0.5959852042602066, "learning_rate": 3.451429591345482e-06, "loss": 0.02037135511636734, "step": 35695 }, { "epoch": 0.336, "grad_norm": 0.4832477884908704, "learning_rate": 3.451187879361143e-06, "loss": 0.020827272534370424, "step": 35700 }, { "epoch": 0.3360470588235294, "grad_norm": 0.449829804562203, "learning_rate": 3.4509462181527213e-06, "loss": 0.024235545098781584, "step": 35705 }, { "epoch": 0.3360941176470588, "grad_norm": 0.7028850432374969, "learning_rate": 3.450704607702442e-06, "loss": 0.020670944452285768, "step": 35710 }, { "epoch": 0.3361411764705882, "grad_norm": 0.45687586639259165, "learning_rate": 3.45046304799254e-06, "loss": 0.016672414541244508, "step": 35715 }, { "epoch": 0.3361882352941176, "grad_norm": 0.6879611721039429, "learning_rate": 3.4502215390052557e-06, "loss": 0.02151040583848953, "step": 35720 }, { "epoch": 0.3362352941176471, "grad_norm": 0.6010376753448868, "learning_rate": 3.449980080722843e-06, "loss": 0.025267046689987183, "step": 35725 }, { "epoch": 0.3362823529411765, "grad_norm": 0.4653750445851906, "learning_rate": 3.44973867312756e-06, "loss": 0.023809084296226503, "step": 35730 }, { "epoch": 0.3363294117647059, "grad_norm": 0.4952046662383111, "learning_rate": 3.4494973162016766e-06, "loss": 0.01878800094127655, "step": 35735 }, { "epoch": 0.3363764705882353, "grad_norm": 0.5569393183898448, "learning_rate": 3.449256009927469e-06, "loss": 0.022566182911396025, "step": 35740 }, { "epoch": 0.33642352941176473, "grad_norm": 0.539366163869945, "learning_rate": 3.4490147542872237e-06, "loss": 0.017985376715660095, "step": 35745 }, { "epoch": 0.33647058823529413, "grad_norm": 0.6220334135962935, "learning_rate": 3.448773549263236e-06, "loss": 0.025519481301307677, "step": 35750 }, { "epoch": 0.33651764705882353, "grad_norm": 0.4492833489521456, "learning_rate": 3.4485323948378084e-06, "loss": 0.022293952107429505, "step": 35755 }, { "epoch": 0.33656470588235293, "grad_norm": 0.5693180658851322, "learning_rate": 3.448291290993254e-06, "loss": 0.023513910174369813, "step": 35760 }, { "epoch": 0.33661176470588233, "grad_norm": 0.5865782575351022, "learning_rate": 3.4480502377118925e-06, "loss": 0.019945627450942992, "step": 35765 }, { "epoch": 0.3366588235294118, "grad_norm": 0.7314242364423077, "learning_rate": 3.4478092349760535e-06, "loss": 0.020447182655334472, "step": 35770 }, { "epoch": 0.3367058823529412, "grad_norm": 0.5879919990806793, "learning_rate": 3.4475682827680744e-06, "loss": 0.024424010515213014, "step": 35775 }, { "epoch": 0.3367529411764706, "grad_norm": 0.7752887069381248, "learning_rate": 3.4473273810703034e-06, "loss": 0.019038426876068115, "step": 35780 }, { "epoch": 0.3368, "grad_norm": 0.3194244172800839, "learning_rate": 3.447086529865094e-06, "loss": 0.022300422191619873, "step": 35785 }, { "epoch": 0.3368470588235294, "grad_norm": 0.7594522228031294, "learning_rate": 3.4468457291348116e-06, "loss": 0.031183478236198426, "step": 35790 }, { "epoch": 0.33689411764705884, "grad_norm": 0.7582044740884735, "learning_rate": 3.446604978861827e-06, "loss": 0.01777549684047699, "step": 35795 }, { "epoch": 0.33694117647058824, "grad_norm": 0.4644859348530176, "learning_rate": 3.4463642790285233e-06, "loss": 0.019906476140022278, "step": 35800 }, { "epoch": 0.33698823529411764, "grad_norm": 0.6188811220893758, "learning_rate": 3.446123629617289e-06, "loss": 0.020251303911209106, "step": 35805 }, { "epoch": 0.33703529411764704, "grad_norm": 0.702870518766261, "learning_rate": 3.4458830306105223e-06, "loss": 0.025085797905921935, "step": 35810 }, { "epoch": 0.33708235294117644, "grad_norm": 0.5544264069919929, "learning_rate": 3.445642481990631e-06, "loss": 0.018509209156036377, "step": 35815 }, { "epoch": 0.3371294117647059, "grad_norm": 0.3457049091645718, "learning_rate": 3.445401983740029e-06, "loss": 0.019181016087532043, "step": 35820 }, { "epoch": 0.3371764705882353, "grad_norm": 0.4575606129432463, "learning_rate": 3.4451615358411435e-06, "loss": 0.018783068656921385, "step": 35825 }, { "epoch": 0.3372235294117647, "grad_norm": 0.46400326913092405, "learning_rate": 3.4449211382764034e-06, "loss": 0.021518470346927644, "step": 35830 }, { "epoch": 0.3372705882352941, "grad_norm": 0.5893754635404427, "learning_rate": 3.4446807910282538e-06, "loss": 0.023985080420970917, "step": 35835 }, { "epoch": 0.33731764705882356, "grad_norm": 0.4171784253833882, "learning_rate": 3.444440494079141e-06, "loss": 0.019985561072826386, "step": 35840 }, { "epoch": 0.33736470588235296, "grad_norm": 0.5210016038725349, "learning_rate": 3.4442002474115256e-06, "loss": 0.019121192395687103, "step": 35845 }, { "epoch": 0.33741176470588236, "grad_norm": 0.6716977181167672, "learning_rate": 3.443960051007874e-06, "loss": 0.02157461494207382, "step": 35850 }, { "epoch": 0.33745882352941176, "grad_norm": 0.5043414625688594, "learning_rate": 3.443719904850662e-06, "loss": 0.01824667751789093, "step": 35855 }, { "epoch": 0.33750588235294116, "grad_norm": 0.46829163575772953, "learning_rate": 3.4434798089223737e-06, "loss": 0.01782536506652832, "step": 35860 }, { "epoch": 0.3375529411764706, "grad_norm": 0.3919757248608049, "learning_rate": 3.4432397632055015e-06, "loss": 0.021099355816841126, "step": 35865 }, { "epoch": 0.3376, "grad_norm": 0.7997443183694516, "learning_rate": 3.442999767682546e-06, "loss": 0.024773244559764863, "step": 35870 }, { "epoch": 0.3376470588235294, "grad_norm": 0.41531610801769, "learning_rate": 3.442759822336019e-06, "loss": 0.0220614954829216, "step": 35875 }, { "epoch": 0.3376941176470588, "grad_norm": 0.507560662565137, "learning_rate": 3.4425199271484354e-06, "loss": 0.02203952968120575, "step": 35880 }, { "epoch": 0.3377411764705882, "grad_norm": 0.9317877568268969, "learning_rate": 3.4422800821023254e-06, "loss": 0.025613433122634886, "step": 35885 }, { "epoch": 0.33778823529411767, "grad_norm": 0.660242300045653, "learning_rate": 3.4420402871802223e-06, "loss": 0.021582111716270447, "step": 35890 }, { "epoch": 0.33783529411764707, "grad_norm": 0.5891045529948964, "learning_rate": 3.4418005423646713e-06, "loss": 0.024483194947242735, "step": 35895 }, { "epoch": 0.33788235294117647, "grad_norm": 0.5159920709675698, "learning_rate": 3.4415608476382234e-06, "loss": 0.02061792016029358, "step": 35900 }, { "epoch": 0.33792941176470587, "grad_norm": 0.6570312430631141, "learning_rate": 3.4413212029834396e-06, "loss": 0.02153092175722122, "step": 35905 }, { "epoch": 0.33797647058823527, "grad_norm": 0.5377554779999959, "learning_rate": 3.44108160838289e-06, "loss": 0.021607720851898195, "step": 35910 }, { "epoch": 0.3380235294117647, "grad_norm": 0.9861471895333475, "learning_rate": 3.440842063819152e-06, "loss": 0.023062166571617127, "step": 35915 }, { "epoch": 0.3380705882352941, "grad_norm": 1.0202533592249743, "learning_rate": 3.4406025692748123e-06, "loss": 0.022995974123477935, "step": 35920 }, { "epoch": 0.3381176470588235, "grad_norm": 0.4623853740927366, "learning_rate": 3.4403631247324647e-06, "loss": 0.02099557816982269, "step": 35925 }, { "epoch": 0.3381647058823529, "grad_norm": 0.4868276595079199, "learning_rate": 3.4401237301747143e-06, "loss": 0.016992950439453126, "step": 35930 }, { "epoch": 0.3382117647058824, "grad_norm": 0.6653048328463145, "learning_rate": 3.4398843855841706e-06, "loss": 0.01904824823141098, "step": 35935 }, { "epoch": 0.3382588235294118, "grad_norm": 0.5796564030444151, "learning_rate": 3.4396450909434553e-06, "loss": 0.019313314557075502, "step": 35940 }, { "epoch": 0.3383058823529412, "grad_norm": 0.546043856300915, "learning_rate": 3.439405846235197e-06, "loss": 0.018691164255142213, "step": 35945 }, { "epoch": 0.3383529411764706, "grad_norm": 0.46904581653597116, "learning_rate": 3.4391666514420325e-06, "loss": 0.019229310750961303, "step": 35950 }, { "epoch": 0.3384, "grad_norm": 0.5967025194167827, "learning_rate": 3.438927506546607e-06, "loss": 0.023289260268211365, "step": 35955 }, { "epoch": 0.33844705882352943, "grad_norm": 0.6003700492504572, "learning_rate": 3.4386884115315756e-06, "loss": 0.023207759857177733, "step": 35960 }, { "epoch": 0.33849411764705883, "grad_norm": 0.6963847413209423, "learning_rate": 3.438449366379599e-06, "loss": 0.018230174481868745, "step": 35965 }, { "epoch": 0.33854117647058823, "grad_norm": 0.5053897070971743, "learning_rate": 3.438210371073351e-06, "loss": 0.020974910259246825, "step": 35970 }, { "epoch": 0.33858823529411763, "grad_norm": 0.6101373468158159, "learning_rate": 3.437971425595508e-06, "loss": 0.01940838098526001, "step": 35975 }, { "epoch": 0.33863529411764703, "grad_norm": 0.6925007242453255, "learning_rate": 3.4377325299287596e-06, "loss": 0.024154670536518097, "step": 35980 }, { "epoch": 0.3386823529411765, "grad_norm": 0.6270687880539203, "learning_rate": 3.4374936840558013e-06, "loss": 0.018605810403823853, "step": 35985 }, { "epoch": 0.3387294117647059, "grad_norm": 0.5649350040975621, "learning_rate": 3.4372548879593377e-06, "loss": 0.02084733098745346, "step": 35990 }, { "epoch": 0.3387764705882353, "grad_norm": 0.4885802938554904, "learning_rate": 3.4370161416220816e-06, "loss": 0.024371039867401124, "step": 35995 }, { "epoch": 0.3388235294117647, "grad_norm": 0.7794031437825119, "learning_rate": 3.436777445026755e-06, "loss": 0.018611915409564972, "step": 36000 }, { "epoch": 0.3388705882352941, "grad_norm": 0.5983971116766569, "learning_rate": 3.436538798156088e-06, "loss": 0.02100173234939575, "step": 36005 }, { "epoch": 0.33891764705882355, "grad_norm": 0.6034089499964583, "learning_rate": 3.436300200992817e-06, "loss": 0.016276240348815918, "step": 36010 }, { "epoch": 0.33896470588235295, "grad_norm": 0.5548979843491932, "learning_rate": 3.4360616535196913e-06, "loss": 0.021236053109169005, "step": 36015 }, { "epoch": 0.33901176470588235, "grad_norm": 0.6335997289196669, "learning_rate": 3.435823155719463e-06, "loss": 0.019425687193870545, "step": 36020 }, { "epoch": 0.33905882352941175, "grad_norm": 0.4021753223350442, "learning_rate": 3.435584707574898e-06, "loss": 0.020000916719436646, "step": 36025 }, { "epoch": 0.3391058823529412, "grad_norm": 0.8611061554860993, "learning_rate": 3.435346309068766e-06, "loss": 0.021875672042369843, "step": 36030 }, { "epoch": 0.3391529411764706, "grad_norm": 0.6902563550908138, "learning_rate": 3.4351079601838494e-06, "loss": 0.025760146975517272, "step": 36035 }, { "epoch": 0.3392, "grad_norm": 0.8554601454928453, "learning_rate": 3.434869660902934e-06, "loss": 0.01982090324163437, "step": 36040 }, { "epoch": 0.3392470588235294, "grad_norm": 0.6348984325228745, "learning_rate": 3.4346314112088184e-06, "loss": 0.019913092255592346, "step": 36045 }, { "epoch": 0.3392941176470588, "grad_norm": 0.5570370011841923, "learning_rate": 3.4343932110843075e-06, "loss": 0.02165970504283905, "step": 36050 }, { "epoch": 0.33934117647058826, "grad_norm": 0.6237482326019846, "learning_rate": 3.434155060512214e-06, "loss": 0.022701793909072877, "step": 36055 }, { "epoch": 0.33938823529411766, "grad_norm": 0.6224377557298132, "learning_rate": 3.433916959475361e-06, "loss": 0.018999263644218445, "step": 36060 }, { "epoch": 0.33943529411764706, "grad_norm": 0.6142691358061048, "learning_rate": 3.4336789079565773e-06, "loss": 0.018674662709236144, "step": 36065 }, { "epoch": 0.33948235294117646, "grad_norm": 0.30317723929932777, "learning_rate": 3.4334409059387023e-06, "loss": 0.025418192148208618, "step": 36070 }, { "epoch": 0.33952941176470586, "grad_norm": 0.5457218320723859, "learning_rate": 3.4332029534045836e-06, "loss": 0.0220528244972229, "step": 36075 }, { "epoch": 0.3395764705882353, "grad_norm": 0.6133399265161772, "learning_rate": 3.4329650503370747e-06, "loss": 0.023145896196365357, "step": 36080 }, { "epoch": 0.3396235294117647, "grad_norm": 1.3753365812802818, "learning_rate": 3.43272719671904e-06, "loss": 0.021517565846443175, "step": 36085 }, { "epoch": 0.3396705882352941, "grad_norm": 0.8276548462913965, "learning_rate": 3.4324893925333515e-06, "loss": 0.02545750141143799, "step": 36090 }, { "epoch": 0.3397176470588235, "grad_norm": 0.6783884614469897, "learning_rate": 3.432251637762888e-06, "loss": 0.0311896950006485, "step": 36095 }, { "epoch": 0.33976470588235297, "grad_norm": 0.5991623369666899, "learning_rate": 3.43201393239054e-06, "loss": 0.02201869636774063, "step": 36100 }, { "epoch": 0.33981176470588237, "grad_norm": 0.42814989591519653, "learning_rate": 3.431776276399202e-06, "loss": 0.022880935668945314, "step": 36105 }, { "epoch": 0.33985882352941177, "grad_norm": 0.558901344893883, "learning_rate": 3.4315386697717805e-06, "loss": 0.0179221048951149, "step": 36110 }, { "epoch": 0.33990588235294117, "grad_norm": 0.38349736789302347, "learning_rate": 3.4313011124911884e-06, "loss": 0.024842500686645508, "step": 36115 }, { "epoch": 0.33995294117647057, "grad_norm": 0.48805393807570074, "learning_rate": 3.4310636045403466e-06, "loss": 0.026076745986938477, "step": 36120 }, { "epoch": 0.34, "grad_norm": 0.4028451135402912, "learning_rate": 3.430826145902186e-06, "loss": 0.021779096126556395, "step": 36125 }, { "epoch": 0.3400470588235294, "grad_norm": 0.5087463096300429, "learning_rate": 3.430588736559644e-06, "loss": 0.02038170099258423, "step": 36130 }, { "epoch": 0.3400941176470588, "grad_norm": 0.537669104845705, "learning_rate": 3.4303513764956668e-06, "loss": 0.016799548268318178, "step": 36135 }, { "epoch": 0.3401411764705882, "grad_norm": 0.4875959615931143, "learning_rate": 3.4301140656932097e-06, "loss": 0.01924651563167572, "step": 36140 }, { "epoch": 0.3401882352941176, "grad_norm": 0.5466943229068886, "learning_rate": 3.429876804135235e-06, "loss": 0.024799421429634094, "step": 36145 }, { "epoch": 0.3402352941176471, "grad_norm": 0.47824784344318483, "learning_rate": 3.429639591804714e-06, "loss": 0.019876974821090698, "step": 36150 }, { "epoch": 0.3402823529411765, "grad_norm": 0.7039375018046061, "learning_rate": 3.4294024286846263e-06, "loss": 0.020546019077301025, "step": 36155 }, { "epoch": 0.3403294117647059, "grad_norm": 0.4332543771262402, "learning_rate": 3.4291653147579584e-06, "loss": 0.021210494637489318, "step": 36160 }, { "epoch": 0.3403764705882353, "grad_norm": 0.6093569186974789, "learning_rate": 3.4289282500077077e-06, "loss": 0.02070950120687485, "step": 36165 }, { "epoch": 0.3404235294117647, "grad_norm": 0.3869358745640195, "learning_rate": 3.4286912344168766e-06, "loss": 0.016902065277099608, "step": 36170 }, { "epoch": 0.34047058823529414, "grad_norm": 0.7340581284909732, "learning_rate": 3.4284542679684798e-06, "loss": 0.022354134917259218, "step": 36175 }, { "epoch": 0.34051764705882354, "grad_norm": 0.5095909682480597, "learning_rate": 3.4282173506455353e-06, "loss": 0.02133481651544571, "step": 36180 }, { "epoch": 0.34056470588235294, "grad_norm": 0.6447051261955473, "learning_rate": 3.4279804824310726e-06, "loss": 0.019501613080501558, "step": 36185 }, { "epoch": 0.34061176470588234, "grad_norm": 0.6106776102864082, "learning_rate": 3.4277436633081285e-06, "loss": 0.0201822429895401, "step": 36190 }, { "epoch": 0.3406588235294118, "grad_norm": 0.860480372333633, "learning_rate": 3.4275068932597496e-06, "loss": 0.027439439296722413, "step": 36195 }, { "epoch": 0.3407058823529412, "grad_norm": 0.7113397401851388, "learning_rate": 3.4272701722689867e-06, "loss": 0.022074325382709502, "step": 36200 }, { "epoch": 0.3407529411764706, "grad_norm": 0.5262719909759028, "learning_rate": 3.427033500318903e-06, "loss": 0.019681753218173982, "step": 36205 }, { "epoch": 0.3408, "grad_norm": 0.653392173714596, "learning_rate": 3.426796877392568e-06, "loss": 0.024055002629756926, "step": 36210 }, { "epoch": 0.3408470588235294, "grad_norm": 0.5387484229199133, "learning_rate": 3.4265603034730595e-06, "loss": 0.02000860869884491, "step": 36215 }, { "epoch": 0.34089411764705885, "grad_norm": 0.7024644895021908, "learning_rate": 3.4263237785434633e-06, "loss": 0.0179646298289299, "step": 36220 }, { "epoch": 0.34094117647058825, "grad_norm": 0.4358904954935175, "learning_rate": 3.426087302586873e-06, "loss": 0.02262532114982605, "step": 36225 }, { "epoch": 0.34098823529411765, "grad_norm": 0.6992078104151209, "learning_rate": 3.4258508755863927e-06, "loss": 0.021973437070846556, "step": 36230 }, { "epoch": 0.34103529411764705, "grad_norm": 0.654449512409222, "learning_rate": 3.4256144975251315e-06, "loss": 0.025007131695747375, "step": 36235 }, { "epoch": 0.34108235294117645, "grad_norm": 0.4583141601388835, "learning_rate": 3.4253781683862088e-06, "loss": 0.020394289493560792, "step": 36240 }, { "epoch": 0.3411294117647059, "grad_norm": 0.5244948965956395, "learning_rate": 3.425141888152751e-06, "loss": 0.02108493149280548, "step": 36245 }, { "epoch": 0.3411764705882353, "grad_norm": 0.6520927297923734, "learning_rate": 3.424905656807893e-06, "loss": 0.01796572357416153, "step": 36250 }, { "epoch": 0.3412235294117647, "grad_norm": 0.6701455138973478, "learning_rate": 3.4246694743347788e-06, "loss": 0.027521085739135743, "step": 36255 }, { "epoch": 0.3412705882352941, "grad_norm": 0.6547409050934334, "learning_rate": 3.4244333407165585e-06, "loss": 0.02307769060134888, "step": 36260 }, { "epoch": 0.3413176470588235, "grad_norm": 0.5113853313303073, "learning_rate": 3.424197255936393e-06, "loss": 0.022259637713432312, "step": 36265 }, { "epoch": 0.34136470588235296, "grad_norm": 0.5229203949155102, "learning_rate": 3.4239612199774487e-06, "loss": 0.016343410313129424, "step": 36270 }, { "epoch": 0.34141176470588236, "grad_norm": 0.7550866178965203, "learning_rate": 3.423725232822901e-06, "loss": 0.025580894947052003, "step": 36275 }, { "epoch": 0.34145882352941176, "grad_norm": 0.6349757707568346, "learning_rate": 3.4234892944559344e-06, "loss": 0.02360975742340088, "step": 36280 }, { "epoch": 0.34150588235294116, "grad_norm": 0.5575380515279851, "learning_rate": 3.423253404859741e-06, "loss": 0.023156289756298066, "step": 36285 }, { "epoch": 0.3415529411764706, "grad_norm": 0.4206464212279899, "learning_rate": 3.42301756401752e-06, "loss": 0.021275332570075987, "step": 36290 }, { "epoch": 0.3416, "grad_norm": 1.0777355053091995, "learning_rate": 3.4227817719124795e-06, "loss": 0.02380101680755615, "step": 36295 }, { "epoch": 0.3416470588235294, "grad_norm": 0.8546459639673368, "learning_rate": 3.4225460285278366e-06, "loss": 0.026768136024475097, "step": 36300 }, { "epoch": 0.3416941176470588, "grad_norm": 0.5104009764393701, "learning_rate": 3.422310333846815e-06, "loss": 0.018120746314525604, "step": 36305 }, { "epoch": 0.3417411764705882, "grad_norm": 0.8200250993912542, "learning_rate": 3.4220746878526462e-06, "loss": 0.02540697157382965, "step": 36310 }, { "epoch": 0.34178823529411767, "grad_norm": 0.4809754936390912, "learning_rate": 3.4218390905285724e-06, "loss": 0.018746715784072877, "step": 36315 }, { "epoch": 0.34183529411764707, "grad_norm": 0.42339549545383587, "learning_rate": 3.4216035418578407e-06, "loss": 0.0200330913066864, "step": 36320 }, { "epoch": 0.34188235294117647, "grad_norm": 0.46611351654632904, "learning_rate": 3.421368041823709e-06, "loss": 0.019284293055534363, "step": 36325 }, { "epoch": 0.34192941176470587, "grad_norm": 0.7036146585206579, "learning_rate": 3.4211325904094405e-06, "loss": 0.02360134422779083, "step": 36330 }, { "epoch": 0.34197647058823527, "grad_norm": 0.7162335689584572, "learning_rate": 3.4208971875983087e-06, "loss": 0.018850448727607726, "step": 36335 }, { "epoch": 0.3420235294117647, "grad_norm": 0.41923880179624456, "learning_rate": 3.420661833373594e-06, "loss": 0.019770006835460662, "step": 36340 }, { "epoch": 0.3420705882352941, "grad_norm": 0.6235092285359912, "learning_rate": 3.420426527718586e-06, "loss": 0.023619836568832396, "step": 36345 }, { "epoch": 0.3421176470588235, "grad_norm": 0.45064524543942885, "learning_rate": 3.420191270616581e-06, "loss": 0.01847686469554901, "step": 36350 }, { "epoch": 0.3421647058823529, "grad_norm": 0.4281049945927976, "learning_rate": 3.4199560620508837e-06, "loss": 0.026102980971336363, "step": 36355 }, { "epoch": 0.3422117647058823, "grad_norm": 0.4614526223199285, "learning_rate": 3.419720902004807e-06, "loss": 0.018685322999954224, "step": 36360 }, { "epoch": 0.3422588235294118, "grad_norm": 0.571176852750532, "learning_rate": 3.4194857904616723e-06, "loss": 0.0255233496427536, "step": 36365 }, { "epoch": 0.3423058823529412, "grad_norm": 0.4862236954302392, "learning_rate": 3.4192507274048086e-06, "loss": 0.023274651169776915, "step": 36370 }, { "epoch": 0.3423529411764706, "grad_norm": 0.38825716632091256, "learning_rate": 3.4190157128175533e-06, "loss": 0.016951248049736023, "step": 36375 }, { "epoch": 0.3424, "grad_norm": 0.4568395406768821, "learning_rate": 3.4187807466832503e-06, "loss": 0.021677392721176147, "step": 36380 }, { "epoch": 0.34244705882352944, "grad_norm": 0.5912899900548054, "learning_rate": 3.4185458289852537e-06, "loss": 0.019349467754364014, "step": 36385 }, { "epoch": 0.34249411764705884, "grad_norm": 0.5140089903879914, "learning_rate": 3.4183109597069235e-06, "loss": 0.01703369915485382, "step": 36390 }, { "epoch": 0.34254117647058824, "grad_norm": 0.501479162389149, "learning_rate": 3.4180761388316296e-06, "loss": 0.0203786239027977, "step": 36395 }, { "epoch": 0.34258823529411764, "grad_norm": 0.5114007078426833, "learning_rate": 3.417841366342749e-06, "loss": 0.022131642699241637, "step": 36400 }, { "epoch": 0.34263529411764704, "grad_norm": 0.6249322245729304, "learning_rate": 3.4176066422236665e-06, "loss": 0.01737624704837799, "step": 36405 }, { "epoch": 0.3426823529411765, "grad_norm": 0.3995771462280511, "learning_rate": 3.4173719664577754e-06, "loss": 0.021300944685935973, "step": 36410 }, { "epoch": 0.3427294117647059, "grad_norm": 0.592453518106826, "learning_rate": 3.4171373390284763e-06, "loss": 0.01854006350040436, "step": 36415 }, { "epoch": 0.3427764705882353, "grad_norm": 0.5316277889946214, "learning_rate": 3.416902759919178e-06, "loss": 0.017345687747001647, "step": 36420 }, { "epoch": 0.3428235294117647, "grad_norm": 0.778473434333859, "learning_rate": 3.416668229113298e-06, "loss": 0.02158132791519165, "step": 36425 }, { "epoch": 0.3428705882352941, "grad_norm": 0.6169743266341565, "learning_rate": 3.4164337465942616e-06, "loss": 0.01980883479118347, "step": 36430 }, { "epoch": 0.34291764705882355, "grad_norm": 0.5912567553942043, "learning_rate": 3.4161993123455006e-06, "loss": 0.021594534814357757, "step": 36435 }, { "epoch": 0.34296470588235295, "grad_norm": 0.7986252965927916, "learning_rate": 3.415964926350457e-06, "loss": 0.02907577157020569, "step": 36440 }, { "epoch": 0.34301176470588235, "grad_norm": 0.7347096759546194, "learning_rate": 3.4157305885925785e-06, "loss": 0.01899612247943878, "step": 36445 }, { "epoch": 0.34305882352941175, "grad_norm": 0.607301695758423, "learning_rate": 3.415496299055322e-06, "loss": 0.017840588092803956, "step": 36450 }, { "epoch": 0.34310588235294115, "grad_norm": 0.6523328771716983, "learning_rate": 3.415262057722153e-06, "loss": 0.02758464217185974, "step": 36455 }, { "epoch": 0.3431529411764706, "grad_norm": 0.3401024579153364, "learning_rate": 3.4150278645765433e-06, "loss": 0.01696305274963379, "step": 36460 }, { "epoch": 0.3432, "grad_norm": 0.6546677850931495, "learning_rate": 3.414793719601974e-06, "loss": 0.020864611864089964, "step": 36465 }, { "epoch": 0.3432470588235294, "grad_norm": 0.40715557717420914, "learning_rate": 3.414559622781934e-06, "loss": 0.018863970041275026, "step": 36470 }, { "epoch": 0.3432941176470588, "grad_norm": 0.6697629877293172, "learning_rate": 3.414325574099918e-06, "loss": 0.023944640159606935, "step": 36475 }, { "epoch": 0.34334117647058826, "grad_norm": 0.6045003216608257, "learning_rate": 3.414091573539432e-06, "loss": 0.017485278844833373, "step": 36480 }, { "epoch": 0.34338823529411766, "grad_norm": 0.4504110796498042, "learning_rate": 3.4138576210839875e-06, "loss": 0.02021978795528412, "step": 36485 }, { "epoch": 0.34343529411764706, "grad_norm": 0.6817274307425634, "learning_rate": 3.4136237167171043e-06, "loss": 0.02683476507663727, "step": 36490 }, { "epoch": 0.34348235294117646, "grad_norm": 0.33849603152245084, "learning_rate": 3.413389860422312e-06, "loss": 0.019302622973918916, "step": 36495 }, { "epoch": 0.34352941176470586, "grad_norm": 0.7284080047131055, "learning_rate": 3.4131560521831442e-06, "loss": 0.02461608946323395, "step": 36500 }, { "epoch": 0.3435764705882353, "grad_norm": 0.8961286610749588, "learning_rate": 3.4129222919831465e-06, "loss": 0.023793479800224303, "step": 36505 }, { "epoch": 0.3436235294117647, "grad_norm": 0.6404520314788713, "learning_rate": 3.4126885798058702e-06, "loss": 0.018820348381996154, "step": 36510 }, { "epoch": 0.3436705882352941, "grad_norm": 0.5383325950787903, "learning_rate": 3.4124549156348753e-06, "loss": 0.021218577027320863, "step": 36515 }, { "epoch": 0.3437176470588235, "grad_norm": 0.5791076506391899, "learning_rate": 3.412221299453728e-06, "loss": 0.025591787695884705, "step": 36520 }, { "epoch": 0.3437647058823529, "grad_norm": 0.4995234350769521, "learning_rate": 3.411987731246005e-06, "loss": 0.021753445267677307, "step": 36525 }, { "epoch": 0.3438117647058824, "grad_norm": 0.7875010640788609, "learning_rate": 3.4117542109952885e-06, "loss": 0.03293460011482239, "step": 36530 }, { "epoch": 0.3438588235294118, "grad_norm": 0.7908753314560087, "learning_rate": 3.41152073868517e-06, "loss": 0.022912275791168214, "step": 36535 }, { "epoch": 0.3439058823529412, "grad_norm": 0.40843527405497476, "learning_rate": 3.4112873142992485e-06, "loss": 0.021791982650756835, "step": 36540 }, { "epoch": 0.3439529411764706, "grad_norm": 0.3937886224304639, "learning_rate": 3.4110539378211314e-06, "loss": 0.02011379897594452, "step": 36545 }, { "epoch": 0.344, "grad_norm": 0.9626376726143615, "learning_rate": 3.4108206092344326e-06, "loss": 0.018262478709220886, "step": 36550 }, { "epoch": 0.34404705882352943, "grad_norm": 0.654665790283722, "learning_rate": 3.410587328522774e-06, "loss": 0.025198382139205933, "step": 36555 }, { "epoch": 0.34409411764705883, "grad_norm": 0.794856633395419, "learning_rate": 3.410354095669787e-06, "loss": 0.021885156631469727, "step": 36560 }, { "epoch": 0.34414117647058823, "grad_norm": 0.5176152897796099, "learning_rate": 3.41012091065911e-06, "loss": 0.01964348703622818, "step": 36565 }, { "epoch": 0.34418823529411763, "grad_norm": 0.4173960544227189, "learning_rate": 3.4098877734743886e-06, "loss": 0.02095123827457428, "step": 36570 }, { "epoch": 0.3442352941176471, "grad_norm": 0.5078162211198393, "learning_rate": 3.4096546840992757e-06, "loss": 0.01877302974462509, "step": 36575 }, { "epoch": 0.3442823529411765, "grad_norm": 0.5115376122288883, "learning_rate": 3.409421642517434e-06, "loss": 0.019716988503932952, "step": 36580 }, { "epoch": 0.3443294117647059, "grad_norm": 0.5766889540475911, "learning_rate": 3.4091886487125332e-06, "loss": 0.0204135924577713, "step": 36585 }, { "epoch": 0.3443764705882353, "grad_norm": 0.534123479366149, "learning_rate": 3.4089557026682496e-06, "loss": 0.02035738229751587, "step": 36590 }, { "epoch": 0.3444235294117647, "grad_norm": 0.5835370995432848, "learning_rate": 3.408722804368269e-06, "loss": 0.017411476373672484, "step": 36595 }, { "epoch": 0.34447058823529414, "grad_norm": 0.5340371617173472, "learning_rate": 3.4084899537962846e-06, "loss": 0.022033603489398958, "step": 36600 }, { "epoch": 0.34451764705882354, "grad_norm": 0.555202638227914, "learning_rate": 3.408257150935996e-06, "loss": 0.021065658330917357, "step": 36605 }, { "epoch": 0.34456470588235294, "grad_norm": 0.5477742754200507, "learning_rate": 3.4080243957711124e-06, "loss": 0.020166057348251342, "step": 36610 }, { "epoch": 0.34461176470588234, "grad_norm": 0.7703958507129361, "learning_rate": 3.407791688285349e-06, "loss": 0.018199706077575685, "step": 36615 }, { "epoch": 0.34465882352941174, "grad_norm": 0.6118634338890826, "learning_rate": 3.407559028462432e-06, "loss": 0.021821606159210204, "step": 36620 }, { "epoch": 0.3447058823529412, "grad_norm": 0.48452876014894286, "learning_rate": 3.407326416286091e-06, "loss": 0.018397513031959533, "step": 36625 }, { "epoch": 0.3447529411764706, "grad_norm": 0.7037750070702669, "learning_rate": 3.407093851740067e-06, "loss": 0.02035670280456543, "step": 36630 }, { "epoch": 0.3448, "grad_norm": 0.6122992682166797, "learning_rate": 3.4068613348081064e-06, "loss": 0.021823768317699433, "step": 36635 }, { "epoch": 0.3448470588235294, "grad_norm": 0.3821552902397098, "learning_rate": 3.4066288654739655e-06, "loss": 0.01713998317718506, "step": 36640 }, { "epoch": 0.34489411764705885, "grad_norm": 0.5127644981858122, "learning_rate": 3.4063964437214057e-06, "loss": 0.021983760595321655, "step": 36645 }, { "epoch": 0.34494117647058825, "grad_norm": 0.48919155651255736, "learning_rate": 3.4061640695341984e-06, "loss": 0.024028149247169495, "step": 36650 }, { "epoch": 0.34498823529411765, "grad_norm": 0.4792583131649653, "learning_rate": 3.405931742896123e-06, "loss": 0.019332538545131683, "step": 36655 }, { "epoch": 0.34503529411764705, "grad_norm": 3.9343326130002403, "learning_rate": 3.405699463790964e-06, "loss": 0.018183279037475585, "step": 36660 }, { "epoch": 0.34508235294117645, "grad_norm": 0.5064074395343601, "learning_rate": 3.4054672322025162e-06, "loss": 0.02111186981201172, "step": 36665 }, { "epoch": 0.3451294117647059, "grad_norm": 0.5826986808673217, "learning_rate": 3.4052350481145804e-06, "loss": 0.020508772134780882, "step": 36670 }, { "epoch": 0.3451764705882353, "grad_norm": 0.5167243718417593, "learning_rate": 3.4050029115109664e-06, "loss": 0.02040506899356842, "step": 36675 }, { "epoch": 0.3452235294117647, "grad_norm": 0.5772402628090731, "learning_rate": 3.4047708223754916e-06, "loss": 0.0220154345035553, "step": 36680 }, { "epoch": 0.3452705882352941, "grad_norm": 0.7201282229128859, "learning_rate": 3.4045387806919804e-06, "loss": 0.01729014366865158, "step": 36685 }, { "epoch": 0.3453176470588235, "grad_norm": 0.5938050018573331, "learning_rate": 3.4043067864442654e-06, "loss": 0.022078675031661988, "step": 36690 }, { "epoch": 0.34536470588235296, "grad_norm": 0.5168087546813629, "learning_rate": 3.4040748396161864e-06, "loss": 0.021947908401489257, "step": 36695 }, { "epoch": 0.34541176470588236, "grad_norm": 0.4573149706791371, "learning_rate": 3.403842940191592e-06, "loss": 0.027321651577949524, "step": 36700 }, { "epoch": 0.34545882352941176, "grad_norm": 0.4497812919097242, "learning_rate": 3.4036110881543373e-06, "loss": 0.018699413537979125, "step": 36705 }, { "epoch": 0.34550588235294116, "grad_norm": 0.5396217682326861, "learning_rate": 3.4033792834882855e-06, "loss": 0.028170347213745117, "step": 36710 }, { "epoch": 0.34555294117647056, "grad_norm": 0.7141249096192666, "learning_rate": 3.4031475261773085e-06, "loss": 0.026054665446281433, "step": 36715 }, { "epoch": 0.3456, "grad_norm": 0.5212430093010766, "learning_rate": 3.4029158162052838e-06, "loss": 0.01811147332191467, "step": 36720 }, { "epoch": 0.3456470588235294, "grad_norm": 0.7753489270355649, "learning_rate": 3.402684153556098e-06, "loss": 0.018958951532840728, "step": 36725 }, { "epoch": 0.3456941176470588, "grad_norm": 0.679327446208335, "learning_rate": 3.402452538213646e-06, "loss": 0.017895597219467162, "step": 36730 }, { "epoch": 0.3457411764705882, "grad_norm": 0.7291418807154655, "learning_rate": 3.4022209701618293e-06, "loss": 0.0187732458114624, "step": 36735 }, { "epoch": 0.3457882352941177, "grad_norm": 0.9363000790620003, "learning_rate": 3.4019894493845567e-06, "loss": 0.024431943893432617, "step": 36740 }, { "epoch": 0.3458352941176471, "grad_norm": 0.6065823859133869, "learning_rate": 3.4017579758657446e-06, "loss": 0.023030439019203187, "step": 36745 }, { "epoch": 0.3458823529411765, "grad_norm": 0.4145213420779728, "learning_rate": 3.4015265495893202e-06, "loss": 0.017619609832763672, "step": 36750 }, { "epoch": 0.3459294117647059, "grad_norm": 0.5158627170440983, "learning_rate": 3.4012951705392127e-06, "loss": 0.02328716218471527, "step": 36755 }, { "epoch": 0.3459764705882353, "grad_norm": 0.6303684231195568, "learning_rate": 3.401063838699365e-06, "loss": 0.022235822677612305, "step": 36760 }, { "epoch": 0.34602352941176473, "grad_norm": 0.5446441154550766, "learning_rate": 3.400832554053723e-06, "loss": 0.01963673233985901, "step": 36765 }, { "epoch": 0.34607058823529413, "grad_norm": 0.6060970618613335, "learning_rate": 3.4006013165862423e-06, "loss": 0.023565879464149474, "step": 36770 }, { "epoch": 0.34611764705882353, "grad_norm": 0.6007755474159909, "learning_rate": 3.400370126280886e-06, "loss": 0.02082696557044983, "step": 36775 }, { "epoch": 0.34616470588235293, "grad_norm": 1.1296734272891786, "learning_rate": 3.4001389831216245e-06, "loss": 0.02050473690032959, "step": 36780 }, { "epoch": 0.34621176470588233, "grad_norm": 0.7315525734242864, "learning_rate": 3.399907887092437e-06, "loss": 0.019173380732536317, "step": 36785 }, { "epoch": 0.3462588235294118, "grad_norm": 0.6704259081324332, "learning_rate": 3.399676838177307e-06, "loss": 0.01798403263092041, "step": 36790 }, { "epoch": 0.3463058823529412, "grad_norm": 0.4474579282900061, "learning_rate": 3.3994458363602307e-06, "loss": 0.014217816293239594, "step": 36795 }, { "epoch": 0.3463529411764706, "grad_norm": 0.6194090281336765, "learning_rate": 3.399214881625207e-06, "loss": 0.023649467527866362, "step": 36800 }, { "epoch": 0.3464, "grad_norm": 0.4644058026266349, "learning_rate": 3.3989839739562457e-06, "loss": 0.017315879464149475, "step": 36805 }, { "epoch": 0.3464470588235294, "grad_norm": 0.6182953274520379, "learning_rate": 3.3987531133373627e-06, "loss": 0.0182219997048378, "step": 36810 }, { "epoch": 0.34649411764705884, "grad_norm": 0.3956113449130962, "learning_rate": 3.398522299752582e-06, "loss": 0.024611443281173706, "step": 36815 }, { "epoch": 0.34654117647058824, "grad_norm": 0.41536803347125084, "learning_rate": 3.3982915331859346e-06, "loss": 0.01960059106349945, "step": 36820 }, { "epoch": 0.34658823529411764, "grad_norm": 0.5242260552687541, "learning_rate": 3.3980608136214606e-06, "loss": 0.018664300441741943, "step": 36825 }, { "epoch": 0.34663529411764704, "grad_norm": 0.4919054817048729, "learning_rate": 3.3978301410432045e-06, "loss": 0.015573503077030182, "step": 36830 }, { "epoch": 0.3466823529411765, "grad_norm": 0.5311257236966308, "learning_rate": 3.3975995154352232e-06, "loss": 0.02287115454673767, "step": 36835 }, { "epoch": 0.3467294117647059, "grad_norm": 0.3284626797277792, "learning_rate": 3.3973689367815758e-06, "loss": 0.01873057186603546, "step": 36840 }, { "epoch": 0.3467764705882353, "grad_norm": 0.4788858657769945, "learning_rate": 3.397138405066335e-06, "loss": 0.016389665007591248, "step": 36845 }, { "epoch": 0.3468235294117647, "grad_norm": 0.6079235259882018, "learning_rate": 3.396907920273574e-06, "loss": 0.019690179824829103, "step": 36850 }, { "epoch": 0.3468705882352941, "grad_norm": 0.684591138209428, "learning_rate": 3.3966774823873794e-06, "loss": 0.02035548388957977, "step": 36855 }, { "epoch": 0.34691764705882355, "grad_norm": 0.3017590884641625, "learning_rate": 3.396447091391843e-06, "loss": 0.025956153869628906, "step": 36860 }, { "epoch": 0.34696470588235295, "grad_norm": 0.39309605037430295, "learning_rate": 3.3962167472710637e-06, "loss": 0.01790897399187088, "step": 36865 }, { "epoch": 0.34701176470588235, "grad_norm": 0.5168694352030558, "learning_rate": 3.39598645000915e-06, "loss": 0.02081511467695236, "step": 36870 }, { "epoch": 0.34705882352941175, "grad_norm": 0.5954459301761608, "learning_rate": 3.395756199590215e-06, "loss": 0.019033282995224, "step": 36875 }, { "epoch": 0.34710588235294115, "grad_norm": 0.45928479157827823, "learning_rate": 3.3955259959983815e-06, "loss": 0.02418053150177002, "step": 36880 }, { "epoch": 0.3471529411764706, "grad_norm": 0.45365869028993155, "learning_rate": 3.395295839217779e-06, "loss": 0.019747602939605712, "step": 36885 }, { "epoch": 0.3472, "grad_norm": 0.6341632451318379, "learning_rate": 3.3950657292325465e-06, "loss": 0.02057051956653595, "step": 36890 }, { "epoch": 0.3472470588235294, "grad_norm": 0.5832674780119493, "learning_rate": 3.394835666026826e-06, "loss": 0.019161663949489594, "step": 36895 }, { "epoch": 0.3472941176470588, "grad_norm": 0.5626466929355691, "learning_rate": 3.3946056495847718e-06, "loss": 0.026235687732696533, "step": 36900 }, { "epoch": 0.3473411764705882, "grad_norm": 0.62638303961498, "learning_rate": 3.394375679890544e-06, "loss": 0.022109326720237733, "step": 36905 }, { "epoch": 0.34738823529411766, "grad_norm": 0.5283622600137353, "learning_rate": 3.3941457569283077e-06, "loss": 0.021776120364665984, "step": 36910 }, { "epoch": 0.34743529411764706, "grad_norm": 0.5493922367487101, "learning_rate": 3.393915880682239e-06, "loss": 0.019733184576034547, "step": 36915 }, { "epoch": 0.34748235294117646, "grad_norm": 0.5650227647210168, "learning_rate": 3.393686051136521e-06, "loss": 0.020815536379814148, "step": 36920 }, { "epoch": 0.34752941176470586, "grad_norm": 0.4599245676561397, "learning_rate": 3.393456268275343e-06, "loss": 0.020556284487247466, "step": 36925 }, { "epoch": 0.3475764705882353, "grad_norm": 0.6671223970857819, "learning_rate": 3.393226532082902e-06, "loss": 0.021083784103393555, "step": 36930 }, { "epoch": 0.3476235294117647, "grad_norm": 0.8193442994268336, "learning_rate": 3.3929968425434035e-06, "loss": 0.02247260808944702, "step": 36935 }, { "epoch": 0.3476705882352941, "grad_norm": 0.6916353687032522, "learning_rate": 3.3927671996410587e-06, "loss": 0.018565911054611205, "step": 36940 }, { "epoch": 0.3477176470588235, "grad_norm": 0.7991436901744577, "learning_rate": 3.392537603360089e-06, "loss": 0.018207329511642455, "step": 36945 }, { "epoch": 0.3477647058823529, "grad_norm": 0.6143215645132928, "learning_rate": 3.3923080536847203e-06, "loss": 0.017534631490707397, "step": 36950 }, { "epoch": 0.3478117647058824, "grad_norm": 0.6625904247400356, "learning_rate": 3.3920785505991873e-06, "loss": 0.022394078969955444, "step": 36955 }, { "epoch": 0.3478588235294118, "grad_norm": 0.6259400984127836, "learning_rate": 3.391849094087733e-06, "loss": 0.02298242151737213, "step": 36960 }, { "epoch": 0.3479058823529412, "grad_norm": 0.4855694325322607, "learning_rate": 3.3916196841346068e-06, "loss": 0.021775835752487184, "step": 36965 }, { "epoch": 0.3479529411764706, "grad_norm": 0.7034134057464828, "learning_rate": 3.3913903207240657e-06, "loss": 0.018865035474300386, "step": 36970 }, { "epoch": 0.348, "grad_norm": 0.4980729036030435, "learning_rate": 3.3911610038403743e-06, "loss": 0.01716286242008209, "step": 36975 }, { "epoch": 0.34804705882352943, "grad_norm": 0.7717980097479524, "learning_rate": 3.3909317334678043e-06, "loss": 0.026983040571212768, "step": 36980 }, { "epoch": 0.34809411764705883, "grad_norm": 0.5628456137574688, "learning_rate": 3.390702509590636e-06, "loss": 0.018772533535957335, "step": 36985 }, { "epoch": 0.34814117647058823, "grad_norm": 0.5378475009112853, "learning_rate": 3.3904733321931555e-06, "loss": 0.02189466208219528, "step": 36990 }, { "epoch": 0.34818823529411763, "grad_norm": 0.6622483183945641, "learning_rate": 3.3902442012596574e-06, "loss": 0.02203287184238434, "step": 36995 }, { "epoch": 0.34823529411764703, "grad_norm": 0.8149694040015957, "learning_rate": 3.390015116774444e-06, "loss": 0.021591906249523164, "step": 37000 }, { "epoch": 0.3482823529411765, "grad_norm": 0.6338358372973583, "learning_rate": 3.3897860787218227e-06, "loss": 0.018712615966796874, "step": 37005 }, { "epoch": 0.3483294117647059, "grad_norm": 0.48865519684837966, "learning_rate": 3.3895570870861123e-06, "loss": 0.017237907648086546, "step": 37010 }, { "epoch": 0.3483764705882353, "grad_norm": 0.48101434090124245, "learning_rate": 3.3893281418516355e-06, "loss": 0.0210969015955925, "step": 37015 }, { "epoch": 0.3484235294117647, "grad_norm": 0.5663139374384908, "learning_rate": 3.3890992430027243e-06, "loss": 0.021291357278823853, "step": 37020 }, { "epoch": 0.34847058823529414, "grad_norm": 0.572214860853943, "learning_rate": 3.388870390523717e-06, "loss": 0.022932620346546174, "step": 37025 }, { "epoch": 0.34851764705882354, "grad_norm": 0.5451406566088896, "learning_rate": 3.3886415843989594e-06, "loss": 0.017586369812488557, "step": 37030 }, { "epoch": 0.34856470588235294, "grad_norm": 0.7746586114940959, "learning_rate": 3.388412824612807e-06, "loss": 0.025781157612800597, "step": 37035 }, { "epoch": 0.34861176470588234, "grad_norm": 0.6214593548241553, "learning_rate": 3.3881841111496196e-06, "loss": 0.021396774053573608, "step": 37040 }, { "epoch": 0.34865882352941174, "grad_norm": 0.7056713705150109, "learning_rate": 3.387955443993765e-06, "loss": 0.020482122898101807, "step": 37045 }, { "epoch": 0.3487058823529412, "grad_norm": 0.5915157698711601, "learning_rate": 3.38772682312962e-06, "loss": 0.019344936311244964, "step": 37050 }, { "epoch": 0.3487529411764706, "grad_norm": 0.8324201743604128, "learning_rate": 3.3874982485415676e-06, "loss": 0.023629435896873476, "step": 37055 }, { "epoch": 0.3488, "grad_norm": 0.5029999707461421, "learning_rate": 3.3872697202139982e-06, "loss": 0.01896602213382721, "step": 37060 }, { "epoch": 0.3488470588235294, "grad_norm": 0.4627008756903908, "learning_rate": 3.3870412381313094e-06, "loss": 0.025557640194892883, "step": 37065 }, { "epoch": 0.3488941176470588, "grad_norm": 0.6353232876855017, "learning_rate": 3.3868128022779073e-06, "loss": 0.02277463972568512, "step": 37070 }, { "epoch": 0.34894117647058825, "grad_norm": 0.3943852540783135, "learning_rate": 3.3865844126382034e-06, "loss": 0.020226329565048218, "step": 37075 }, { "epoch": 0.34898823529411765, "grad_norm": 0.5314881248887269, "learning_rate": 3.3863560691966185e-06, "loss": 0.019763760268688202, "step": 37080 }, { "epoch": 0.34903529411764705, "grad_norm": 0.47373410952117945, "learning_rate": 3.38612777193758e-06, "loss": 0.018192175030708312, "step": 37085 }, { "epoch": 0.34908235294117645, "grad_norm": 0.4917928351694886, "learning_rate": 3.3858995208455227e-06, "loss": 0.01613871306180954, "step": 37090 }, { "epoch": 0.34912941176470585, "grad_norm": 0.5957770784814586, "learning_rate": 3.385671315904888e-06, "loss": 0.022306275367736817, "step": 37095 }, { "epoch": 0.3491764705882353, "grad_norm": 0.6504322949256846, "learning_rate": 3.3854431571001256e-06, "loss": 0.0229507640004158, "step": 37100 }, { "epoch": 0.3492235294117647, "grad_norm": 0.691793405932082, "learning_rate": 3.385215044415692e-06, "loss": 0.0201253205537796, "step": 37105 }, { "epoch": 0.3492705882352941, "grad_norm": 0.8129880661342147, "learning_rate": 3.3849869778360523e-06, "loss": 0.023140951991081238, "step": 37110 }, { "epoch": 0.3493176470588235, "grad_norm": 0.5943988861919911, "learning_rate": 3.384758957345676e-06, "loss": 0.01999529153108597, "step": 37115 }, { "epoch": 0.34936470588235297, "grad_norm": 0.36695885245754334, "learning_rate": 3.384530982929044e-06, "loss": 0.02388530969619751, "step": 37120 }, { "epoch": 0.34941176470588237, "grad_norm": 0.5105905807145967, "learning_rate": 3.384303054570641e-06, "loss": 0.028806376457214355, "step": 37125 }, { "epoch": 0.34945882352941177, "grad_norm": 0.5691647902008637, "learning_rate": 3.384075172254959e-06, "loss": 0.03336052596569061, "step": 37130 }, { "epoch": 0.34950588235294117, "grad_norm": 0.44499873050986793, "learning_rate": 3.3838473359665027e-06, "loss": 0.025292542576789857, "step": 37135 }, { "epoch": 0.34955294117647057, "grad_norm": 0.6132152408196923, "learning_rate": 3.383619545689776e-06, "loss": 0.022792388498783112, "step": 37140 }, { "epoch": 0.3496, "grad_norm": 0.8532939166172008, "learning_rate": 3.3833918014092955e-06, "loss": 0.02343432903289795, "step": 37145 }, { "epoch": 0.3496470588235294, "grad_norm": 0.525307489885302, "learning_rate": 3.3831641031095836e-06, "loss": 0.021258747577667235, "step": 37150 }, { "epoch": 0.3496941176470588, "grad_norm": 0.42324459989644037, "learning_rate": 3.3829364507751716e-06, "loss": 0.024511972069740297, "step": 37155 }, { "epoch": 0.3497411764705882, "grad_norm": 0.5979923496468058, "learning_rate": 3.382708844390594e-06, "loss": 0.023370859026908875, "step": 37160 }, { "epoch": 0.3497882352941176, "grad_norm": 0.507940318244489, "learning_rate": 3.382481283940398e-06, "loss": 0.020951761305332182, "step": 37165 }, { "epoch": 0.3498352941176471, "grad_norm": 0.679117377450262, "learning_rate": 3.3822537694091335e-06, "loss": 0.020227017998695373, "step": 37170 }, { "epoch": 0.3498823529411765, "grad_norm": 0.47538768567388373, "learning_rate": 3.382026300781359e-06, "loss": 0.024267807602882385, "step": 37175 }, { "epoch": 0.3499294117647059, "grad_norm": 0.5282995158303869, "learning_rate": 3.381798878041642e-06, "loss": 0.01683250963687897, "step": 37180 }, { "epoch": 0.3499764705882353, "grad_norm": 0.7017700063152773, "learning_rate": 3.381571501174556e-06, "loss": 0.02592521905899048, "step": 37185 }, { "epoch": 0.35002352941176473, "grad_norm": 0.3847888911317249, "learning_rate": 3.38134417016468e-06, "loss": 0.01903064250946045, "step": 37190 }, { "epoch": 0.35007058823529413, "grad_norm": 0.6166485673864676, "learning_rate": 3.3811168849966045e-06, "loss": 0.020501160621643068, "step": 37195 }, { "epoch": 0.35011764705882353, "grad_norm": 0.6527747691943653, "learning_rate": 3.380889645654923e-06, "loss": 0.019603419303894042, "step": 37200 }, { "epoch": 0.35016470588235293, "grad_norm": 0.5219388409006716, "learning_rate": 3.380662452124239e-06, "loss": 0.018112939596176148, "step": 37205 }, { "epoch": 0.35021176470588233, "grad_norm": 0.6550508739063142, "learning_rate": 3.3804353043891604e-06, "loss": 0.02177061289548874, "step": 37210 }, { "epoch": 0.3502588235294118, "grad_norm": 0.5421376845694196, "learning_rate": 3.3802082024343057e-06, "loss": 0.019220000505447386, "step": 37215 }, { "epoch": 0.3503058823529412, "grad_norm": 0.8055381207407887, "learning_rate": 3.3799811462442994e-06, "loss": 0.021618826687335967, "step": 37220 }, { "epoch": 0.3503529411764706, "grad_norm": 0.4593746353751557, "learning_rate": 3.3797541358037724e-06, "loss": 0.01790489852428436, "step": 37225 }, { "epoch": 0.3504, "grad_norm": 0.3864396133920267, "learning_rate": 3.3795271710973626e-06, "loss": 0.01951429545879364, "step": 37230 }, { "epoch": 0.3504470588235294, "grad_norm": 0.5432998698326974, "learning_rate": 3.3793002521097162e-06, "loss": 0.023443561792373658, "step": 37235 }, { "epoch": 0.35049411764705884, "grad_norm": 0.5146940563035058, "learning_rate": 3.3790733788254876e-06, "loss": 0.020154520869255066, "step": 37240 }, { "epoch": 0.35054117647058824, "grad_norm": 0.9110133250188747, "learning_rate": 3.378846551229336e-06, "loss": 0.019422531127929688, "step": 37245 }, { "epoch": 0.35058823529411764, "grad_norm": 0.4996924775717388, "learning_rate": 3.378619769305929e-06, "loss": 0.01386752724647522, "step": 37250 }, { "epoch": 0.35063529411764705, "grad_norm": 0.5260111407160313, "learning_rate": 3.378393033039941e-06, "loss": 0.023550158739089964, "step": 37255 }, { "epoch": 0.35068235294117645, "grad_norm": 0.41620632236152655, "learning_rate": 3.3781663424160547e-06, "loss": 0.018767279386520386, "step": 37260 }, { "epoch": 0.3507294117647059, "grad_norm": 0.7866626321462746, "learning_rate": 3.3779396974189584e-06, "loss": 0.022601667046546935, "step": 37265 }, { "epoch": 0.3507764705882353, "grad_norm": 0.46299170971380554, "learning_rate": 3.377713098033349e-06, "loss": 0.021135494112968445, "step": 37270 }, { "epoch": 0.3508235294117647, "grad_norm": 0.3875171397582641, "learning_rate": 3.3774865442439296e-06, "loss": 0.01799454689025879, "step": 37275 }, { "epoch": 0.3508705882352941, "grad_norm": 0.6793215488472005, "learning_rate": 3.377260036035411e-06, "loss": 0.02259390950202942, "step": 37280 }, { "epoch": 0.35091764705882356, "grad_norm": 0.8260323157665251, "learning_rate": 3.377033573392511e-06, "loss": 0.026353344321250916, "step": 37285 }, { "epoch": 0.35096470588235296, "grad_norm": 0.49694114546854523, "learning_rate": 3.3768071562999544e-06, "loss": 0.019512486457824708, "step": 37290 }, { "epoch": 0.35101176470588236, "grad_norm": 0.5756346874410766, "learning_rate": 3.376580784742474e-06, "loss": 0.021589484810829163, "step": 37295 }, { "epoch": 0.35105882352941176, "grad_norm": 0.4658467966268841, "learning_rate": 3.3763544587048087e-06, "loss": 0.017243693768978118, "step": 37300 }, { "epoch": 0.35110588235294116, "grad_norm": 0.5641961529955924, "learning_rate": 3.3761281781717047e-06, "loss": 0.019115787744522095, "step": 37305 }, { "epoch": 0.3511529411764706, "grad_norm": 0.8315526163877298, "learning_rate": 3.375901943127916e-06, "loss": 0.02280818819999695, "step": 37310 }, { "epoch": 0.3512, "grad_norm": 0.6893735369261832, "learning_rate": 3.3756757535582034e-06, "loss": 0.023625880479812622, "step": 37315 }, { "epoch": 0.3512470588235294, "grad_norm": 0.46319741436995604, "learning_rate": 3.375449609447335e-06, "loss": 0.017959752678871156, "step": 37320 }, { "epoch": 0.3512941176470588, "grad_norm": 0.67829389263141, "learning_rate": 3.375223510780086e-06, "loss": 0.02349672615528107, "step": 37325 }, { "epoch": 0.3513411764705882, "grad_norm": 0.5606210702468253, "learning_rate": 3.374997457541237e-06, "loss": 0.01877496689558029, "step": 37330 }, { "epoch": 0.35138823529411767, "grad_norm": 0.4909597375155178, "learning_rate": 3.3747714497155797e-06, "loss": 0.020671039819717407, "step": 37335 }, { "epoch": 0.35143529411764707, "grad_norm": 0.4023467599753069, "learning_rate": 3.3745454872879094e-06, "loss": 0.020882129669189453, "step": 37340 }, { "epoch": 0.35148235294117647, "grad_norm": 0.6172560493416229, "learning_rate": 3.3743195702430304e-06, "loss": 0.019595208764076232, "step": 37345 }, { "epoch": 0.35152941176470587, "grad_norm": 0.6309087390597216, "learning_rate": 3.3740936985657526e-06, "loss": 0.02583345174789429, "step": 37350 }, { "epoch": 0.35157647058823527, "grad_norm": 0.9432111308008128, "learning_rate": 3.373867872240894e-06, "loss": 0.019567838311195372, "step": 37355 }, { "epoch": 0.3516235294117647, "grad_norm": 0.6462594577882473, "learning_rate": 3.37364209125328e-06, "loss": 0.02122463583946228, "step": 37360 }, { "epoch": 0.3516705882352941, "grad_norm": 0.7327952578709568, "learning_rate": 3.373416355587742e-06, "loss": 0.02509353160858154, "step": 37365 }, { "epoch": 0.3517176470588235, "grad_norm": 0.4623357322604489, "learning_rate": 3.3731906652291195e-06, "loss": 0.019987794756889343, "step": 37370 }, { "epoch": 0.3517647058823529, "grad_norm": 0.45768274698904177, "learning_rate": 3.372965020162259e-06, "loss": 0.01962253749370575, "step": 37375 }, { "epoch": 0.3518117647058824, "grad_norm": 0.4443764476386944, "learning_rate": 3.3727394203720137e-06, "loss": 0.021277454495429993, "step": 37380 }, { "epoch": 0.3518588235294118, "grad_norm": 0.628429388572765, "learning_rate": 3.3725138658432443e-06, "loss": 0.026718318462371826, "step": 37385 }, { "epoch": 0.3519058823529412, "grad_norm": 0.7138818910635477, "learning_rate": 3.3722883565608184e-06, "loss": 0.023039498925209047, "step": 37390 }, { "epoch": 0.3519529411764706, "grad_norm": 0.5698809974354265, "learning_rate": 3.3720628925096093e-06, "loss": 0.02286252975463867, "step": 37395 }, { "epoch": 0.352, "grad_norm": 0.5867253658840677, "learning_rate": 3.3718374736745007e-06, "loss": 0.022068241238594057, "step": 37400 }, { "epoch": 0.35204705882352944, "grad_norm": 0.6129976303082515, "learning_rate": 3.37161210004038e-06, "loss": 0.025538486242294312, "step": 37405 }, { "epoch": 0.35209411764705884, "grad_norm": 0.5952541283617147, "learning_rate": 3.371386771592144e-06, "loss": 0.021115124225616455, "step": 37410 }, { "epoch": 0.35214117647058824, "grad_norm": 0.4742461042889828, "learning_rate": 3.3711614883146944e-06, "loss": 0.022726273536682128, "step": 37415 }, { "epoch": 0.35218823529411764, "grad_norm": 0.5131221032490731, "learning_rate": 3.370936250192943e-06, "loss": 0.022748485207557678, "step": 37420 }, { "epoch": 0.35223529411764704, "grad_norm": 0.9344197580440019, "learning_rate": 3.3707110572118048e-06, "loss": 0.017529651522636414, "step": 37425 }, { "epoch": 0.3522823529411765, "grad_norm": 0.7798831447955398, "learning_rate": 3.3704859093562047e-06, "loss": 0.018950945138931273, "step": 37430 }, { "epoch": 0.3523294117647059, "grad_norm": 0.6163884066590298, "learning_rate": 3.3702608066110738e-06, "loss": 0.02486203908920288, "step": 37435 }, { "epoch": 0.3523764705882353, "grad_norm": 1.4681712185040896, "learning_rate": 3.370035748961351e-06, "loss": 0.04322243332862854, "step": 37440 }, { "epoch": 0.3524235294117647, "grad_norm": 0.9384887315991942, "learning_rate": 3.369810736391981e-06, "loss": 0.019883313775062562, "step": 37445 }, { "epoch": 0.3524705882352941, "grad_norm": 0.6179006620106002, "learning_rate": 3.3695857688879154e-06, "loss": 0.02065340578556061, "step": 37450 }, { "epoch": 0.35251764705882355, "grad_norm": 0.544496449382333, "learning_rate": 3.3693608464341136e-06, "loss": 0.020369824767112733, "step": 37455 }, { "epoch": 0.35256470588235295, "grad_norm": 0.8250650837998493, "learning_rate": 3.369135969015543e-06, "loss": 0.020650427043437957, "step": 37460 }, { "epoch": 0.35261176470588235, "grad_norm": 0.5660726260335365, "learning_rate": 3.3689111366171764e-06, "loss": 0.021001696586608887, "step": 37465 }, { "epoch": 0.35265882352941175, "grad_norm": 0.6353424405293208, "learning_rate": 3.3686863492239936e-06, "loss": 0.025057268142700196, "step": 37470 }, { "epoch": 0.3527058823529412, "grad_norm": 0.5825031078351783, "learning_rate": 3.368461606820983e-06, "loss": 0.018069696426391602, "step": 37475 }, { "epoch": 0.3527529411764706, "grad_norm": 0.43530961331576445, "learning_rate": 3.368236909393138e-06, "loss": 0.02112797796726227, "step": 37480 }, { "epoch": 0.3528, "grad_norm": 0.6710239276405663, "learning_rate": 3.3680122569254603e-06, "loss": 0.028292328119277954, "step": 37485 }, { "epoch": 0.3528470588235294, "grad_norm": 0.3855389003572032, "learning_rate": 3.3677876494029575e-06, "loss": 0.02294844537973404, "step": 37490 }, { "epoch": 0.3528941176470588, "grad_norm": 0.537743744643114, "learning_rate": 3.3675630868106473e-06, "loss": 0.017791816592216493, "step": 37495 }, { "epoch": 0.35294117647058826, "grad_norm": 0.5948267538692474, "learning_rate": 3.3673385691335495e-06, "loss": 0.02363046556711197, "step": 37500 }, { "epoch": 0.35298823529411766, "grad_norm": 0.496109789136605, "learning_rate": 3.3671140963566947e-06, "loss": 0.01631205379962921, "step": 37505 }, { "epoch": 0.35303529411764706, "grad_norm": 0.6138801252064789, "learning_rate": 3.366889668465119e-06, "loss": 0.02205798923969269, "step": 37510 }, { "epoch": 0.35308235294117646, "grad_norm": 0.38614139039626894, "learning_rate": 3.3666652854438657e-06, "loss": 0.016126951575279234, "step": 37515 }, { "epoch": 0.35312941176470586, "grad_norm": 0.4302176882951281, "learning_rate": 3.366440947277986e-06, "loss": 0.019056469202041626, "step": 37520 }, { "epoch": 0.3531764705882353, "grad_norm": 0.6829855349873838, "learning_rate": 3.3662166539525358e-06, "loss": 0.019472219049930573, "step": 37525 }, { "epoch": 0.3532235294117647, "grad_norm": 0.6236138034165709, "learning_rate": 3.3659924054525798e-06, "loss": 0.01941111236810684, "step": 37530 }, { "epoch": 0.3532705882352941, "grad_norm": 0.5209893797904057, "learning_rate": 3.36576820176319e-06, "loss": 0.021213358640670775, "step": 37535 }, { "epoch": 0.3533176470588235, "grad_norm": 0.3556085755663374, "learning_rate": 3.365544042869443e-06, "loss": 0.015009570121765136, "step": 37540 }, { "epoch": 0.3533647058823529, "grad_norm": 0.6826518570563801, "learning_rate": 3.3653199287564255e-06, "loss": 0.0265357106924057, "step": 37545 }, { "epoch": 0.35341176470588237, "grad_norm": 0.38892362690134824, "learning_rate": 3.365095859409228e-06, "loss": 0.015347099304199219, "step": 37550 }, { "epoch": 0.35345882352941177, "grad_norm": 0.5758317197207652, "learning_rate": 3.3648718348129507e-06, "loss": 0.023172348737716675, "step": 37555 }, { "epoch": 0.35350588235294117, "grad_norm": 0.4913386789967926, "learning_rate": 3.3646478549527e-06, "loss": 0.01919308304786682, "step": 37560 }, { "epoch": 0.35355294117647057, "grad_norm": 0.5537342176121135, "learning_rate": 3.3644239198135874e-06, "loss": 0.019775912165641785, "step": 37565 }, { "epoch": 0.3536, "grad_norm": 0.5557030539360628, "learning_rate": 3.3642000293807336e-06, "loss": 0.019444113969802855, "step": 37570 }, { "epoch": 0.3536470588235294, "grad_norm": 0.5404242206089701, "learning_rate": 3.3639761836392647e-06, "loss": 0.026539808511734007, "step": 37575 }, { "epoch": 0.3536941176470588, "grad_norm": 0.36887708840819644, "learning_rate": 3.3637523825743156e-06, "loss": 0.01819288283586502, "step": 37580 }, { "epoch": 0.3537411764705882, "grad_norm": 0.5049178057921699, "learning_rate": 3.363528626171025e-06, "loss": 0.01642676889896393, "step": 37585 }, { "epoch": 0.3537882352941176, "grad_norm": 0.6593325565595832, "learning_rate": 3.3633049144145437e-06, "loss": 0.021470259130001067, "step": 37590 }, { "epoch": 0.3538352941176471, "grad_norm": 0.5215254540211446, "learning_rate": 3.363081247290022e-06, "loss": 0.020590487122535705, "step": 37595 }, { "epoch": 0.3538823529411765, "grad_norm": 0.7297278307835304, "learning_rate": 3.3628576247826246e-06, "loss": 0.019098837673664094, "step": 37600 }, { "epoch": 0.3539294117647059, "grad_norm": 0.506671535878593, "learning_rate": 3.3626340468775175e-06, "loss": 0.021174612641334533, "step": 37605 }, { "epoch": 0.3539764705882353, "grad_norm": 0.5259499929502901, "learning_rate": 3.3624105135598775e-06, "loss": 0.023095881938934325, "step": 37610 }, { "epoch": 0.3540235294117647, "grad_norm": 0.5681006053160932, "learning_rate": 3.3621870248148856e-06, "loss": 0.022980238497257232, "step": 37615 }, { "epoch": 0.35407058823529414, "grad_norm": 0.632173676787702, "learning_rate": 3.3619635806277316e-06, "loss": 0.022160294651985168, "step": 37620 }, { "epoch": 0.35411764705882354, "grad_norm": 1.023235697820284, "learning_rate": 3.361740180983611e-06, "loss": 0.023139265179634095, "step": 37625 }, { "epoch": 0.35416470588235294, "grad_norm": 0.6194396748337813, "learning_rate": 3.3615168258677262e-06, "loss": 0.020305629074573516, "step": 37630 }, { "epoch": 0.35421176470588234, "grad_norm": 0.39516216575605784, "learning_rate": 3.361293515265287e-06, "loss": 0.0192766398191452, "step": 37635 }, { "epoch": 0.35425882352941174, "grad_norm": 0.6522959043766485, "learning_rate": 3.36107024916151e-06, "loss": 0.020293933153152467, "step": 37640 }, { "epoch": 0.3543058823529412, "grad_norm": 0.6168782400300936, "learning_rate": 3.360847027541619e-06, "loss": 0.025364717841148375, "step": 37645 }, { "epoch": 0.3543529411764706, "grad_norm": 0.7137775759065748, "learning_rate": 3.360623850390842e-06, "loss": 0.025417155027389525, "step": 37650 }, { "epoch": 0.3544, "grad_norm": 0.43245049220439474, "learning_rate": 3.3604007176944196e-06, "loss": 0.0176955908536911, "step": 37655 }, { "epoch": 0.3544470588235294, "grad_norm": 0.4255763902345611, "learning_rate": 3.360177629437594e-06, "loss": 0.02134193927049637, "step": 37660 }, { "epoch": 0.35449411764705885, "grad_norm": 0.77181469886441, "learning_rate": 3.3599545856056153e-06, "loss": 0.02234705239534378, "step": 37665 }, { "epoch": 0.35454117647058825, "grad_norm": 0.6639313385831763, "learning_rate": 3.359731586183742e-06, "loss": 0.02208624929189682, "step": 37670 }, { "epoch": 0.35458823529411765, "grad_norm": 0.7177785866716698, "learning_rate": 3.359508631157239e-06, "loss": 0.01964876800775528, "step": 37675 }, { "epoch": 0.35463529411764705, "grad_norm": 0.7453943737050069, "learning_rate": 3.359285720511376e-06, "loss": 0.02222665548324585, "step": 37680 }, { "epoch": 0.35468235294117645, "grad_norm": 0.5714029533932669, "learning_rate": 3.3590628542314343e-06, "loss": 0.016268765926361083, "step": 37685 }, { "epoch": 0.3547294117647059, "grad_norm": 0.6219701144084011, "learning_rate": 3.3588400323026947e-06, "loss": 0.02333299219608307, "step": 37690 }, { "epoch": 0.3547764705882353, "grad_norm": 0.6400678058415535, "learning_rate": 3.3586172547104527e-06, "loss": 0.019873052835464478, "step": 37695 }, { "epoch": 0.3548235294117647, "grad_norm": 0.40145640346227374, "learning_rate": 3.3583945214400055e-06, "loss": 0.018465541303157806, "step": 37700 }, { "epoch": 0.3548705882352941, "grad_norm": 0.5370691760227184, "learning_rate": 3.3581718324766582e-06, "loss": 0.022964496910572053, "step": 37705 }, { "epoch": 0.3549176470588235, "grad_norm": 0.6979956085513145, "learning_rate": 3.3579491878057243e-06, "loss": 0.0249442994594574, "step": 37710 }, { "epoch": 0.35496470588235296, "grad_norm": 0.4518865337840712, "learning_rate": 3.3577265874125216e-06, "loss": 0.02938462793827057, "step": 37715 }, { "epoch": 0.35501176470588236, "grad_norm": 0.45285069394987654, "learning_rate": 3.3575040312823774e-06, "loss": 0.022828108072280882, "step": 37720 }, { "epoch": 0.35505882352941176, "grad_norm": 0.6880733023173474, "learning_rate": 3.3572815194006237e-06, "loss": 0.019263869524002074, "step": 37725 }, { "epoch": 0.35510588235294116, "grad_norm": 0.5756469921820562, "learning_rate": 3.357059051752601e-06, "loss": 0.017480504512786866, "step": 37730 }, { "epoch": 0.3551529411764706, "grad_norm": 0.7091618571866551, "learning_rate": 3.3568366283236535e-06, "loss": 0.018692097067832945, "step": 37735 }, { "epoch": 0.3552, "grad_norm": 0.47673666097270884, "learning_rate": 3.3566142490991364e-06, "loss": 0.01558467447757721, "step": 37740 }, { "epoch": 0.3552470588235294, "grad_norm": 0.5840996854642208, "learning_rate": 3.356391914064409e-06, "loss": 0.018419934809207915, "step": 37745 }, { "epoch": 0.3552941176470588, "grad_norm": 0.5170399450038168, "learning_rate": 3.356169623204839e-06, "loss": 0.017730632424354555, "step": 37750 }, { "epoch": 0.3553411764705882, "grad_norm": 0.543808572000427, "learning_rate": 3.3559473765057984e-06, "loss": 0.018379172682762145, "step": 37755 }, { "epoch": 0.35538823529411767, "grad_norm": 0.6090357348509623, "learning_rate": 3.3557251739526673e-06, "loss": 0.02487812042236328, "step": 37760 }, { "epoch": 0.35543529411764707, "grad_norm": 0.4035128926261106, "learning_rate": 3.355503015530834e-06, "loss": 0.01901482939720154, "step": 37765 }, { "epoch": 0.35548235294117647, "grad_norm": 0.5468305271353412, "learning_rate": 3.355280901225693e-06, "loss": 0.019145199656486513, "step": 37770 }, { "epoch": 0.35552941176470587, "grad_norm": 0.5564701103718126, "learning_rate": 3.355058831022643e-06, "loss": 0.02196640968322754, "step": 37775 }, { "epoch": 0.35557647058823527, "grad_norm": 0.49990176647125756, "learning_rate": 3.3548368049070924e-06, "loss": 0.02235516607761383, "step": 37780 }, { "epoch": 0.3556235294117647, "grad_norm": 0.6216312139410354, "learning_rate": 3.354614822864455e-06, "loss": 0.020026925206184387, "step": 37785 }, { "epoch": 0.3556705882352941, "grad_norm": 0.6530431816547374, "learning_rate": 3.354392884880152e-06, "loss": 0.02448054850101471, "step": 37790 }, { "epoch": 0.3557176470588235, "grad_norm": 0.7060532436496342, "learning_rate": 3.3541709909396115e-06, "loss": 0.021995845437049865, "step": 37795 }, { "epoch": 0.3557647058823529, "grad_norm": 0.6430668624295662, "learning_rate": 3.3539491410282664e-06, "loss": 0.024019506573677064, "step": 37800 }, { "epoch": 0.3558117647058823, "grad_norm": 0.624243273604204, "learning_rate": 3.3537273351315602e-06, "loss": 0.022777354717254637, "step": 37805 }, { "epoch": 0.3558588235294118, "grad_norm": 0.5398715300650246, "learning_rate": 3.3535055732349385e-06, "loss": 0.02425704300403595, "step": 37810 }, { "epoch": 0.3559058823529412, "grad_norm": 0.8044279267970434, "learning_rate": 3.3532838553238573e-06, "loss": 0.021218998730182646, "step": 37815 }, { "epoch": 0.3559529411764706, "grad_norm": 0.5536606925463314, "learning_rate": 3.3530621813837767e-06, "loss": 0.024714270234107973, "step": 37820 }, { "epoch": 0.356, "grad_norm": 0.6147351524314363, "learning_rate": 3.352840551400166e-06, "loss": 0.019610732793807983, "step": 37825 }, { "epoch": 0.35604705882352944, "grad_norm": 0.43388048123574285, "learning_rate": 3.352618965358499e-06, "loss": 0.022549816966056825, "step": 37830 }, { "epoch": 0.35609411764705884, "grad_norm": 0.5984633188927783, "learning_rate": 3.3523974232442584e-06, "loss": 0.02000041604042053, "step": 37835 }, { "epoch": 0.35614117647058824, "grad_norm": 0.5225195723773028, "learning_rate": 3.3521759250429315e-06, "loss": 0.016383063793182374, "step": 37840 }, { "epoch": 0.35618823529411764, "grad_norm": 0.4516551937200121, "learning_rate": 3.3519544707400138e-06, "loss": 0.01787649095058441, "step": 37845 }, { "epoch": 0.35623529411764704, "grad_norm": 0.5124899838960779, "learning_rate": 3.351733060321006e-06, "loss": 0.0160773828625679, "step": 37850 }, { "epoch": 0.3562823529411765, "grad_norm": 0.5512325884770505, "learning_rate": 3.3515116937714174e-06, "loss": 0.021673509478569032, "step": 37855 }, { "epoch": 0.3563294117647059, "grad_norm": 0.5284886924061171, "learning_rate": 3.351290371076763e-06, "loss": 0.02130484879016876, "step": 37860 }, { "epoch": 0.3563764705882353, "grad_norm": 0.6111898134300695, "learning_rate": 3.3510690922225635e-06, "loss": 0.02581375539302826, "step": 37865 }, { "epoch": 0.3564235294117647, "grad_norm": 0.6435818450593006, "learning_rate": 3.3508478571943488e-06, "loss": 0.021505746245384216, "step": 37870 }, { "epoch": 0.3564705882352941, "grad_norm": 0.6228268644925407, "learning_rate": 3.3506266659776533e-06, "loss": 0.02124854326248169, "step": 37875 }, { "epoch": 0.35651764705882355, "grad_norm": 0.5776438871214651, "learning_rate": 3.3504055185580186e-06, "loss": 0.019852855801582338, "step": 37880 }, { "epoch": 0.35656470588235295, "grad_norm": 0.604862629046853, "learning_rate": 3.3501844149209935e-06, "loss": 0.023039278388023377, "step": 37885 }, { "epoch": 0.35661176470588235, "grad_norm": 0.3823135490353985, "learning_rate": 3.349963355052133e-06, "loss": 0.027365618944168092, "step": 37890 }, { "epoch": 0.35665882352941175, "grad_norm": 0.4396810565251172, "learning_rate": 3.349742338936999e-06, "loss": 0.01938444674015045, "step": 37895 }, { "epoch": 0.35670588235294115, "grad_norm": 0.47296480323884627, "learning_rate": 3.34952136656116e-06, "loss": 0.01965329796075821, "step": 37900 }, { "epoch": 0.3567529411764706, "grad_norm": 0.582875343348068, "learning_rate": 3.3493004379101907e-06, "loss": 0.016868767142295838, "step": 37905 }, { "epoch": 0.3568, "grad_norm": 0.4446588597332096, "learning_rate": 3.349079552969674e-06, "loss": 0.01762915402650833, "step": 37910 }, { "epoch": 0.3568470588235294, "grad_norm": 1.0158559145263062, "learning_rate": 3.3488587117251975e-06, "loss": 0.02046828716993332, "step": 37915 }, { "epoch": 0.3568941176470588, "grad_norm": 0.5230169619089714, "learning_rate": 3.3486379141623564e-06, "loss": 0.022029292583465577, "step": 37920 }, { "epoch": 0.35694117647058826, "grad_norm": 0.4819701971751704, "learning_rate": 3.348417160266753e-06, "loss": 0.019134697318077088, "step": 37925 }, { "epoch": 0.35698823529411766, "grad_norm": 0.6159720248639053, "learning_rate": 3.3481964500239948e-06, "loss": 0.020685911178588867, "step": 37930 }, { "epoch": 0.35703529411764706, "grad_norm": 0.42948228128693816, "learning_rate": 3.347975783419698e-06, "loss": 0.017717790603637696, "step": 37935 }, { "epoch": 0.35708235294117646, "grad_norm": 0.7485115217694442, "learning_rate": 3.347755160439484e-06, "loss": 0.0176800012588501, "step": 37940 }, { "epoch": 0.35712941176470586, "grad_norm": 0.523113663161973, "learning_rate": 3.3475345810689795e-06, "loss": 0.021049365401268005, "step": 37945 }, { "epoch": 0.3571764705882353, "grad_norm": 0.6550461992250713, "learning_rate": 3.3473140452938215e-06, "loss": 0.021920569241046906, "step": 37950 }, { "epoch": 0.3572235294117647, "grad_norm": 0.40485295259335036, "learning_rate": 3.347093553099651e-06, "loss": 0.02049487829208374, "step": 37955 }, { "epoch": 0.3572705882352941, "grad_norm": 0.5867938209463408, "learning_rate": 3.3468731044721158e-06, "loss": 0.020654882490634918, "step": 37960 }, { "epoch": 0.3573176470588235, "grad_norm": 0.6814154942262396, "learning_rate": 3.3466526993968706e-06, "loss": 0.023553267121315002, "step": 37965 }, { "epoch": 0.3573647058823529, "grad_norm": 0.5273889330826762, "learning_rate": 3.3464323378595777e-06, "loss": 0.0188705712556839, "step": 37970 }, { "epoch": 0.3574117647058824, "grad_norm": 0.5699919249362877, "learning_rate": 3.3462120198459046e-06, "loss": 0.020057371258735655, "step": 37975 }, { "epoch": 0.3574588235294118, "grad_norm": 0.6721253815266981, "learning_rate": 3.345991745341525e-06, "loss": 0.018576346337795258, "step": 37980 }, { "epoch": 0.3575058823529412, "grad_norm": 0.5708347787510137, "learning_rate": 3.3457715143321216e-06, "loss": 0.021960029006004335, "step": 37985 }, { "epoch": 0.3575529411764706, "grad_norm": 0.41463227222243587, "learning_rate": 3.3455513268033824e-06, "loss": 0.01699900031089783, "step": 37990 }, { "epoch": 0.3576, "grad_norm": 0.43584514014390985, "learning_rate": 3.3453311827410007e-06, "loss": 0.021522170305252074, "step": 37995 }, { "epoch": 0.35764705882352943, "grad_norm": 0.440491631424758, "learning_rate": 3.345111082130677e-06, "loss": 0.014291007816791535, "step": 38000 }, { "epoch": 0.35769411764705883, "grad_norm": 0.6727022932420158, "learning_rate": 3.344891024958121e-06, "loss": 0.020586857199668886, "step": 38005 }, { "epoch": 0.35774117647058823, "grad_norm": 0.3881973410234634, "learning_rate": 3.3446710112090447e-06, "loss": 0.026214003562927246, "step": 38010 }, { "epoch": 0.35778823529411763, "grad_norm": 0.7297649801515989, "learning_rate": 3.3444510408691703e-06, "loss": 0.021399851143360137, "step": 38015 }, { "epoch": 0.3578352941176471, "grad_norm": 0.43683372092693734, "learning_rate": 3.3442311139242245e-06, "loss": 0.018935486674308777, "step": 38020 }, { "epoch": 0.3578823529411765, "grad_norm": 0.564868132748002, "learning_rate": 3.344011230359941e-06, "loss": 0.020948033034801482, "step": 38025 }, { "epoch": 0.3579294117647059, "grad_norm": 0.5139292296436215, "learning_rate": 3.343791390162061e-06, "loss": 0.017101427912712096, "step": 38030 }, { "epoch": 0.3579764705882353, "grad_norm": 0.4945581405713913, "learning_rate": 3.3435715933163315e-06, "loss": 0.018403446674346922, "step": 38035 }, { "epoch": 0.3580235294117647, "grad_norm": 0.49890544369614537, "learning_rate": 3.343351839808505e-06, "loss": 0.017639729380607604, "step": 38040 }, { "epoch": 0.35807058823529414, "grad_norm": 0.4113311558766315, "learning_rate": 3.3431321296243423e-06, "loss": 0.015277107059955598, "step": 38045 }, { "epoch": 0.35811764705882354, "grad_norm": 0.5307233747219345, "learning_rate": 3.3429124627496105e-06, "loss": 0.020132695138454438, "step": 38050 }, { "epoch": 0.35816470588235294, "grad_norm": 0.5204250988245225, "learning_rate": 3.3426928391700823e-06, "loss": 0.017797577381134033, "step": 38055 }, { "epoch": 0.35821176470588234, "grad_norm": 0.5691415974389552, "learning_rate": 3.342473258871538e-06, "loss": 0.019786116480827332, "step": 38060 }, { "epoch": 0.35825882352941174, "grad_norm": 0.5646027244863555, "learning_rate": 3.3422537218397628e-06, "loss": 0.022261747717857362, "step": 38065 }, { "epoch": 0.3583058823529412, "grad_norm": 0.7292274197735439, "learning_rate": 3.3420342280605506e-06, "loss": 0.024392232298851013, "step": 38070 }, { "epoch": 0.3583529411764706, "grad_norm": 1.1313373960677457, "learning_rate": 3.3418147775196997e-06, "loss": 0.02344200909137726, "step": 38075 }, { "epoch": 0.3584, "grad_norm": 0.6259328333744177, "learning_rate": 3.341595370203018e-06, "loss": 0.026804497838020323, "step": 38080 }, { "epoch": 0.3584470588235294, "grad_norm": 0.3654004148206769, "learning_rate": 3.3413760060963156e-06, "loss": 0.02030898779630661, "step": 38085 }, { "epoch": 0.3584941176470588, "grad_norm": 0.7735271207023279, "learning_rate": 3.341156685185413e-06, "loss": 0.01910254955291748, "step": 38090 }, { "epoch": 0.35854117647058825, "grad_norm": 0.4781702494052391, "learning_rate": 3.3409374074561348e-06, "loss": 0.020500600337982178, "step": 38095 }, { "epoch": 0.35858823529411765, "grad_norm": 0.537879010271626, "learning_rate": 3.340718172894314e-06, "loss": 0.018742698431015014, "step": 38100 }, { "epoch": 0.35863529411764705, "grad_norm": 0.4525554456373095, "learning_rate": 3.3404989814857874e-06, "loss": 0.021363356709480287, "step": 38105 }, { "epoch": 0.35868235294117645, "grad_norm": 0.6810830529342352, "learning_rate": 3.340279833216401e-06, "loss": 0.01807224303483963, "step": 38110 }, { "epoch": 0.3587294117647059, "grad_norm": 0.46464716346319457, "learning_rate": 3.3400607280720065e-06, "loss": 0.02343439906835556, "step": 38115 }, { "epoch": 0.3587764705882353, "grad_norm": 0.5252832209517811, "learning_rate": 3.3398416660384616e-06, "loss": 0.01812094748020172, "step": 38120 }, { "epoch": 0.3588235294117647, "grad_norm": 0.4981036653491556, "learning_rate": 3.3396226471016307e-06, "loss": 0.017907118797302245, "step": 38125 }, { "epoch": 0.3588705882352941, "grad_norm": 0.509846075484182, "learning_rate": 3.3394036712473845e-06, "loss": 0.02629980444908142, "step": 38130 }, { "epoch": 0.3589176470588235, "grad_norm": 0.4123122849904202, "learning_rate": 3.3391847384616012e-06, "loss": 0.02022682726383209, "step": 38135 }, { "epoch": 0.35896470588235296, "grad_norm": 0.4845164268583671, "learning_rate": 3.3389658487301635e-06, "loss": 0.021884602308273316, "step": 38140 }, { "epoch": 0.35901176470588236, "grad_norm": 0.4516116022824823, "learning_rate": 3.338747002038963e-06, "loss": 0.01813012957572937, "step": 38145 }, { "epoch": 0.35905882352941176, "grad_norm": 0.5499301204022352, "learning_rate": 3.338528198373896e-06, "loss": 0.019826707243919373, "step": 38150 }, { "epoch": 0.35910588235294116, "grad_norm": 0.5075347217983297, "learning_rate": 3.338309437720866e-06, "loss": 0.020306879281997682, "step": 38155 }, { "epoch": 0.35915294117647056, "grad_norm": 0.3195903599837367, "learning_rate": 3.338090720065782e-06, "loss": 0.014987581968307495, "step": 38160 }, { "epoch": 0.3592, "grad_norm": 0.5102655267359563, "learning_rate": 3.337872045394562e-06, "loss": 0.026262885332107543, "step": 38165 }, { "epoch": 0.3592470588235294, "grad_norm": 0.8215645529994308, "learning_rate": 3.337653413693127e-06, "loss": 0.02013084590435028, "step": 38170 }, { "epoch": 0.3592941176470588, "grad_norm": 0.3238757599727519, "learning_rate": 3.3374348249474078e-06, "loss": 0.018669940531253815, "step": 38175 }, { "epoch": 0.3593411764705882, "grad_norm": 0.5271932649912202, "learning_rate": 3.3372162791433377e-06, "loss": 0.023252734541893007, "step": 38180 }, { "epoch": 0.3593882352941176, "grad_norm": 0.6789646620469104, "learning_rate": 3.336997776266861e-06, "loss": 0.0172209233045578, "step": 38185 }, { "epoch": 0.3594352941176471, "grad_norm": 0.5020798086569366, "learning_rate": 3.336779316303925e-06, "loss": 0.021416418254375458, "step": 38190 }, { "epoch": 0.3594823529411765, "grad_norm": 0.7356161239067447, "learning_rate": 3.336560899240485e-06, "loss": 0.02087061107158661, "step": 38195 }, { "epoch": 0.3595294117647059, "grad_norm": 0.6317356943338297, "learning_rate": 3.3363425250625027e-06, "loss": 0.02555873095989227, "step": 38200 }, { "epoch": 0.3595764705882353, "grad_norm": 0.562796159573711, "learning_rate": 3.336124193755945e-06, "loss": 0.021982860565185548, "step": 38205 }, { "epoch": 0.35962352941176473, "grad_norm": 0.7215453283344526, "learning_rate": 3.335905905306787e-06, "loss": 0.02029487192630768, "step": 38210 }, { "epoch": 0.35967058823529413, "grad_norm": 0.6030337799513356, "learning_rate": 3.335687659701009e-06, "loss": 0.020754770934581758, "step": 38215 }, { "epoch": 0.35971764705882353, "grad_norm": 0.5961111895212865, "learning_rate": 3.335469456924598e-06, "loss": 0.030705687403678895, "step": 38220 }, { "epoch": 0.35976470588235293, "grad_norm": 0.5840161611205036, "learning_rate": 3.3352512969635483e-06, "loss": 0.022190643846988677, "step": 38225 }, { "epoch": 0.35981176470588233, "grad_norm": 0.469262843488325, "learning_rate": 3.335033179803858e-06, "loss": 0.02109196186065674, "step": 38230 }, { "epoch": 0.3598588235294118, "grad_norm": 0.5862671050371125, "learning_rate": 3.334815105431535e-06, "loss": 0.020466527342796324, "step": 38235 }, { "epoch": 0.3599058823529412, "grad_norm": 0.6251904653944637, "learning_rate": 3.334597073832593e-06, "loss": 0.023611415922641755, "step": 38240 }, { "epoch": 0.3599529411764706, "grad_norm": 0.8591870193460863, "learning_rate": 3.334379084993048e-06, "loss": 0.026198562979698182, "step": 38245 }, { "epoch": 0.36, "grad_norm": 0.4074393648002345, "learning_rate": 3.334161138898928e-06, "loss": 0.017018061876296998, "step": 38250 }, { "epoch": 0.3600470588235294, "grad_norm": 0.4301106255015957, "learning_rate": 3.333943235536264e-06, "loss": 0.019480293989181517, "step": 38255 }, { "epoch": 0.36009411764705884, "grad_norm": 0.6007069225722637, "learning_rate": 3.333725374891094e-06, "loss": 0.017645947635173798, "step": 38260 }, { "epoch": 0.36014117647058824, "grad_norm": 0.6260167094638276, "learning_rate": 3.3335075569494636e-06, "loss": 0.02354506254196167, "step": 38265 }, { "epoch": 0.36018823529411764, "grad_norm": 0.7056629732519791, "learning_rate": 3.3332897816974236e-06, "loss": 0.017387020587921142, "step": 38270 }, { "epoch": 0.36023529411764704, "grad_norm": 0.4571885398543533, "learning_rate": 3.3330720491210305e-06, "loss": 0.02188621163368225, "step": 38275 }, { "epoch": 0.3602823529411765, "grad_norm": 0.47529848797692836, "learning_rate": 3.3328543592063495e-06, "loss": 0.01836240440607071, "step": 38280 }, { "epoch": 0.3603294117647059, "grad_norm": 0.5043631210726798, "learning_rate": 3.3326367119394497e-06, "loss": 0.021244677901268005, "step": 38285 }, { "epoch": 0.3603764705882353, "grad_norm": 0.5652808433533779, "learning_rate": 3.3324191073064087e-06, "loss": 0.021488194167613984, "step": 38290 }, { "epoch": 0.3604235294117647, "grad_norm": 0.84495094872585, "learning_rate": 3.332201545293308e-06, "loss": 0.01902092099189758, "step": 38295 }, { "epoch": 0.3604705882352941, "grad_norm": 0.5893006356907972, "learning_rate": 3.3319840258862383e-06, "loss": 0.02038647383451462, "step": 38300 }, { "epoch": 0.36051764705882355, "grad_norm": 0.4590971485950205, "learning_rate": 3.331766549071295e-06, "loss": 0.01822180151939392, "step": 38305 }, { "epoch": 0.36056470588235295, "grad_norm": 0.36573383093469336, "learning_rate": 3.3315491148345793e-06, "loss": 0.014478933811187745, "step": 38310 }, { "epoch": 0.36061176470588235, "grad_norm": 0.42808152179604186, "learning_rate": 3.3313317231621994e-06, "loss": 0.01903258413076401, "step": 38315 }, { "epoch": 0.36065882352941175, "grad_norm": 0.6106906669918671, "learning_rate": 3.3311143740402713e-06, "loss": 0.019342291355133056, "step": 38320 }, { "epoch": 0.36070588235294115, "grad_norm": 0.6095842353831348, "learning_rate": 3.3308970674549154e-06, "loss": 0.016008201241493224, "step": 38325 }, { "epoch": 0.3607529411764706, "grad_norm": 0.5869502639415448, "learning_rate": 3.3306798033922582e-06, "loss": 0.021165621280670167, "step": 38330 }, { "epoch": 0.3608, "grad_norm": 0.7389031517478823, "learning_rate": 3.3304625818384345e-06, "loss": 0.0196831077337265, "step": 38335 }, { "epoch": 0.3608470588235294, "grad_norm": 0.435777266224678, "learning_rate": 3.3302454027795838e-06, "loss": 0.02084851861000061, "step": 38340 }, { "epoch": 0.3608941176470588, "grad_norm": 0.4919395888257142, "learning_rate": 3.3300282662018536e-06, "loss": 0.022080439329147338, "step": 38345 }, { "epoch": 0.3609411764705882, "grad_norm": 0.5195415465863488, "learning_rate": 3.3298111720913937e-06, "loss": 0.0192155659198761, "step": 38350 }, { "epoch": 0.36098823529411767, "grad_norm": 1.4610097187289421, "learning_rate": 3.3295941204343662e-06, "loss": 0.01874160170555115, "step": 38355 }, { "epoch": 0.36103529411764707, "grad_norm": 0.5822492968247471, "learning_rate": 3.3293771112169352e-06, "loss": 0.02029123455286026, "step": 38360 }, { "epoch": 0.36108235294117647, "grad_norm": 0.6055000060512632, "learning_rate": 3.329160144425271e-06, "loss": 0.019294612109661102, "step": 38365 }, { "epoch": 0.36112941176470587, "grad_norm": 0.5496871606223999, "learning_rate": 3.328943220045555e-06, "loss": 0.0198562353849411, "step": 38370 }, { "epoch": 0.3611764705882353, "grad_norm": 0.5319163359841445, "learning_rate": 3.328726338063967e-06, "loss": 0.020667235553264617, "step": 38375 }, { "epoch": 0.3612235294117647, "grad_norm": 0.696038453690542, "learning_rate": 3.328509498466701e-06, "loss": 0.019475781917572023, "step": 38380 }, { "epoch": 0.3612705882352941, "grad_norm": 0.43530713117770414, "learning_rate": 3.3282927012399522e-06, "loss": 0.016013675928115846, "step": 38385 }, { "epoch": 0.3613176470588235, "grad_norm": 0.6084332696910657, "learning_rate": 3.328075946369924e-06, "loss": 0.02429054379463196, "step": 38390 }, { "epoch": 0.3613647058823529, "grad_norm": 0.42427760244123874, "learning_rate": 3.3278592338428263e-06, "loss": 0.022736728191375732, "step": 38395 }, { "epoch": 0.3614117647058824, "grad_norm": 0.6205067030787774, "learning_rate": 3.3276425636448738e-06, "loss": 0.023355542123317717, "step": 38400 }, { "epoch": 0.3614588235294118, "grad_norm": 0.49212020077222024, "learning_rate": 3.327425935762289e-06, "loss": 0.020499661564826965, "step": 38405 }, { "epoch": 0.3615058823529412, "grad_norm": 0.5428718373194723, "learning_rate": 3.3272093501813e-06, "loss": 0.022964507341384888, "step": 38410 }, { "epoch": 0.3615529411764706, "grad_norm": 0.6558502939498092, "learning_rate": 3.3269928068881425e-06, "loss": 0.017139157652854918, "step": 38415 }, { "epoch": 0.3616, "grad_norm": 0.4597386006703254, "learning_rate": 3.3267763058690554e-06, "loss": 0.015256305038928986, "step": 38420 }, { "epoch": 0.36164705882352943, "grad_norm": 0.5853119896832657, "learning_rate": 3.326559847110287e-06, "loss": 0.0236931174993515, "step": 38425 }, { "epoch": 0.36169411764705883, "grad_norm": 0.5416794385403585, "learning_rate": 3.32634343059809e-06, "loss": 0.019792276620864867, "step": 38430 }, { "epoch": 0.36174117647058823, "grad_norm": 0.49581286452933027, "learning_rate": 3.326127056318724e-06, "loss": 0.01904304325580597, "step": 38435 }, { "epoch": 0.36178823529411763, "grad_norm": 0.5674132864677117, "learning_rate": 3.3259107242584553e-06, "loss": 0.021943750977516174, "step": 38440 }, { "epoch": 0.36183529411764703, "grad_norm": 0.2995493889672093, "learning_rate": 3.3256944344035563e-06, "loss": 0.013582608103752137, "step": 38445 }, { "epoch": 0.3618823529411765, "grad_norm": 0.8224059612570982, "learning_rate": 3.3254781867403034e-06, "loss": 0.022312554717063903, "step": 38450 }, { "epoch": 0.3619294117647059, "grad_norm": 0.6985646333875226, "learning_rate": 3.3252619812549842e-06, "loss": 0.021809601783752443, "step": 38455 }, { "epoch": 0.3619764705882353, "grad_norm": 0.5254065904938775, "learning_rate": 3.325045817933887e-06, "loss": 0.020555900037288667, "step": 38460 }, { "epoch": 0.3620235294117647, "grad_norm": 0.4356275780003356, "learning_rate": 3.3248296967633097e-06, "loss": 0.023520588874816895, "step": 38465 }, { "epoch": 0.36207058823529414, "grad_norm": 0.4771358762897493, "learning_rate": 3.324613617729556e-06, "loss": 0.0205399289727211, "step": 38470 }, { "epoch": 0.36211764705882354, "grad_norm": 0.5644576640205118, "learning_rate": 3.3243975808189347e-06, "loss": 0.020427250862121583, "step": 38475 }, { "epoch": 0.36216470588235294, "grad_norm": 0.5679358973960739, "learning_rate": 3.3241815860177625e-06, "loss": 0.018959414958953858, "step": 38480 }, { "epoch": 0.36221176470588234, "grad_norm": 0.9783210141482469, "learning_rate": 3.3239656333123604e-06, "loss": 0.025128000974655153, "step": 38485 }, { "epoch": 0.36225882352941174, "grad_norm": 0.5527345329261881, "learning_rate": 3.323749722689057e-06, "loss": 0.01840626895427704, "step": 38490 }, { "epoch": 0.3623058823529412, "grad_norm": 0.524033764663521, "learning_rate": 3.3235338541341868e-06, "loss": 0.020059606432914732, "step": 38495 }, { "epoch": 0.3623529411764706, "grad_norm": 0.5267313464684167, "learning_rate": 3.32331802763409e-06, "loss": 0.018686375021934508, "step": 38500 }, { "epoch": 0.3624, "grad_norm": 0.4047971808094287, "learning_rate": 3.323102243175114e-06, "loss": 0.01493086814880371, "step": 38505 }, { "epoch": 0.3624470588235294, "grad_norm": 0.6162613110208462, "learning_rate": 3.322886500743611e-06, "loss": 0.020664842426776887, "step": 38510 }, { "epoch": 0.3624941176470588, "grad_norm": 0.4694360343437149, "learning_rate": 3.322670800325941e-06, "loss": 0.019929885864257812, "step": 38515 }, { "epoch": 0.36254117647058826, "grad_norm": 0.49544413952708755, "learning_rate": 3.3224551419084693e-06, "loss": 0.023180797696113586, "step": 38520 }, { "epoch": 0.36258823529411766, "grad_norm": 0.6421583602572367, "learning_rate": 3.322239525477568e-06, "loss": 0.026036903262138367, "step": 38525 }, { "epoch": 0.36263529411764706, "grad_norm": 0.6789949122164766, "learning_rate": 3.3220239510196138e-06, "loss": 0.022564885020256043, "step": 38530 }, { "epoch": 0.36268235294117646, "grad_norm": 0.47972239938411915, "learning_rate": 3.321808418520992e-06, "loss": 0.018870799243450163, "step": 38535 }, { "epoch": 0.36272941176470586, "grad_norm": 0.5354264851513316, "learning_rate": 3.3215929279680913e-06, "loss": 0.01941945552825928, "step": 38540 }, { "epoch": 0.3627764705882353, "grad_norm": 0.3792232394394483, "learning_rate": 3.321377479347309e-06, "loss": 0.023687250912189484, "step": 38545 }, { "epoch": 0.3628235294117647, "grad_norm": 0.8497998769520164, "learning_rate": 3.321162072645048e-06, "loss": 0.020495522022247314, "step": 38550 }, { "epoch": 0.3628705882352941, "grad_norm": 0.7444671310218018, "learning_rate": 3.320946707847716e-06, "loss": 0.019978880882263184, "step": 38555 }, { "epoch": 0.3629176470588235, "grad_norm": 0.5303763952692955, "learning_rate": 3.3207313849417284e-06, "loss": 0.019298157095909117, "step": 38560 }, { "epoch": 0.36296470588235297, "grad_norm": 0.33428153353474027, "learning_rate": 3.320516103913506e-06, "loss": 0.02462438941001892, "step": 38565 }, { "epoch": 0.36301176470588237, "grad_norm": 0.5984302659825026, "learning_rate": 3.3203008647494767e-06, "loss": 0.02326864153146744, "step": 38570 }, { "epoch": 0.36305882352941177, "grad_norm": 0.3918981591925464, "learning_rate": 3.3200856674360724e-06, "loss": 0.015134510397911072, "step": 38575 }, { "epoch": 0.36310588235294117, "grad_norm": 0.574666036675535, "learning_rate": 3.3198705119597345e-06, "loss": 0.018512602150440215, "step": 38580 }, { "epoch": 0.36315294117647057, "grad_norm": 0.6155631517782193, "learning_rate": 3.3196553983069075e-06, "loss": 0.023908540606498718, "step": 38585 }, { "epoch": 0.3632, "grad_norm": 0.61578166357731, "learning_rate": 3.3194403264640435e-06, "loss": 0.02068125903606415, "step": 38590 }, { "epoch": 0.3632470588235294, "grad_norm": 0.6141643177447442, "learning_rate": 3.3192252964176e-06, "loss": 0.020071640610694885, "step": 38595 }, { "epoch": 0.3632941176470588, "grad_norm": 0.52396633091401, "learning_rate": 3.319010308154042e-06, "loss": 0.01738038659095764, "step": 38600 }, { "epoch": 0.3633411764705882, "grad_norm": 0.4357662486831652, "learning_rate": 3.3187953616598394e-06, "loss": 0.020453950762748717, "step": 38605 }, { "epoch": 0.3633882352941176, "grad_norm": 0.7387968546325697, "learning_rate": 3.318580456921468e-06, "loss": 0.0228516086935997, "step": 38610 }, { "epoch": 0.3634352941176471, "grad_norm": 0.4038403086326585, "learning_rate": 3.3183655939254113e-06, "loss": 0.024110186100006103, "step": 38615 }, { "epoch": 0.3634823529411765, "grad_norm": 0.5125707005156368, "learning_rate": 3.318150772658157e-06, "loss": 0.022043631970882417, "step": 38620 }, { "epoch": 0.3635294117647059, "grad_norm": 0.4164177602856056, "learning_rate": 3.3179359931062006e-06, "loss": 0.018736276030540466, "step": 38625 }, { "epoch": 0.3635764705882353, "grad_norm": 0.6918299070891591, "learning_rate": 3.317721255256042e-06, "loss": 0.018147185444831848, "step": 38630 }, { "epoch": 0.3636235294117647, "grad_norm": 0.8066750695929499, "learning_rate": 3.31750655909419e-06, "loss": 0.02381987273693085, "step": 38635 }, { "epoch": 0.36367058823529413, "grad_norm": 0.5708826541261863, "learning_rate": 3.317291904607155e-06, "loss": 0.01997547149658203, "step": 38640 }, { "epoch": 0.36371764705882353, "grad_norm": 0.42390762434236373, "learning_rate": 3.317077291781458e-06, "loss": 0.01887427717447281, "step": 38645 }, { "epoch": 0.36376470588235293, "grad_norm": 0.5671066990973309, "learning_rate": 3.3168627206036243e-06, "loss": 0.019992974400520325, "step": 38650 }, { "epoch": 0.36381176470588233, "grad_norm": 0.6409887748638056, "learning_rate": 3.3166481910601844e-06, "loss": 0.02131684422492981, "step": 38655 }, { "epoch": 0.3638588235294118, "grad_norm": 0.609597025330848, "learning_rate": 3.316433703137677e-06, "loss": 0.0180541530251503, "step": 38660 }, { "epoch": 0.3639058823529412, "grad_norm": 0.36308454681757135, "learning_rate": 3.316219256822645e-06, "loss": 0.022861482203006746, "step": 38665 }, { "epoch": 0.3639529411764706, "grad_norm": 0.44681290035283383, "learning_rate": 3.3160048521016375e-06, "loss": 0.017564845085144044, "step": 38670 }, { "epoch": 0.364, "grad_norm": 0.6374277740213836, "learning_rate": 3.315790488961212e-06, "loss": 0.016568394005298616, "step": 38675 }, { "epoch": 0.3640470588235294, "grad_norm": 0.4522070890799631, "learning_rate": 3.3155761673879285e-06, "loss": 0.020258828997612, "step": 38680 }, { "epoch": 0.36409411764705885, "grad_norm": 0.7013779774114344, "learning_rate": 3.315361887368355e-06, "loss": 0.0298872172832489, "step": 38685 }, { "epoch": 0.36414117647058825, "grad_norm": 0.5151450997825336, "learning_rate": 3.3151476488890672e-06, "loss": 0.017943552136421202, "step": 38690 }, { "epoch": 0.36418823529411765, "grad_norm": 0.7195022063931134, "learning_rate": 3.3149334519366445e-06, "loss": 0.022438019514083862, "step": 38695 }, { "epoch": 0.36423529411764705, "grad_norm": 0.4475207863730086, "learning_rate": 3.314719296497672e-06, "loss": 0.014482995867729187, "step": 38700 }, { "epoch": 0.36428235294117645, "grad_norm": 0.5889986093657334, "learning_rate": 3.3145051825587425e-06, "loss": 0.019232416152954103, "step": 38705 }, { "epoch": 0.3643294117647059, "grad_norm": 0.6091872317347518, "learning_rate": 3.314291110106455e-06, "loss": 0.020397795736789702, "step": 38710 }, { "epoch": 0.3643764705882353, "grad_norm": 0.5554875797782508, "learning_rate": 3.3140770791274134e-06, "loss": 0.01745302081108093, "step": 38715 }, { "epoch": 0.3644235294117647, "grad_norm": 0.501583659013408, "learning_rate": 3.3138630896082272e-06, "loss": 0.013625192642211913, "step": 38720 }, { "epoch": 0.3644705882352941, "grad_norm": 1.2313850627586473, "learning_rate": 3.3136491415355143e-06, "loss": 0.0210229754447937, "step": 38725 }, { "epoch": 0.3645176470588235, "grad_norm": 0.5142639642504974, "learning_rate": 3.3134352348958966e-06, "loss": 0.02113116383552551, "step": 38730 }, { "epoch": 0.36456470588235296, "grad_norm": 0.6065167705140978, "learning_rate": 3.313221369676002e-06, "loss": 0.021822281181812286, "step": 38735 }, { "epoch": 0.36461176470588236, "grad_norm": 0.4230519260956077, "learning_rate": 3.3130075458624655e-06, "loss": 0.019166183471679688, "step": 38740 }, { "epoch": 0.36465882352941176, "grad_norm": 0.6771089916413396, "learning_rate": 3.3127937634419282e-06, "loss": 0.019304299354553224, "step": 38745 }, { "epoch": 0.36470588235294116, "grad_norm": 0.3047421919632153, "learning_rate": 3.3125800224010362e-06, "loss": 0.015575142204761505, "step": 38750 }, { "epoch": 0.3647529411764706, "grad_norm": 0.8199898662219148, "learning_rate": 3.312366322726442e-06, "loss": 0.019750133156776428, "step": 38755 }, { "epoch": 0.3648, "grad_norm": 0.7626386075585103, "learning_rate": 3.312152664404805e-06, "loss": 0.021995735168457032, "step": 38760 }, { "epoch": 0.3648470588235294, "grad_norm": 0.5047791084299321, "learning_rate": 3.3119390474227896e-06, "loss": 0.021156361699104308, "step": 38765 }, { "epoch": 0.3648941176470588, "grad_norm": 0.753472060767352, "learning_rate": 3.311725471767066e-06, "loss": 0.02319740056991577, "step": 38770 }, { "epoch": 0.3649411764705882, "grad_norm": 0.7367077209630991, "learning_rate": 3.3115119374243122e-06, "loss": 0.021249626576900483, "step": 38775 }, { "epoch": 0.36498823529411767, "grad_norm": 0.5686354562124867, "learning_rate": 3.3112984443812095e-06, "loss": 0.023080277442932128, "step": 38780 }, { "epoch": 0.36503529411764707, "grad_norm": 0.5876787755469558, "learning_rate": 3.3110849926244476e-06, "loss": 0.021156534552574158, "step": 38785 }, { "epoch": 0.36508235294117647, "grad_norm": 0.4403394207544455, "learning_rate": 3.3108715821407218e-06, "loss": 0.01931370496749878, "step": 38790 }, { "epoch": 0.36512941176470587, "grad_norm": 1.6468473180131122, "learning_rate": 3.3106582129167314e-06, "loss": 0.024109217524528503, "step": 38795 }, { "epoch": 0.36517647058823527, "grad_norm": 1.0691031210829864, "learning_rate": 3.310444884939184e-06, "loss": 0.019381043314933778, "step": 38800 }, { "epoch": 0.3652235294117647, "grad_norm": 0.6213537931560362, "learning_rate": 3.310231598194793e-06, "loss": 0.020448037981987, "step": 38805 }, { "epoch": 0.3652705882352941, "grad_norm": 0.7914021998855203, "learning_rate": 3.3100183526702755e-06, "loss": 0.021058489382267, "step": 38810 }, { "epoch": 0.3653176470588235, "grad_norm": 0.5556026009104724, "learning_rate": 3.309805148352358e-06, "loss": 0.022059887647628784, "step": 38815 }, { "epoch": 0.3653647058823529, "grad_norm": 0.4544116626906857, "learning_rate": 3.30959198522777e-06, "loss": 0.015407697856426239, "step": 38820 }, { "epoch": 0.3654117647058824, "grad_norm": 0.509622075061383, "learning_rate": 3.3093788632832496e-06, "loss": 0.022161561250686645, "step": 38825 }, { "epoch": 0.3654588235294118, "grad_norm": 0.49430506399763763, "learning_rate": 3.309165782505538e-06, "loss": 0.015278312563896179, "step": 38830 }, { "epoch": 0.3655058823529412, "grad_norm": 0.44503268856021727, "learning_rate": 3.308952742881385e-06, "loss": 0.02169957160949707, "step": 38835 }, { "epoch": 0.3655529411764706, "grad_norm": 0.5299359199370952, "learning_rate": 3.3087397443975443e-06, "loss": 0.017129945755004882, "step": 38840 }, { "epoch": 0.3656, "grad_norm": 0.6194765366859264, "learning_rate": 3.308526787040777e-06, "loss": 0.024951964616775513, "step": 38845 }, { "epoch": 0.36564705882352944, "grad_norm": 0.8010828996953072, "learning_rate": 3.3083138707978507e-06, "loss": 0.022291241586208342, "step": 38850 }, { "epoch": 0.36569411764705884, "grad_norm": 0.587765304169673, "learning_rate": 3.308100995655536e-06, "loss": 0.024641916155815125, "step": 38855 }, { "epoch": 0.36574117647058824, "grad_norm": 0.5761012934907943, "learning_rate": 3.3078881616006127e-06, "loss": 0.01920129656791687, "step": 38860 }, { "epoch": 0.36578823529411764, "grad_norm": 0.6722871612171799, "learning_rate": 3.3076753686198648e-06, "loss": 0.02018236517906189, "step": 38865 }, { "epoch": 0.36583529411764704, "grad_norm": 0.5823472275730283, "learning_rate": 3.307462616700083e-06, "loss": 0.01972467601299286, "step": 38870 }, { "epoch": 0.3658823529411765, "grad_norm": 0.5514355145599604, "learning_rate": 3.307249905828063e-06, "loss": 0.0246821790933609, "step": 38875 }, { "epoch": 0.3659294117647059, "grad_norm": 0.7690239970361361, "learning_rate": 3.3070372359906083e-06, "loss": 0.01918829381465912, "step": 38880 }, { "epoch": 0.3659764705882353, "grad_norm": 0.5608150817474372, "learning_rate": 3.3068246071745263e-06, "loss": 0.021649083495140074, "step": 38885 }, { "epoch": 0.3660235294117647, "grad_norm": 0.5358263818073596, "learning_rate": 3.3066120193666313e-06, "loss": 0.023223716020584106, "step": 38890 }, { "epoch": 0.3660705882352941, "grad_norm": 0.5452848173059753, "learning_rate": 3.306399472553743e-06, "loss": 0.019469195604324342, "step": 38895 }, { "epoch": 0.36611764705882355, "grad_norm": 0.46636795640527556, "learning_rate": 3.3061869667226874e-06, "loss": 0.01890570521354675, "step": 38900 }, { "epoch": 0.36616470588235295, "grad_norm": 0.531472367382912, "learning_rate": 3.305974501860298e-06, "loss": 0.021931460499763487, "step": 38905 }, { "epoch": 0.36621176470588235, "grad_norm": 0.5149812814374816, "learning_rate": 3.305762077953411e-06, "loss": 0.024257022142410278, "step": 38910 }, { "epoch": 0.36625882352941175, "grad_norm": 0.5533472602948671, "learning_rate": 3.3055496949888703e-06, "loss": 0.01986878514289856, "step": 38915 }, { "epoch": 0.3663058823529412, "grad_norm": 0.5898178301560327, "learning_rate": 3.305337352953527e-06, "loss": 0.020512586832046507, "step": 38920 }, { "epoch": 0.3663529411764706, "grad_norm": 0.5421221336020747, "learning_rate": 3.3051250518342354e-06, "loss": 0.018350182473659514, "step": 38925 }, { "epoch": 0.3664, "grad_norm": 0.5150424299483799, "learning_rate": 3.3049127916178574e-06, "loss": 0.018212199211120605, "step": 38930 }, { "epoch": 0.3664470588235294, "grad_norm": 0.41433299065396917, "learning_rate": 3.3047005722912606e-06, "loss": 0.022256509959697725, "step": 38935 }, { "epoch": 0.3664941176470588, "grad_norm": 0.5144486266517352, "learning_rate": 3.304488393841318e-06, "loss": 0.01980305016040802, "step": 38940 }, { "epoch": 0.36654117647058826, "grad_norm": 0.4637695794015229, "learning_rate": 3.3042762562549096e-06, "loss": 0.017482072114944458, "step": 38945 }, { "epoch": 0.36658823529411766, "grad_norm": 0.6814373696788658, "learning_rate": 3.30406415951892e-06, "loss": 0.017771127820014953, "step": 38950 }, { "epoch": 0.36663529411764706, "grad_norm": 0.5307730204971759, "learning_rate": 3.30385210362024e-06, "loss": 0.018149463832378386, "step": 38955 }, { "epoch": 0.36668235294117646, "grad_norm": 0.9420331442290836, "learning_rate": 3.3036400885457672e-06, "loss": 0.019944481551647186, "step": 38960 }, { "epoch": 0.36672941176470586, "grad_norm": 0.4130964155926894, "learning_rate": 3.303428114282403e-06, "loss": 0.022853949666023256, "step": 38965 }, { "epoch": 0.3667764705882353, "grad_norm": 0.6334612220260695, "learning_rate": 3.3032161808170584e-06, "loss": 0.025645729899406434, "step": 38970 }, { "epoch": 0.3668235294117647, "grad_norm": 0.6474123612005964, "learning_rate": 3.3030042881366458e-06, "loss": 0.026590335369110107, "step": 38975 }, { "epoch": 0.3668705882352941, "grad_norm": 0.5970924396699553, "learning_rate": 3.3027924362280865e-06, "loss": 0.01858498752117157, "step": 38980 }, { "epoch": 0.3669176470588235, "grad_norm": 0.4417839028647529, "learning_rate": 3.3025806250783076e-06, "loss": 0.02258150279521942, "step": 38985 }, { "epoch": 0.3669647058823529, "grad_norm": 1.6687434173977733, "learning_rate": 3.30236885467424e-06, "loss": 0.02376256585121155, "step": 38990 }, { "epoch": 0.36701176470588237, "grad_norm": 0.6512195101884692, "learning_rate": 3.3021571250028224e-06, "loss": 0.020295596122741698, "step": 38995 }, { "epoch": 0.36705882352941177, "grad_norm": 0.8579238860588264, "learning_rate": 3.3019454360509983e-06, "loss": 0.02061949372291565, "step": 39000 }, { "epoch": 0.36710588235294117, "grad_norm": 0.4612510515387727, "learning_rate": 3.3017337878057176e-06, "loss": 0.01867530345916748, "step": 39005 }, { "epoch": 0.36715294117647057, "grad_norm": 0.7879557560909674, "learning_rate": 3.301522180253937e-06, "loss": 0.02351529002189636, "step": 39010 }, { "epoch": 0.3672, "grad_norm": 0.4934468031993434, "learning_rate": 3.301310613382616e-06, "loss": 0.016990044713020326, "step": 39015 }, { "epoch": 0.3672470588235294, "grad_norm": 0.47282498744969365, "learning_rate": 3.301099087178724e-06, "loss": 0.026205044984817506, "step": 39020 }, { "epoch": 0.3672941176470588, "grad_norm": 0.35364408335519204, "learning_rate": 3.3008876016292322e-06, "loss": 0.018317893147468567, "step": 39025 }, { "epoch": 0.3673411764705882, "grad_norm": 0.4721692887662209, "learning_rate": 3.3006761567211214e-06, "loss": 0.02146528959274292, "step": 39030 }, { "epoch": 0.3673882352941176, "grad_norm": 0.2888364432352575, "learning_rate": 3.3004647524413754e-06, "loss": 0.014923512935638428, "step": 39035 }, { "epoch": 0.3674352941176471, "grad_norm": 0.43379675098674075, "learning_rate": 3.3002533887769845e-06, "loss": 0.01561598777770996, "step": 39040 }, { "epoch": 0.3674823529411765, "grad_norm": 0.7157573259090882, "learning_rate": 3.3000420657149455e-06, "loss": 0.017783069610595705, "step": 39045 }, { "epoch": 0.3675294117647059, "grad_norm": 0.5034583006194, "learning_rate": 3.2998307832422626e-06, "loss": 0.019331373274326324, "step": 39050 }, { "epoch": 0.3675764705882353, "grad_norm": 0.6282315242068671, "learning_rate": 3.2996195413459407e-06, "loss": 0.020400720834732055, "step": 39055 }, { "epoch": 0.3676235294117647, "grad_norm": 0.7064314280833053, "learning_rate": 3.2994083400129968e-06, "loss": 0.021647630631923674, "step": 39060 }, { "epoch": 0.36767058823529414, "grad_norm": 0.4037751855829221, "learning_rate": 3.299197179230448e-06, "loss": 0.016343812644481658, "step": 39065 }, { "epoch": 0.36771764705882354, "grad_norm": 0.43535067840189384, "learning_rate": 3.2989860589853222e-06, "loss": 0.021842843294143675, "step": 39070 }, { "epoch": 0.36776470588235294, "grad_norm": 0.5831277470754392, "learning_rate": 3.29877497926465e-06, "loss": 0.019639769196510316, "step": 39075 }, { "epoch": 0.36781176470588234, "grad_norm": 0.40687343736562004, "learning_rate": 3.2985639400554685e-06, "loss": 0.020756450295448304, "step": 39080 }, { "epoch": 0.36785882352941174, "grad_norm": 0.43391980559812454, "learning_rate": 3.2983529413448202e-06, "loss": 0.016447687149047853, "step": 39085 }, { "epoch": 0.3679058823529412, "grad_norm": 0.6501294837264886, "learning_rate": 3.298141983119755e-06, "loss": 0.02320494204759598, "step": 39090 }, { "epoch": 0.3679529411764706, "grad_norm": 0.8793503670416633, "learning_rate": 3.2979310653673274e-06, "loss": 0.02212663143873215, "step": 39095 }, { "epoch": 0.368, "grad_norm": 0.5548526410361472, "learning_rate": 3.2977201880745967e-06, "loss": 0.02025871127843857, "step": 39100 }, { "epoch": 0.3680470588235294, "grad_norm": 0.6389581629431108, "learning_rate": 3.297509351228631e-06, "loss": 0.01999495327472687, "step": 39105 }, { "epoch": 0.36809411764705885, "grad_norm": 0.6900270614514982, "learning_rate": 3.2972985548164998e-06, "loss": 0.02110505700111389, "step": 39110 }, { "epoch": 0.36814117647058825, "grad_norm": 0.700271812475368, "learning_rate": 3.2970877988252834e-06, "loss": 0.021459591388702393, "step": 39115 }, { "epoch": 0.36818823529411765, "grad_norm": 0.4825383935725023, "learning_rate": 3.296877083242064e-06, "loss": 0.02104744017124176, "step": 39120 }, { "epoch": 0.36823529411764705, "grad_norm": 0.2827587970236597, "learning_rate": 3.2966664080539313e-06, "loss": 0.018765148520469666, "step": 39125 }, { "epoch": 0.36828235294117645, "grad_norm": 0.4825012459802197, "learning_rate": 3.29645577324798e-06, "loss": 0.022623766958713532, "step": 39130 }, { "epoch": 0.3683294117647059, "grad_norm": 0.8697599572468234, "learning_rate": 3.296245178811312e-06, "loss": 0.025627392530441283, "step": 39135 }, { "epoch": 0.3683764705882353, "grad_norm": 0.45544701451515823, "learning_rate": 3.296034624731033e-06, "loss": 0.021234504878520966, "step": 39140 }, { "epoch": 0.3684235294117647, "grad_norm": 0.46657281229518466, "learning_rate": 3.2958241109942564e-06, "loss": 0.013374242186546325, "step": 39145 }, { "epoch": 0.3684705882352941, "grad_norm": 0.5513055459482702, "learning_rate": 3.2956136375880993e-06, "loss": 0.018638958036899567, "step": 39150 }, { "epoch": 0.3685176470588235, "grad_norm": 0.5504210123590271, "learning_rate": 3.295403204499686e-06, "loss": 0.025193026661872862, "step": 39155 }, { "epoch": 0.36856470588235296, "grad_norm": 0.5051563911682176, "learning_rate": 3.2951928117161465e-06, "loss": 0.01826976239681244, "step": 39160 }, { "epoch": 0.36861176470588236, "grad_norm": 0.7590252983409771, "learning_rate": 3.294982459224616e-06, "loss": 0.018047165870666505, "step": 39165 }, { "epoch": 0.36865882352941176, "grad_norm": 0.6099361540958974, "learning_rate": 3.2947721470122363e-06, "loss": 0.023036077618598938, "step": 39170 }, { "epoch": 0.36870588235294116, "grad_norm": 0.6287445716837688, "learning_rate": 3.294561875066153e-06, "loss": 0.02086782306432724, "step": 39175 }, { "epoch": 0.36875294117647056, "grad_norm": 0.5452703831626412, "learning_rate": 3.29435164337352e-06, "loss": 0.018623828887939453, "step": 39180 }, { "epoch": 0.3688, "grad_norm": 0.39566031004473856, "learning_rate": 3.2941414519214952e-06, "loss": 0.016327418386936188, "step": 39185 }, { "epoch": 0.3688470588235294, "grad_norm": 0.5796121402586638, "learning_rate": 3.2939313006972436e-06, "loss": 0.021426481008529664, "step": 39190 }, { "epoch": 0.3688941176470588, "grad_norm": 0.5166320776948826, "learning_rate": 3.293721189687934e-06, "loss": 0.021370773017406464, "step": 39195 }, { "epoch": 0.3689411764705882, "grad_norm": 0.3927527368675621, "learning_rate": 3.293511118880742e-06, "loss": 0.020049455761909484, "step": 39200 }, { "epoch": 0.3689882352941177, "grad_norm": 0.6638434828504574, "learning_rate": 3.2933010882628496e-06, "loss": 0.021237948536872865, "step": 39205 }, { "epoch": 0.3690352941176471, "grad_norm": 0.5497289758178009, "learning_rate": 3.293091097821444e-06, "loss": 0.019208016991615295, "step": 39210 }, { "epoch": 0.3690823529411765, "grad_norm": 0.5223996903646007, "learning_rate": 3.292881147543718e-06, "loss": 0.021420201659202574, "step": 39215 }, { "epoch": 0.3691294117647059, "grad_norm": 0.48514703767860806, "learning_rate": 3.292671237416869e-06, "loss": 0.020502370595932008, "step": 39220 }, { "epoch": 0.3691764705882353, "grad_norm": 0.562930689099116, "learning_rate": 3.292461367428102e-06, "loss": 0.0225984588265419, "step": 39225 }, { "epoch": 0.36922352941176473, "grad_norm": 0.7227728472763041, "learning_rate": 3.2922515375646276e-06, "loss": 0.018271493911743163, "step": 39230 }, { "epoch": 0.36927058823529413, "grad_norm": 0.739214714036579, "learning_rate": 3.2920417478136598e-06, "loss": 0.02233130931854248, "step": 39235 }, { "epoch": 0.36931764705882353, "grad_norm": 0.411065730864349, "learning_rate": 3.2918319981624213e-06, "loss": 0.018696215748786927, "step": 39240 }, { "epoch": 0.36936470588235293, "grad_norm": 0.7253569606728999, "learning_rate": 3.2916222885981386e-06, "loss": 0.018403926491737367, "step": 39245 }, { "epoch": 0.36941176470588233, "grad_norm": 0.6122998777788208, "learning_rate": 3.2914126191080446e-06, "loss": 0.021674734354019166, "step": 39250 }, { "epoch": 0.3694588235294118, "grad_norm": 0.6362000052462322, "learning_rate": 3.2912029896793773e-06, "loss": 0.01850563585758209, "step": 39255 }, { "epoch": 0.3695058823529412, "grad_norm": 0.49800321767313016, "learning_rate": 3.290993400299381e-06, "loss": 0.0216756671667099, "step": 39260 }, { "epoch": 0.3695529411764706, "grad_norm": 0.5683010003364345, "learning_rate": 3.2907838509553058e-06, "loss": 0.022334879636764525, "step": 39265 }, { "epoch": 0.3696, "grad_norm": 0.29548937010578685, "learning_rate": 3.2905743416344072e-06, "loss": 0.015313613414764404, "step": 39270 }, { "epoch": 0.3696470588235294, "grad_norm": 0.4909133427276434, "learning_rate": 3.2903648723239456e-06, "loss": 0.020784559845924377, "step": 39275 }, { "epoch": 0.36969411764705884, "grad_norm": 1.6181720416929501, "learning_rate": 3.2901554430111886e-06, "loss": 0.02459368109703064, "step": 39280 }, { "epoch": 0.36974117647058824, "grad_norm": 0.6330418539653332, "learning_rate": 3.2899460536834088e-06, "loss": 0.02072093039751053, "step": 39285 }, { "epoch": 0.36978823529411764, "grad_norm": 0.3301754175296466, "learning_rate": 3.2897367043278833e-06, "loss": 0.021815562248229982, "step": 39290 }, { "epoch": 0.36983529411764704, "grad_norm": 0.5136016355149821, "learning_rate": 3.2895273949318972e-06, "loss": 0.024628084897994996, "step": 39295 }, { "epoch": 0.3698823529411765, "grad_norm": 0.5659401498050916, "learning_rate": 3.289318125482739e-06, "loss": 0.019947490096092223, "step": 39300 }, { "epoch": 0.3699294117647059, "grad_norm": 0.55366381101959, "learning_rate": 3.2891088959677044e-06, "loss": 0.024051329493522643, "step": 39305 }, { "epoch": 0.3699764705882353, "grad_norm": 0.6295760523195297, "learning_rate": 3.288899706374094e-06, "loss": 0.016064095497131347, "step": 39310 }, { "epoch": 0.3700235294117647, "grad_norm": 0.5628964532851729, "learning_rate": 3.2886905566892146e-06, "loss": 0.02087089717388153, "step": 39315 }, { "epoch": 0.3700705882352941, "grad_norm": 0.5727317512515808, "learning_rate": 3.288481446900378e-06, "loss": 0.01748593747615814, "step": 39320 }, { "epoch": 0.37011764705882355, "grad_norm": 0.6225272842653028, "learning_rate": 3.288272376994902e-06, "loss": 0.0248067706823349, "step": 39325 }, { "epoch": 0.37016470588235295, "grad_norm": 0.40247475212011924, "learning_rate": 3.28806334696011e-06, "loss": 0.020886734127998352, "step": 39330 }, { "epoch": 0.37021176470588235, "grad_norm": 0.42811123974235715, "learning_rate": 3.2878543567833305e-06, "loss": 0.01872825771570206, "step": 39335 }, { "epoch": 0.37025882352941175, "grad_norm": 0.609223541332455, "learning_rate": 3.2876454064519e-06, "loss": 0.01986919641494751, "step": 39340 }, { "epoch": 0.37030588235294115, "grad_norm": 0.6315338426265905, "learning_rate": 3.2874364959531564e-06, "loss": 0.017161840200424196, "step": 39345 }, { "epoch": 0.3703529411764706, "grad_norm": 0.646311454589068, "learning_rate": 3.2872276252744474e-06, "loss": 0.01883432865142822, "step": 39350 }, { "epoch": 0.3704, "grad_norm": 0.4514567652731652, "learning_rate": 3.287018794403124e-06, "loss": 0.017435526847839354, "step": 39355 }, { "epoch": 0.3704470588235294, "grad_norm": 0.4204616010568248, "learning_rate": 3.286810003326543e-06, "loss": 0.02173553854227066, "step": 39360 }, { "epoch": 0.3704941176470588, "grad_norm": 0.6749622295328337, "learning_rate": 3.2866012520320677e-06, "loss": 0.02242184579372406, "step": 39365 }, { "epoch": 0.37054117647058826, "grad_norm": 0.4754805847504767, "learning_rate": 3.286392540507067e-06, "loss": 0.016459819674491883, "step": 39370 }, { "epoch": 0.37058823529411766, "grad_norm": 0.6047019012650413, "learning_rate": 3.2861838687389138e-06, "loss": 0.02089962661266327, "step": 39375 }, { "epoch": 0.37063529411764706, "grad_norm": 0.3634597022551133, "learning_rate": 3.2859752367149885e-06, "loss": 0.01806398928165436, "step": 39380 }, { "epoch": 0.37068235294117646, "grad_norm": 0.639028968253113, "learning_rate": 3.2857666444226754e-06, "loss": 0.018294277787208556, "step": 39385 }, { "epoch": 0.37072941176470586, "grad_norm": 0.6180588426913347, "learning_rate": 3.285558091849368e-06, "loss": 0.017691406607627868, "step": 39390 }, { "epoch": 0.3707764705882353, "grad_norm": 0.7244653077651626, "learning_rate": 3.2853495789824593e-06, "loss": 0.017670661211013794, "step": 39395 }, { "epoch": 0.3708235294117647, "grad_norm": 0.8132814291639704, "learning_rate": 3.2851411058093536e-06, "loss": 0.021565863490104677, "step": 39400 }, { "epoch": 0.3708705882352941, "grad_norm": 0.4890503541399013, "learning_rate": 3.284932672317458e-06, "loss": 0.020979753136634825, "step": 39405 }, { "epoch": 0.3709176470588235, "grad_norm": 0.7534102629879642, "learning_rate": 3.2847242784941855e-06, "loss": 0.018216176331043242, "step": 39410 }, { "epoch": 0.3709647058823529, "grad_norm": 0.7050360292994962, "learning_rate": 3.284515924326956e-06, "loss": 0.01859447658061981, "step": 39415 }, { "epoch": 0.3710117647058824, "grad_norm": 0.41253995497036805, "learning_rate": 3.2843076098031922e-06, "loss": 0.01666075885295868, "step": 39420 }, { "epoch": 0.3710588235294118, "grad_norm": 0.6449374367459497, "learning_rate": 3.2840993349103257e-06, "loss": 0.02429175078868866, "step": 39425 }, { "epoch": 0.3711058823529412, "grad_norm": 0.6756671020753507, "learning_rate": 3.283891099635791e-06, "loss": 0.017671731114387513, "step": 39430 }, { "epoch": 0.3711529411764706, "grad_norm": 0.575460880587583, "learning_rate": 3.2836829039670303e-06, "loss": 0.020187529921531677, "step": 39435 }, { "epoch": 0.3712, "grad_norm": 0.4376055459165723, "learning_rate": 3.283474747891489e-06, "loss": 0.016121113300323488, "step": 39440 }, { "epoch": 0.37124705882352943, "grad_norm": 0.4475563763618107, "learning_rate": 3.2832666313966204e-06, "loss": 0.0187818706035614, "step": 39445 }, { "epoch": 0.37129411764705883, "grad_norm": 0.5820515545041671, "learning_rate": 3.2830585544698824e-06, "loss": 0.021504333615303038, "step": 39450 }, { "epoch": 0.37134117647058823, "grad_norm": 0.4455297201747594, "learning_rate": 3.2828505170987385e-06, "loss": 0.01950167715549469, "step": 39455 }, { "epoch": 0.37138823529411763, "grad_norm": 0.5578590022408982, "learning_rate": 3.282642519270657e-06, "loss": 0.018948182463645935, "step": 39460 }, { "epoch": 0.3714352941176471, "grad_norm": 0.5241997501451744, "learning_rate": 3.2824345609731127e-06, "loss": 0.018204733729362488, "step": 39465 }, { "epoch": 0.3714823529411765, "grad_norm": 0.5463553056453514, "learning_rate": 3.282226642193586e-06, "loss": 0.021372449398040772, "step": 39470 }, { "epoch": 0.3715294117647059, "grad_norm": 0.5870290285627343, "learning_rate": 3.282018762919563e-06, "loss": 0.02123328447341919, "step": 39475 }, { "epoch": 0.3715764705882353, "grad_norm": 0.7570252678465655, "learning_rate": 3.281810923138533e-06, "loss": 0.020457503199577332, "step": 39480 }, { "epoch": 0.3716235294117647, "grad_norm": 0.6087816552044969, "learning_rate": 3.2816031228379953e-06, "loss": 0.022473621368408202, "step": 39485 }, { "epoch": 0.37167058823529414, "grad_norm": 0.558107847369431, "learning_rate": 3.28139536200545e-06, "loss": 0.02205417603254318, "step": 39490 }, { "epoch": 0.37171764705882354, "grad_norm": 0.5290901488051782, "learning_rate": 3.281187640628407e-06, "loss": 0.017647245526313783, "step": 39495 }, { "epoch": 0.37176470588235294, "grad_norm": 0.548698044698361, "learning_rate": 3.280979958694378e-06, "loss": 0.022305557131767274, "step": 39500 }, { "epoch": 0.37181176470588234, "grad_norm": 0.5901500619552406, "learning_rate": 3.2807723161908826e-06, "loss": 0.017535762488842012, "step": 39505 }, { "epoch": 0.37185882352941174, "grad_norm": 0.5522767122880096, "learning_rate": 3.2805647131054456e-06, "loss": 0.02448088824748993, "step": 39510 }, { "epoch": 0.3719058823529412, "grad_norm": 0.5603479755079098, "learning_rate": 3.280357149425596e-06, "loss": 0.01859053373336792, "step": 39515 }, { "epoch": 0.3719529411764706, "grad_norm": 1.0295375622642815, "learning_rate": 3.28014962513887e-06, "loss": 0.020042648911476134, "step": 39520 }, { "epoch": 0.372, "grad_norm": 0.43895005184628905, "learning_rate": 3.279942140232808e-06, "loss": 0.018149152398109436, "step": 39525 }, { "epoch": 0.3720470588235294, "grad_norm": 0.7149161904994986, "learning_rate": 3.279734694694957e-06, "loss": 0.021396100521087646, "step": 39530 }, { "epoch": 0.3720941176470588, "grad_norm": 0.515971980187776, "learning_rate": 3.279527288512869e-06, "loss": 0.020774754881858825, "step": 39535 }, { "epoch": 0.37214117647058825, "grad_norm": 0.4916234317172006, "learning_rate": 3.279319921674102e-06, "loss": 0.018310296535491943, "step": 39540 }, { "epoch": 0.37218823529411765, "grad_norm": 0.49277133966741943, "learning_rate": 3.279112594166218e-06, "loss": 0.018601128458976747, "step": 39545 }, { "epoch": 0.37223529411764705, "grad_norm": 0.6421621199308222, "learning_rate": 3.2789053059767863e-06, "loss": 0.016077791154384614, "step": 39550 }, { "epoch": 0.37228235294117645, "grad_norm": 0.46762906100205903, "learning_rate": 3.27869805709338e-06, "loss": 0.019494214653968812, "step": 39555 }, { "epoch": 0.3723294117647059, "grad_norm": 0.4651434208163098, "learning_rate": 3.2784908475035805e-06, "loss": 0.022028034925460814, "step": 39560 }, { "epoch": 0.3723764705882353, "grad_norm": 0.7326710801475121, "learning_rate": 3.2782836771949705e-06, "loss": 0.021985021233558655, "step": 39565 }, { "epoch": 0.3724235294117647, "grad_norm": 0.4886153800315121, "learning_rate": 3.278076546155143e-06, "loss": 0.01795682907104492, "step": 39570 }, { "epoch": 0.3724705882352941, "grad_norm": 0.6673129441810943, "learning_rate": 3.277869454371691e-06, "loss": 0.024089425802230835, "step": 39575 }, { "epoch": 0.3725176470588235, "grad_norm": 0.6630581990694906, "learning_rate": 3.277662401832218e-06, "loss": 0.018643808364868165, "step": 39580 }, { "epoch": 0.37256470588235296, "grad_norm": 0.5271443564484793, "learning_rate": 3.277455388524332e-06, "loss": 0.018079376220703124, "step": 39585 }, { "epoch": 0.37261176470588236, "grad_norm": 0.6151188159883495, "learning_rate": 3.2772484144356437e-06, "loss": 0.018630042672157288, "step": 39590 }, { "epoch": 0.37265882352941176, "grad_norm": 0.5877905966112761, "learning_rate": 3.2770414795537714e-06, "loss": 0.021062368154525758, "step": 39595 }, { "epoch": 0.37270588235294116, "grad_norm": 1.4680004085358396, "learning_rate": 3.276834583866338e-06, "loss": 0.01989767551422119, "step": 39600 }, { "epoch": 0.37275294117647056, "grad_norm": 0.5052903472460567, "learning_rate": 3.2766277273609732e-06, "loss": 0.017720735073089598, "step": 39605 }, { "epoch": 0.3728, "grad_norm": 0.8338944926895177, "learning_rate": 3.2764209100253114e-06, "loss": 0.02706643342971802, "step": 39610 }, { "epoch": 0.3728470588235294, "grad_norm": 0.5446999979419916, "learning_rate": 3.2762141318469925e-06, "loss": 0.0213605597615242, "step": 39615 }, { "epoch": 0.3728941176470588, "grad_norm": 0.6203335721128304, "learning_rate": 3.276007392813661e-06, "loss": 0.022666436433792115, "step": 39620 }, { "epoch": 0.3729411764705882, "grad_norm": 0.48454544746411266, "learning_rate": 3.2758006929129677e-06, "loss": 0.02538151144981384, "step": 39625 }, { "epoch": 0.3729882352941176, "grad_norm": 0.7525976075867973, "learning_rate": 3.275594032132569e-06, "loss": 0.019712693989276886, "step": 39630 }, { "epoch": 0.3730352941176471, "grad_norm": 0.402975038361932, "learning_rate": 3.2753874104601272e-06, "loss": 0.02012903392314911, "step": 39635 }, { "epoch": 0.3730823529411765, "grad_norm": 0.7857424435120554, "learning_rate": 3.2751808278833084e-06, "loss": 0.020718197524547576, "step": 39640 }, { "epoch": 0.3731294117647059, "grad_norm": 0.6479790587813464, "learning_rate": 3.274974284389786e-06, "loss": 0.021020907163619994, "step": 39645 }, { "epoch": 0.3731764705882353, "grad_norm": 0.5781824968710617, "learning_rate": 3.274767779967237e-06, "loss": 0.020312952995300292, "step": 39650 }, { "epoch": 0.37322352941176473, "grad_norm": 0.6396784108420497, "learning_rate": 3.2745613146033454e-06, "loss": 0.023844076693058013, "step": 39655 }, { "epoch": 0.37327058823529413, "grad_norm": 0.6545135660950946, "learning_rate": 3.2743548882857997e-06, "loss": 0.02027226835489273, "step": 39660 }, { "epoch": 0.37331764705882353, "grad_norm": 0.7295041031916585, "learning_rate": 3.2741485010022944e-06, "loss": 0.03246963918209076, "step": 39665 }, { "epoch": 0.37336470588235293, "grad_norm": 0.38786533437458115, "learning_rate": 3.2739421527405297e-06, "loss": 0.01780279874801636, "step": 39670 }, { "epoch": 0.37341176470588233, "grad_norm": 0.6941315297499682, "learning_rate": 3.2737358434882092e-06, "loss": 0.02164058983325958, "step": 39675 }, { "epoch": 0.3734588235294118, "grad_norm": 0.6264565930517826, "learning_rate": 3.273529573233045e-06, "loss": 0.025335592031478883, "step": 39680 }, { "epoch": 0.3735058823529412, "grad_norm": 0.3455529306945131, "learning_rate": 3.2733233419627524e-06, "loss": 0.017875300347805025, "step": 39685 }, { "epoch": 0.3735529411764706, "grad_norm": 0.5077816420762442, "learning_rate": 3.2731171496650533e-06, "loss": 0.019906048476696015, "step": 39690 }, { "epoch": 0.3736, "grad_norm": 0.570814754860317, "learning_rate": 3.2729109963276735e-06, "loss": 0.019180928170681, "step": 39695 }, { "epoch": 0.3736470588235294, "grad_norm": 0.6704059051711814, "learning_rate": 3.2727048819383463e-06, "loss": 0.020276093482971193, "step": 39700 }, { "epoch": 0.37369411764705884, "grad_norm": 0.5628269493584589, "learning_rate": 3.2724988064848075e-06, "loss": 0.021843601763248444, "step": 39705 }, { "epoch": 0.37374117647058824, "grad_norm": 0.6972390489303555, "learning_rate": 3.2722927699548026e-06, "loss": 0.02173524647951126, "step": 39710 }, { "epoch": 0.37378823529411764, "grad_norm": 0.575607902993943, "learning_rate": 3.2720867723360784e-06, "loss": 0.020673218369483947, "step": 39715 }, { "epoch": 0.37383529411764704, "grad_norm": 0.41447614099259433, "learning_rate": 3.2718808136163886e-06, "loss": 0.021614024043083192, "step": 39720 }, { "epoch": 0.37388235294117644, "grad_norm": 0.7007123785216038, "learning_rate": 3.2716748937834934e-06, "loss": 0.018900610506534576, "step": 39725 }, { "epoch": 0.3739294117647059, "grad_norm": 0.6003778203139859, "learning_rate": 3.271469012825157e-06, "loss": 0.02201523780822754, "step": 39730 }, { "epoch": 0.3739764705882353, "grad_norm": 0.6837473735344984, "learning_rate": 3.271263170729149e-06, "loss": 0.03058650195598602, "step": 39735 }, { "epoch": 0.3740235294117647, "grad_norm": 0.6026919737758989, "learning_rate": 3.2710573674832454e-06, "loss": 0.016554926335811616, "step": 39740 }, { "epoch": 0.3740705882352941, "grad_norm": 0.5517185473374989, "learning_rate": 3.2708516030752265e-06, "loss": 0.019874031841754913, "step": 39745 }, { "epoch": 0.37411764705882355, "grad_norm": 0.6436905845527567, "learning_rate": 3.270645877492878e-06, "loss": 0.015752531588077545, "step": 39750 }, { "epoch": 0.37416470588235295, "grad_norm": 0.6362110568031017, "learning_rate": 3.2704401907239925e-06, "loss": 0.02021375745534897, "step": 39755 }, { "epoch": 0.37421176470588235, "grad_norm": 0.3476425339265173, "learning_rate": 3.270234542756366e-06, "loss": 0.019663888216018676, "step": 39760 }, { "epoch": 0.37425882352941175, "grad_norm": 0.6104981456580191, "learning_rate": 3.2700289335778013e-06, "loss": 0.02299458682537079, "step": 39765 }, { "epoch": 0.37430588235294115, "grad_norm": 0.5244247459526982, "learning_rate": 3.2698233631761064e-06, "loss": 0.017274612188339235, "step": 39770 }, { "epoch": 0.3743529411764706, "grad_norm": 0.3540961721698128, "learning_rate": 3.2696178315390933e-06, "loss": 0.01718716323375702, "step": 39775 }, { "epoch": 0.3744, "grad_norm": 0.3839446651657405, "learning_rate": 3.26941233865458e-06, "loss": 0.017046204209327696, "step": 39780 }, { "epoch": 0.3744470588235294, "grad_norm": 0.6342768564715443, "learning_rate": 3.2692068845103924e-06, "loss": 0.020778751373291014, "step": 39785 }, { "epoch": 0.3744941176470588, "grad_norm": 0.793457520875053, "learning_rate": 3.269001469094357e-06, "loss": 0.019374434649944306, "step": 39790 }, { "epoch": 0.3745411764705882, "grad_norm": 0.6215701482482583, "learning_rate": 3.26879609239431e-06, "loss": 0.019509604573249816, "step": 39795 }, { "epoch": 0.37458823529411767, "grad_norm": 0.5659851333683893, "learning_rate": 3.2685907543980903e-06, "loss": 0.021240074932575227, "step": 39800 }, { "epoch": 0.37463529411764707, "grad_norm": 0.4696587706335698, "learning_rate": 3.268385455093544e-06, "loss": 0.022650046646595, "step": 39805 }, { "epoch": 0.37468235294117647, "grad_norm": 0.4490176780189948, "learning_rate": 3.26818019446852e-06, "loss": 0.015841007232666016, "step": 39810 }, { "epoch": 0.37472941176470587, "grad_norm": 0.4482646214476163, "learning_rate": 3.267974972510875e-06, "loss": 0.024217528104782105, "step": 39815 }, { "epoch": 0.37477647058823527, "grad_norm": 0.5516983056704622, "learning_rate": 3.2677697892084703e-06, "loss": 0.019246891140937805, "step": 39820 }, { "epoch": 0.3748235294117647, "grad_norm": 0.45135713342571715, "learning_rate": 3.2675646445491716e-06, "loss": 0.02022491693496704, "step": 39825 }, { "epoch": 0.3748705882352941, "grad_norm": 0.5213246194772005, "learning_rate": 3.2673595385208522e-06, "loss": 0.015363629162311553, "step": 39830 }, { "epoch": 0.3749176470588235, "grad_norm": 0.5620975330112257, "learning_rate": 3.2671544711113873e-06, "loss": 0.02022155374288559, "step": 39835 }, { "epoch": 0.3749647058823529, "grad_norm": 0.6370978047776836, "learning_rate": 3.2669494423086613e-06, "loss": 0.020748281478881837, "step": 39840 }, { "epoch": 0.3750117647058824, "grad_norm": 0.6925111458277717, "learning_rate": 3.2667444521005596e-06, "loss": 0.024808873236179353, "step": 39845 }, { "epoch": 0.3750588235294118, "grad_norm": 0.553728939933696, "learning_rate": 3.2665395004749777e-06, "loss": 0.021462652087211608, "step": 39850 }, { "epoch": 0.3751058823529412, "grad_norm": 0.5403262892904691, "learning_rate": 3.2663345874198126e-06, "loss": 0.021834270656108858, "step": 39855 }, { "epoch": 0.3751529411764706, "grad_norm": 0.6512940081031303, "learning_rate": 3.266129712922969e-06, "loss": 0.02286500036716461, "step": 39860 }, { "epoch": 0.3752, "grad_norm": 0.5487378005790946, "learning_rate": 3.2659248769723552e-06, "loss": 0.017790013551712038, "step": 39865 }, { "epoch": 0.37524705882352943, "grad_norm": 0.5665965752536928, "learning_rate": 3.2657200795558857e-06, "loss": 0.02176848202943802, "step": 39870 }, { "epoch": 0.37529411764705883, "grad_norm": 0.722236210406319, "learning_rate": 3.26551532066148e-06, "loss": 0.020275714993476867, "step": 39875 }, { "epoch": 0.37534117647058823, "grad_norm": 0.692022789811631, "learning_rate": 3.2653106002770634e-06, "loss": 0.017785288393497467, "step": 39880 }, { "epoch": 0.37538823529411763, "grad_norm": 0.6252220452786015, "learning_rate": 3.265105918390565e-06, "loss": 0.017077353596687318, "step": 39885 }, { "epoch": 0.37543529411764703, "grad_norm": 0.699146484821938, "learning_rate": 3.2649012749899228e-06, "loss": 0.01864645630121231, "step": 39890 }, { "epoch": 0.3754823529411765, "grad_norm": 0.5239949360621402, "learning_rate": 3.264696670063075e-06, "loss": 0.017909285426139832, "step": 39895 }, { "epoch": 0.3755294117647059, "grad_norm": 0.7646595204689, "learning_rate": 3.2644921035979698e-06, "loss": 0.023523786664009096, "step": 39900 }, { "epoch": 0.3755764705882353, "grad_norm": 0.4120895550356718, "learning_rate": 3.2642875755825572e-06, "loss": 0.022552160918712615, "step": 39905 }, { "epoch": 0.3756235294117647, "grad_norm": 0.5663596364881324, "learning_rate": 3.264083086004794e-06, "loss": 0.022717756032943726, "step": 39910 }, { "epoch": 0.37567058823529415, "grad_norm": 0.4179202456824557, "learning_rate": 3.2638786348526437e-06, "loss": 0.014551883935928345, "step": 39915 }, { "epoch": 0.37571764705882355, "grad_norm": 0.5266970732231769, "learning_rate": 3.263674222114071e-06, "loss": 0.019927284121513365, "step": 39920 }, { "epoch": 0.37576470588235295, "grad_norm": 0.5865167117928438, "learning_rate": 3.263469847777051e-06, "loss": 0.019547474384307862, "step": 39925 }, { "epoch": 0.37581176470588235, "grad_norm": 0.4116519905419593, "learning_rate": 3.263265511829559e-06, "loss": 0.015958404541015624, "step": 39930 }, { "epoch": 0.37585882352941175, "grad_norm": 0.6650244786720206, "learning_rate": 3.2630612142595807e-06, "loss": 0.023105157911777495, "step": 39935 }, { "epoch": 0.3759058823529412, "grad_norm": 0.628411455841565, "learning_rate": 3.2628569550551033e-06, "loss": 0.022743435204029085, "step": 39940 }, { "epoch": 0.3759529411764706, "grad_norm": 0.4838051942160108, "learning_rate": 3.2626527342041192e-06, "loss": 0.01753529757261276, "step": 39945 }, { "epoch": 0.376, "grad_norm": 0.5808595453122921, "learning_rate": 3.262448551694629e-06, "loss": 0.023848621547222136, "step": 39950 }, { "epoch": 0.3760470588235294, "grad_norm": 0.8431726662908379, "learning_rate": 3.2622444075146357e-06, "loss": 0.018259093165397644, "step": 39955 }, { "epoch": 0.3760941176470588, "grad_norm": 0.5113073560195132, "learning_rate": 3.262040301652149e-06, "loss": 0.020037752389907838, "step": 39960 }, { "epoch": 0.37614117647058826, "grad_norm": 0.6044703111871225, "learning_rate": 3.2618362340951843e-06, "loss": 0.019388166069984437, "step": 39965 }, { "epoch": 0.37618823529411766, "grad_norm": 0.5508982719541755, "learning_rate": 3.26163220483176e-06, "loss": 0.018922793865203857, "step": 39970 }, { "epoch": 0.37623529411764706, "grad_norm": 0.5932851247410539, "learning_rate": 3.2614282138499022e-06, "loss": 0.01831653416156769, "step": 39975 }, { "epoch": 0.37628235294117646, "grad_norm": 0.4468588939773106, "learning_rate": 3.261224261137641e-06, "loss": 0.01989995539188385, "step": 39980 }, { "epoch": 0.37632941176470586, "grad_norm": 0.47387818262615805, "learning_rate": 3.261020346683012e-06, "loss": 0.018971720337867738, "step": 39985 }, { "epoch": 0.3763764705882353, "grad_norm": 0.7330760640916165, "learning_rate": 3.260816470474057e-06, "loss": 0.018120849132537843, "step": 39990 }, { "epoch": 0.3764235294117647, "grad_norm": 0.5043748741058736, "learning_rate": 3.2606126324988197e-06, "loss": 0.021114909648895265, "step": 39995 }, { "epoch": 0.3764705882352941, "grad_norm": 0.41966384112227817, "learning_rate": 3.260408832745353e-06, "loss": 0.023005166649818422, "step": 40000 }, { "epoch": 0.3765176470588235, "grad_norm": 0.7933182828491263, "learning_rate": 3.260205071201714e-06, "loss": 0.025297072529792786, "step": 40005 }, { "epoch": 0.37656470588235297, "grad_norm": 0.2941617784675911, "learning_rate": 3.2600013478559633e-06, "loss": 0.01982952654361725, "step": 40010 }, { "epoch": 0.37661176470588237, "grad_norm": 0.6322822282298742, "learning_rate": 3.2597976626961682e-06, "loss": 0.017569534480571747, "step": 40015 }, { "epoch": 0.37665882352941177, "grad_norm": 0.5337354191453572, "learning_rate": 3.2595940157104007e-06, "loss": 0.022594697773456573, "step": 40020 }, { "epoch": 0.37670588235294117, "grad_norm": 0.7547831181383919, "learning_rate": 3.2593904068867383e-06, "loss": 0.024693579971790315, "step": 40025 }, { "epoch": 0.37675294117647057, "grad_norm": 0.6037954515109072, "learning_rate": 3.259186836213264e-06, "loss": 0.0213726669549942, "step": 40030 }, { "epoch": 0.3768, "grad_norm": 0.5209158284482746, "learning_rate": 3.2589833036780653e-06, "loss": 0.019069841504096983, "step": 40035 }, { "epoch": 0.3768470588235294, "grad_norm": 0.34031529675869726, "learning_rate": 3.2587798092692357e-06, "loss": 0.014612405002117157, "step": 40040 }, { "epoch": 0.3768941176470588, "grad_norm": 0.5180624112318681, "learning_rate": 3.258576352974872e-06, "loss": 0.01834573894739151, "step": 40045 }, { "epoch": 0.3769411764705882, "grad_norm": 0.48597707430750414, "learning_rate": 3.2583729347830793e-06, "loss": 0.017894384264945985, "step": 40050 }, { "epoch": 0.3769882352941176, "grad_norm": 0.5393605226808917, "learning_rate": 3.258169554681965e-06, "loss": 0.02288229763507843, "step": 40055 }, { "epoch": 0.3770352941176471, "grad_norm": 0.5346410792582158, "learning_rate": 3.257966212659644e-06, "loss": 0.022061419486999512, "step": 40060 }, { "epoch": 0.3770823529411765, "grad_norm": 0.6372465808995532, "learning_rate": 3.257762908704235e-06, "loss": 0.02292472720146179, "step": 40065 }, { "epoch": 0.3771294117647059, "grad_norm": 0.32137447214083636, "learning_rate": 3.2575596428038612e-06, "loss": 0.020890182256698607, "step": 40070 }, { "epoch": 0.3771764705882353, "grad_norm": 0.4933751440162188, "learning_rate": 3.2573564149466533e-06, "loss": 0.02135975956916809, "step": 40075 }, { "epoch": 0.3772235294117647, "grad_norm": 0.5028222726877701, "learning_rate": 3.257153225120745e-06, "loss": 0.020970943570137023, "step": 40080 }, { "epoch": 0.37727058823529414, "grad_norm": 0.650920633992544, "learning_rate": 3.2569500733142767e-06, "loss": 0.01902173161506653, "step": 40085 }, { "epoch": 0.37731764705882354, "grad_norm": 0.474271953241337, "learning_rate": 3.2567469595153927e-06, "loss": 0.020633912086486815, "step": 40090 }, { "epoch": 0.37736470588235294, "grad_norm": 0.7415727097727469, "learning_rate": 3.256543883712244e-06, "loss": 0.021939995884895324, "step": 40095 }, { "epoch": 0.37741176470588234, "grad_norm": 0.594721247113896, "learning_rate": 3.2563408458929852e-06, "loss": 0.021068474650382994, "step": 40100 }, { "epoch": 0.3774588235294118, "grad_norm": 0.48884124372955173, "learning_rate": 3.2561378460457765e-06, "loss": 0.01925729513168335, "step": 40105 }, { "epoch": 0.3775058823529412, "grad_norm": 0.6014837167636492, "learning_rate": 3.255934884158784e-06, "loss": 0.023472651839256287, "step": 40110 }, { "epoch": 0.3775529411764706, "grad_norm": 0.388227236636656, "learning_rate": 3.255731960220178e-06, "loss": 0.018035250902175903, "step": 40115 }, { "epoch": 0.3776, "grad_norm": 0.5149638373559445, "learning_rate": 3.255529074218135e-06, "loss": 0.019356933236122132, "step": 40120 }, { "epoch": 0.3776470588235294, "grad_norm": 0.6166499159275448, "learning_rate": 3.255326226140837e-06, "loss": 0.022906915843486787, "step": 40125 }, { "epoch": 0.37769411764705885, "grad_norm": 0.56179673810858, "learning_rate": 3.255123415976468e-06, "loss": 0.017339050769805908, "step": 40130 }, { "epoch": 0.37774117647058825, "grad_norm": 0.41948451620210825, "learning_rate": 3.2549206437132208e-06, "loss": 0.016900093853473665, "step": 40135 }, { "epoch": 0.37778823529411765, "grad_norm": 0.6048721573991711, "learning_rate": 3.2547179093392918e-06, "loss": 0.023684930801391602, "step": 40140 }, { "epoch": 0.37783529411764705, "grad_norm": 0.6381539187962745, "learning_rate": 3.2545152128428825e-06, "loss": 0.025686678290367127, "step": 40145 }, { "epoch": 0.37788235294117645, "grad_norm": 0.6575256035400174, "learning_rate": 3.2543125542121995e-06, "loss": 0.026779964566230774, "step": 40150 }, { "epoch": 0.3779294117647059, "grad_norm": 0.514629341657911, "learning_rate": 3.254109933435455e-06, "loss": 0.02057448774576187, "step": 40155 }, { "epoch": 0.3779764705882353, "grad_norm": 0.4395308898201289, "learning_rate": 3.2539073505008667e-06, "loss": 0.018505558371543884, "step": 40160 }, { "epoch": 0.3780235294117647, "grad_norm": 0.5503650803462111, "learning_rate": 3.2537048053966556e-06, "loss": 0.014751306176185608, "step": 40165 }, { "epoch": 0.3780705882352941, "grad_norm": 0.559090397463391, "learning_rate": 3.25350229811105e-06, "loss": 0.02487761974334717, "step": 40170 }, { "epoch": 0.3781176470588235, "grad_norm": 0.5030172829544898, "learning_rate": 3.253299828632283e-06, "loss": 0.0193744957447052, "step": 40175 }, { "epoch": 0.37816470588235296, "grad_norm": 0.43153107285230624, "learning_rate": 3.2530973969485915e-06, "loss": 0.02454274594783783, "step": 40180 }, { "epoch": 0.37821176470588236, "grad_norm": 0.4605454282150642, "learning_rate": 3.2528950030482178e-06, "loss": 0.01976584643125534, "step": 40185 }, { "epoch": 0.37825882352941176, "grad_norm": 0.4703793186673055, "learning_rate": 3.25269264691941e-06, "loss": 0.02257867008447647, "step": 40190 }, { "epoch": 0.37830588235294116, "grad_norm": 0.38993731144486254, "learning_rate": 3.2524903285504213e-06, "loss": 0.016028571128845214, "step": 40195 }, { "epoch": 0.3783529411764706, "grad_norm": 0.470683252075955, "learning_rate": 3.2522880479295104e-06, "loss": 0.01914307028055191, "step": 40200 }, { "epoch": 0.3784, "grad_norm": 0.7385181375877117, "learning_rate": 3.2520858050449396e-06, "loss": 0.022266796231269835, "step": 40205 }, { "epoch": 0.3784470588235294, "grad_norm": 0.372087191089287, "learning_rate": 3.2518835998849775e-06, "loss": 0.019001477956771852, "step": 40210 }, { "epoch": 0.3784941176470588, "grad_norm": 0.5954965914370449, "learning_rate": 3.251681432437897e-06, "loss": 0.019706591963768005, "step": 40215 }, { "epoch": 0.3785411764705882, "grad_norm": 0.665656926581016, "learning_rate": 3.251479302691978e-06, "loss": 0.017436721920967103, "step": 40220 }, { "epoch": 0.37858823529411767, "grad_norm": 0.6046814458655172, "learning_rate": 3.251277210635503e-06, "loss": 0.0202983096241951, "step": 40225 }, { "epoch": 0.37863529411764707, "grad_norm": 0.4051552531744299, "learning_rate": 3.2510751562567604e-06, "loss": 0.017985741794109344, "step": 40230 }, { "epoch": 0.37868235294117647, "grad_norm": 0.6542434228743783, "learning_rate": 3.2508731395440456e-06, "loss": 0.018424974381923677, "step": 40235 }, { "epoch": 0.37872941176470587, "grad_norm": 0.4375757818004521, "learning_rate": 3.250671160485656e-06, "loss": 0.012442480027675628, "step": 40240 }, { "epoch": 0.37877647058823527, "grad_norm": 0.5143023905308822, "learning_rate": 3.2504692190698966e-06, "loss": 0.018899089097976683, "step": 40245 }, { "epoch": 0.3788235294117647, "grad_norm": 0.7390660491667339, "learning_rate": 3.2502673152850763e-06, "loss": 0.01780053526163101, "step": 40250 }, { "epoch": 0.3788705882352941, "grad_norm": 0.4028025444163904, "learning_rate": 3.2500654491195087e-06, "loss": 0.01900675892829895, "step": 40255 }, { "epoch": 0.3789176470588235, "grad_norm": 0.4656318564539522, "learning_rate": 3.2498636205615136e-06, "loss": 0.016849732398986815, "step": 40260 }, { "epoch": 0.3789647058823529, "grad_norm": 0.5998689173072562, "learning_rate": 3.2496618295994146e-06, "loss": 0.018754670023918153, "step": 40265 }, { "epoch": 0.3790117647058823, "grad_norm": 0.39943173632171314, "learning_rate": 3.249460076221542e-06, "loss": 0.019679459929466247, "step": 40270 }, { "epoch": 0.3790588235294118, "grad_norm": 0.8828999988161937, "learning_rate": 3.2492583604162304e-06, "loss": 0.019170597195625305, "step": 40275 }, { "epoch": 0.3791058823529412, "grad_norm": 0.5448890068381163, "learning_rate": 3.249056682171818e-06, "loss": 0.023697033524513245, "step": 40280 }, { "epoch": 0.3791529411764706, "grad_norm": 0.4030786253989069, "learning_rate": 3.2488550414766506e-06, "loss": 0.02034306526184082, "step": 40285 }, { "epoch": 0.3792, "grad_norm": 0.7035185798409805, "learning_rate": 3.2486534383190784e-06, "loss": 0.02157864421606064, "step": 40290 }, { "epoch": 0.37924705882352944, "grad_norm": 0.5232608052036033, "learning_rate": 3.2484518726874543e-06, "loss": 0.024881644546985625, "step": 40295 }, { "epoch": 0.37929411764705884, "grad_norm": 0.5848548632975037, "learning_rate": 3.248250344570139e-06, "loss": 0.01623763144016266, "step": 40300 }, { "epoch": 0.37934117647058824, "grad_norm": 0.5652005580727005, "learning_rate": 3.2480488539554978e-06, "loss": 0.023256188631057738, "step": 40305 }, { "epoch": 0.37938823529411764, "grad_norm": 0.4929897026026688, "learning_rate": 3.2478474008318995e-06, "loss": 0.017029246687889098, "step": 40310 }, { "epoch": 0.37943529411764704, "grad_norm": 0.46678978246729863, "learning_rate": 3.2476459851877206e-06, "loss": 0.016894268989562988, "step": 40315 }, { "epoch": 0.3794823529411765, "grad_norm": 0.7239408191060612, "learning_rate": 3.24744460701134e-06, "loss": 0.0174432709813118, "step": 40320 }, { "epoch": 0.3795294117647059, "grad_norm": 1.2408137192807862, "learning_rate": 3.2472432662911422e-06, "loss": 0.023255354166030882, "step": 40325 }, { "epoch": 0.3795764705882353, "grad_norm": 0.43964205804231127, "learning_rate": 3.247041963015519e-06, "loss": 0.01881006360054016, "step": 40330 }, { "epoch": 0.3796235294117647, "grad_norm": 0.5878957644440573, "learning_rate": 3.246840697172864e-06, "loss": 0.01570252627134323, "step": 40335 }, { "epoch": 0.3796705882352941, "grad_norm": 0.4548443409324391, "learning_rate": 3.2466394687515783e-06, "loss": 0.015645523369312287, "step": 40340 }, { "epoch": 0.37971764705882355, "grad_norm": 0.5239406440401021, "learning_rate": 3.2464382777400664e-06, "loss": 0.019019870460033415, "step": 40345 }, { "epoch": 0.37976470588235295, "grad_norm": 0.40461721274040546, "learning_rate": 3.2462371241267393e-06, "loss": 0.019063693284988404, "step": 40350 }, { "epoch": 0.37981176470588235, "grad_norm": 0.6822247619474624, "learning_rate": 3.246036007900011e-06, "loss": 0.018651488423347472, "step": 40355 }, { "epoch": 0.37985882352941175, "grad_norm": 0.716679234069935, "learning_rate": 3.245834929048302e-06, "loss": 0.02255651354789734, "step": 40360 }, { "epoch": 0.37990588235294115, "grad_norm": 0.49075483391851216, "learning_rate": 3.2456338875600384e-06, "loss": 0.019541701674461363, "step": 40365 }, { "epoch": 0.3799529411764706, "grad_norm": 0.5925797528877966, "learning_rate": 3.245432883423651e-06, "loss": 0.021118339896202088, "step": 40370 }, { "epoch": 0.38, "grad_norm": 0.5994367738603664, "learning_rate": 3.245231916627573e-06, "loss": 0.01689388006925583, "step": 40375 }, { "epoch": 0.3800470588235294, "grad_norm": 0.6463653276081955, "learning_rate": 3.245030987160247e-06, "loss": 0.020615187287330628, "step": 40380 }, { "epoch": 0.3800941176470588, "grad_norm": 0.46584070399789806, "learning_rate": 3.2448300950101164e-06, "loss": 0.02185865044593811, "step": 40385 }, { "epoch": 0.38014117647058826, "grad_norm": 0.5911548065388516, "learning_rate": 3.2446292401656326e-06, "loss": 0.021741291880607604, "step": 40390 }, { "epoch": 0.38018823529411766, "grad_norm": 0.733642265261239, "learning_rate": 3.244428422615251e-06, "loss": 0.016972702741622925, "step": 40395 }, { "epoch": 0.38023529411764706, "grad_norm": 0.36339317312110636, "learning_rate": 3.2442276423474312e-06, "loss": 0.01628466695547104, "step": 40400 }, { "epoch": 0.38028235294117646, "grad_norm": 0.6253682057194128, "learning_rate": 3.2440268993506395e-06, "loss": 0.023148462176322937, "step": 40405 }, { "epoch": 0.38032941176470586, "grad_norm": 0.5804665848267494, "learning_rate": 3.2438261936133453e-06, "loss": 0.022673670947551728, "step": 40410 }, { "epoch": 0.3803764705882353, "grad_norm": 0.5285600562598914, "learning_rate": 3.243625525124025e-06, "loss": 0.02251197099685669, "step": 40415 }, { "epoch": 0.3804235294117647, "grad_norm": 0.58017294307414, "learning_rate": 3.2434248938711583e-06, "loss": 0.02295785993337631, "step": 40420 }, { "epoch": 0.3804705882352941, "grad_norm": 0.6452244137430456, "learning_rate": 3.24322429984323e-06, "loss": 0.025835072994232176, "step": 40425 }, { "epoch": 0.3805176470588235, "grad_norm": 0.5296830607168652, "learning_rate": 3.243023743028731e-06, "loss": 0.023533466458320617, "step": 40430 }, { "epoch": 0.3805647058823529, "grad_norm": 0.52330076414457, "learning_rate": 3.2428232234161568e-06, "loss": 0.02114410698413849, "step": 40435 }, { "epoch": 0.38061176470588237, "grad_norm": 0.5634784294057901, "learning_rate": 3.242622740994007e-06, "loss": 0.01927213966846466, "step": 40440 }, { "epoch": 0.38065882352941177, "grad_norm": 0.6082093858989661, "learning_rate": 3.2424222957507873e-06, "loss": 0.019026653468608858, "step": 40445 }, { "epoch": 0.38070588235294117, "grad_norm": 0.4740221538519271, "learning_rate": 3.2422218876750073e-06, "loss": 0.02320934534072876, "step": 40450 }, { "epoch": 0.38075294117647057, "grad_norm": 0.4311399106787998, "learning_rate": 3.2420215167551826e-06, "loss": 0.020012339949607848, "step": 40455 }, { "epoch": 0.3808, "grad_norm": 0.4632217176421117, "learning_rate": 3.241821182979834e-06, "loss": 0.013330155611038208, "step": 40460 }, { "epoch": 0.3808470588235294, "grad_norm": 0.5497950592419164, "learning_rate": 3.241620886337486e-06, "loss": 0.01737416684627533, "step": 40465 }, { "epoch": 0.3808941176470588, "grad_norm": 0.5313311778470752, "learning_rate": 3.2414206268166677e-06, "loss": 0.014367970824241637, "step": 40470 }, { "epoch": 0.3809411764705882, "grad_norm": 0.7653370013417369, "learning_rate": 3.2412204044059154e-06, "loss": 0.021140654385089875, "step": 40475 }, { "epoch": 0.3809882352941176, "grad_norm": 0.46337441734496776, "learning_rate": 3.241020219093769e-06, "loss": 0.021748965978622435, "step": 40480 }, { "epoch": 0.3810352941176471, "grad_norm": 0.4363854142235331, "learning_rate": 3.2408200708687726e-06, "loss": 0.018704453110694887, "step": 40485 }, { "epoch": 0.3810823529411765, "grad_norm": 0.5628037401936196, "learning_rate": 3.240619959719477e-06, "loss": 0.01514890193939209, "step": 40490 }, { "epoch": 0.3811294117647059, "grad_norm": 0.4540590420526239, "learning_rate": 3.2404198856344365e-06, "loss": 0.020088696479797365, "step": 40495 }, { "epoch": 0.3811764705882353, "grad_norm": 0.5403643058120546, "learning_rate": 3.240219848602211e-06, "loss": 0.02492585629224777, "step": 40500 }, { "epoch": 0.3812235294117647, "grad_norm": 0.6569668468058701, "learning_rate": 3.240019848611366e-06, "loss": 0.020195811986923218, "step": 40505 }, { "epoch": 0.38127058823529414, "grad_norm": 0.4121351515319014, "learning_rate": 3.23981988565047e-06, "loss": 0.01486644446849823, "step": 40510 }, { "epoch": 0.38131764705882354, "grad_norm": 0.45153108843943696, "learning_rate": 3.2396199597080975e-06, "loss": 0.02320496439933777, "step": 40515 }, { "epoch": 0.38136470588235294, "grad_norm": 0.4738507991532003, "learning_rate": 3.2394200707728286e-06, "loss": 0.01977382004261017, "step": 40520 }, { "epoch": 0.38141176470588234, "grad_norm": 0.4811401250301859, "learning_rate": 3.2392202188332476e-06, "loss": 0.017659388482570648, "step": 40525 }, { "epoch": 0.38145882352941174, "grad_norm": 0.41212355669425865, "learning_rate": 3.2390204038779457e-06, "loss": 0.020831488072872162, "step": 40530 }, { "epoch": 0.3815058823529412, "grad_norm": 0.9905468860190241, "learning_rate": 3.2388206258955137e-06, "loss": 0.020870402455329895, "step": 40535 }, { "epoch": 0.3815529411764706, "grad_norm": 0.5097183123114942, "learning_rate": 3.238620884874553e-06, "loss": 0.018252411484718324, "step": 40540 }, { "epoch": 0.3816, "grad_norm": 0.5558461230972906, "learning_rate": 3.2384211808036674e-06, "loss": 0.020527687668800355, "step": 40545 }, { "epoch": 0.3816470588235294, "grad_norm": 0.4801356021721045, "learning_rate": 3.2382215136714673e-06, "loss": 0.0184334933757782, "step": 40550 }, { "epoch": 0.38169411764705885, "grad_norm": 0.6593656376143465, "learning_rate": 3.238021883466564e-06, "loss": 0.029647642374038698, "step": 40555 }, { "epoch": 0.38174117647058825, "grad_norm": 0.7268839263871718, "learning_rate": 3.2378222901775787e-06, "loss": 0.019248220324516296, "step": 40560 }, { "epoch": 0.38178823529411765, "grad_norm": 0.5724372862396004, "learning_rate": 3.2376227337931336e-06, "loss": 0.0188149631023407, "step": 40565 }, { "epoch": 0.38183529411764705, "grad_norm": 0.5834656730543472, "learning_rate": 3.237423214301859e-06, "loss": 0.020013260841369628, "step": 40570 }, { "epoch": 0.38188235294117645, "grad_norm": 0.447724079505318, "learning_rate": 3.2372237316923875e-06, "loss": 0.018402916193008424, "step": 40575 }, { "epoch": 0.3819294117647059, "grad_norm": 0.6645417933516773, "learning_rate": 3.237024285953358e-06, "loss": 0.02650604248046875, "step": 40580 }, { "epoch": 0.3819764705882353, "grad_norm": 0.4733774739697373, "learning_rate": 3.2368248770734136e-06, "loss": 0.021013829112052917, "step": 40585 }, { "epoch": 0.3820235294117647, "grad_norm": 0.5570573298973883, "learning_rate": 3.236625505041203e-06, "loss": 0.025955939292907716, "step": 40590 }, { "epoch": 0.3820705882352941, "grad_norm": 0.9284578247535926, "learning_rate": 3.236426169845379e-06, "loss": 0.019540350139141082, "step": 40595 }, { "epoch": 0.3821176470588235, "grad_norm": 0.6472946543819711, "learning_rate": 3.2362268714746004e-06, "loss": 0.01738675534725189, "step": 40600 }, { "epoch": 0.38216470588235296, "grad_norm": 0.48700973535413844, "learning_rate": 3.23602760991753e-06, "loss": 0.02538301944732666, "step": 40605 }, { "epoch": 0.38221176470588236, "grad_norm": 0.8969424120315294, "learning_rate": 3.235828385162835e-06, "loss": 0.021742764115333556, "step": 40610 }, { "epoch": 0.38225882352941176, "grad_norm": 0.5183301672052006, "learning_rate": 3.235629197199188e-06, "loss": 0.019059932231903075, "step": 40615 }, { "epoch": 0.38230588235294116, "grad_norm": 0.6299638238465688, "learning_rate": 3.235430046015269e-06, "loss": 0.02896355390548706, "step": 40620 }, { "epoch": 0.38235294117647056, "grad_norm": 0.6945195784164212, "learning_rate": 3.2352309315997582e-06, "loss": 0.02073328197002411, "step": 40625 }, { "epoch": 0.3824, "grad_norm": 0.40360705300429023, "learning_rate": 3.2350318539413435e-06, "loss": 0.020017705857753754, "step": 40630 }, { "epoch": 0.3824470588235294, "grad_norm": 0.6682624211415701, "learning_rate": 3.2348328130287172e-06, "loss": 0.016018114984035492, "step": 40635 }, { "epoch": 0.3824941176470588, "grad_norm": 0.6067560877775248, "learning_rate": 3.2346338088505767e-06, "loss": 0.023728346824645995, "step": 40640 }, { "epoch": 0.3825411764705882, "grad_norm": 0.7486634713801807, "learning_rate": 3.234434841395624e-06, "loss": 0.02344164252281189, "step": 40645 }, { "epoch": 0.3825882352941177, "grad_norm": 0.6806655212303627, "learning_rate": 3.234235910652566e-06, "loss": 0.021953633427619933, "step": 40650 }, { "epoch": 0.3826352941176471, "grad_norm": 0.6319409056948453, "learning_rate": 3.2340370166101143e-06, "loss": 0.016425123810768126, "step": 40655 }, { "epoch": 0.3826823529411765, "grad_norm": 0.5003088901149138, "learning_rate": 3.2338381592569855e-06, "loss": 0.01891465187072754, "step": 40660 }, { "epoch": 0.3827294117647059, "grad_norm": 0.5525999742351314, "learning_rate": 3.2336393385819015e-06, "loss": 0.0186870276927948, "step": 40665 }, { "epoch": 0.3827764705882353, "grad_norm": 0.8948926766122387, "learning_rate": 3.233440554573588e-06, "loss": 0.023002269864082336, "step": 40670 }, { "epoch": 0.38282352941176473, "grad_norm": 0.449956190401235, "learning_rate": 3.233241807220776e-06, "loss": 0.01895463764667511, "step": 40675 }, { "epoch": 0.38287058823529413, "grad_norm": 0.7522081871835451, "learning_rate": 3.2330430965122028e-06, "loss": 0.023603472113609313, "step": 40680 }, { "epoch": 0.38291764705882353, "grad_norm": 0.5504651604865483, "learning_rate": 3.232844422436607e-06, "loss": 0.020601484179496764, "step": 40685 }, { "epoch": 0.38296470588235293, "grad_norm": 0.5862573045461925, "learning_rate": 3.2326457849827376e-06, "loss": 0.01955651491880417, "step": 40690 }, { "epoch": 0.38301176470588233, "grad_norm": 0.4616071200849721, "learning_rate": 3.2324471841393414e-06, "loss": 0.022536420822143556, "step": 40695 }, { "epoch": 0.3830588235294118, "grad_norm": 0.6072127507817353, "learning_rate": 3.232248619895176e-06, "loss": 0.036696338653564455, "step": 40700 }, { "epoch": 0.3831058823529412, "grad_norm": 0.6299474497166393, "learning_rate": 3.232050092239002e-06, "loss": 0.020189136266708374, "step": 40705 }, { "epoch": 0.3831529411764706, "grad_norm": 0.7951523806745814, "learning_rate": 3.231851601159583e-06, "loss": 0.020921677350997925, "step": 40710 }, { "epoch": 0.3832, "grad_norm": 0.49783301845932226, "learning_rate": 3.23165314664569e-06, "loss": 0.0183457151055336, "step": 40715 }, { "epoch": 0.3832470588235294, "grad_norm": 0.520271003313084, "learning_rate": 3.2314547286860963e-06, "loss": 0.01815781891345978, "step": 40720 }, { "epoch": 0.38329411764705884, "grad_norm": 0.6298904210230872, "learning_rate": 3.231256347269583e-06, "loss": 0.01844988763332367, "step": 40725 }, { "epoch": 0.38334117647058824, "grad_norm": 0.5514274670828531, "learning_rate": 3.231058002384933e-06, "loss": 0.01945221722126007, "step": 40730 }, { "epoch": 0.38338823529411764, "grad_norm": 0.5696992773504924, "learning_rate": 3.230859694020937e-06, "loss": 0.017604556679725648, "step": 40735 }, { "epoch": 0.38343529411764704, "grad_norm": 0.4240600469535113, "learning_rate": 3.230661422166388e-06, "loss": 0.018058981001377105, "step": 40740 }, { "epoch": 0.3834823529411765, "grad_norm": 0.5403126312692257, "learning_rate": 3.2304631868100844e-06, "loss": 0.01778589338064194, "step": 40745 }, { "epoch": 0.3835294117647059, "grad_norm": 0.5492957908594976, "learning_rate": 3.230264987940831e-06, "loss": 0.023924282193183898, "step": 40750 }, { "epoch": 0.3835764705882353, "grad_norm": 0.5678982853445275, "learning_rate": 3.2300668255474353e-06, "loss": 0.018181315064430235, "step": 40755 }, { "epoch": 0.3836235294117647, "grad_norm": 0.7095353770176143, "learning_rate": 3.2298686996187112e-06, "loss": 0.019672724604606628, "step": 40760 }, { "epoch": 0.3836705882352941, "grad_norm": 0.7377891060799036, "learning_rate": 3.2296706101434765e-06, "loss": 0.020392414927482606, "step": 40765 }, { "epoch": 0.38371764705882355, "grad_norm": 0.738083627025083, "learning_rate": 3.2294725571105527e-06, "loss": 0.0196369469165802, "step": 40770 }, { "epoch": 0.38376470588235295, "grad_norm": 0.6441041465364992, "learning_rate": 3.22927454050877e-06, "loss": 0.021669602394104003, "step": 40775 }, { "epoch": 0.38381176470588235, "grad_norm": 0.6386695863809748, "learning_rate": 3.2290765603269588e-06, "loss": 0.020957362651824952, "step": 40780 }, { "epoch": 0.38385882352941175, "grad_norm": 0.8007773264644515, "learning_rate": 3.2288786165539566e-06, "loss": 0.019428279995918275, "step": 40785 }, { "epoch": 0.38390588235294115, "grad_norm": 0.5474273864101216, "learning_rate": 3.2286807091786053e-06, "loss": 0.016354992985725403, "step": 40790 }, { "epoch": 0.3839529411764706, "grad_norm": 0.9154950637033233, "learning_rate": 3.2284828381897526e-06, "loss": 0.02324184775352478, "step": 40795 }, { "epoch": 0.384, "grad_norm": 0.6348886592712135, "learning_rate": 3.228285003576249e-06, "loss": 0.019479268789291383, "step": 40800 }, { "epoch": 0.3840470588235294, "grad_norm": 0.4052647247825635, "learning_rate": 3.228087205326952e-06, "loss": 0.01937396377325058, "step": 40805 }, { "epoch": 0.3840941176470588, "grad_norm": 0.5580676974258345, "learning_rate": 3.2278894434307217e-06, "loss": 0.0172355979681015, "step": 40810 }, { "epoch": 0.3841411764705882, "grad_norm": 0.5529717227687732, "learning_rate": 3.2276917178764245e-06, "loss": 0.019459933042526245, "step": 40815 }, { "epoch": 0.38418823529411766, "grad_norm": 0.5904342462354628, "learning_rate": 3.227494028652931e-06, "loss": 0.023434372246265413, "step": 40820 }, { "epoch": 0.38423529411764706, "grad_norm": 0.6553230350093049, "learning_rate": 3.227296375749116e-06, "loss": 0.02302854657173157, "step": 40825 }, { "epoch": 0.38428235294117646, "grad_norm": 0.542763567339696, "learning_rate": 3.2270987591538612e-06, "loss": 0.017625804245471954, "step": 40830 }, { "epoch": 0.38432941176470586, "grad_norm": 0.4927919938826565, "learning_rate": 3.2269011788560494e-06, "loss": 0.028275132179260254, "step": 40835 }, { "epoch": 0.3843764705882353, "grad_norm": 0.632326071349278, "learning_rate": 3.226703634844573e-06, "loss": 0.0216770201921463, "step": 40840 }, { "epoch": 0.3844235294117647, "grad_norm": 0.4658062005196495, "learning_rate": 3.2265061271083235e-06, "loss": 0.01868952065706253, "step": 40845 }, { "epoch": 0.3844705882352941, "grad_norm": 0.5108367537607796, "learning_rate": 3.226308655636203e-06, "loss": 0.02014293372631073, "step": 40850 }, { "epoch": 0.3845176470588235, "grad_norm": 0.4972157450110573, "learning_rate": 3.226111220417114e-06, "loss": 0.023830598592758177, "step": 40855 }, { "epoch": 0.3845647058823529, "grad_norm": 0.7911750733452162, "learning_rate": 3.2259138214399654e-06, "loss": 0.021688762307167053, "step": 40860 }, { "epoch": 0.3846117647058824, "grad_norm": 0.5771395795405461, "learning_rate": 3.2257164586936714e-06, "loss": 0.016418834030628205, "step": 40865 }, { "epoch": 0.3846588235294118, "grad_norm": 0.5020673734232693, "learning_rate": 3.225519132167149e-06, "loss": 0.018367791175842287, "step": 40870 }, { "epoch": 0.3847058823529412, "grad_norm": 0.5344112724665487, "learning_rate": 3.2253218418493214e-06, "loss": 0.017481233179569244, "step": 40875 }, { "epoch": 0.3847529411764706, "grad_norm": 0.6526432932292409, "learning_rate": 3.225124587729118e-06, "loss": 0.018950729072093962, "step": 40880 }, { "epoch": 0.3848, "grad_norm": 0.5132621407659111, "learning_rate": 3.2249273697954697e-06, "loss": 0.023254957795143128, "step": 40885 }, { "epoch": 0.38484705882352943, "grad_norm": 0.6431964794423234, "learning_rate": 3.224730188037314e-06, "loss": 0.019361349940299987, "step": 40890 }, { "epoch": 0.38489411764705883, "grad_norm": 0.3787732844619004, "learning_rate": 3.2245330424435933e-06, "loss": 0.018705675005912782, "step": 40895 }, { "epoch": 0.38494117647058823, "grad_norm": 0.5364388614577477, "learning_rate": 3.2243359330032534e-06, "loss": 0.018434599041938782, "step": 40900 }, { "epoch": 0.38498823529411763, "grad_norm": 0.787971112362671, "learning_rate": 3.2241388597052475e-06, "loss": 0.021318942308425903, "step": 40905 }, { "epoch": 0.38503529411764703, "grad_norm": 0.4768478806304206, "learning_rate": 3.223941822538529e-06, "loss": 0.017054709792137145, "step": 40910 }, { "epoch": 0.3850823529411765, "grad_norm": 0.4592737114524938, "learning_rate": 3.2237448214920626e-06, "loss": 0.020585742592811585, "step": 40915 }, { "epoch": 0.3851294117647059, "grad_norm": 0.6179534979818436, "learning_rate": 3.22354785655481e-06, "loss": 0.021451401710510253, "step": 40920 }, { "epoch": 0.3851764705882353, "grad_norm": 0.5109779732343409, "learning_rate": 3.223350927715743e-06, "loss": 0.017449870705604553, "step": 40925 }, { "epoch": 0.3852235294117647, "grad_norm": 0.7017713505181592, "learning_rate": 3.2231540349638378e-06, "loss": 0.02036822438240051, "step": 40930 }, { "epoch": 0.38527058823529414, "grad_norm": 0.6608094981071357, "learning_rate": 3.222957178288073e-06, "loss": 0.015606915950775147, "step": 40935 }, { "epoch": 0.38531764705882354, "grad_norm": 0.7210295832288393, "learning_rate": 3.2227603576774323e-06, "loss": 0.024078340828418733, "step": 40940 }, { "epoch": 0.38536470588235294, "grad_norm": 0.5828839988691772, "learning_rate": 3.2225635731209065e-06, "loss": 0.022141292691230774, "step": 40945 }, { "epoch": 0.38541176470588234, "grad_norm": 0.6343507065582097, "learning_rate": 3.2223668246074884e-06, "loss": 0.023832611739635468, "step": 40950 }, { "epoch": 0.38545882352941174, "grad_norm": 0.6783950491358561, "learning_rate": 3.2221701121261777e-06, "loss": 0.02415402978658676, "step": 40955 }, { "epoch": 0.3855058823529412, "grad_norm": 0.665654273623286, "learning_rate": 3.221973435665976e-06, "loss": 0.020050880312919617, "step": 40960 }, { "epoch": 0.3855529411764706, "grad_norm": 0.7496437620768499, "learning_rate": 3.221776795215892e-06, "loss": 0.023656605184078215, "step": 40965 }, { "epoch": 0.3856, "grad_norm": 0.508891155381562, "learning_rate": 3.2215801907649386e-06, "loss": 0.016692967712879182, "step": 40970 }, { "epoch": 0.3856470588235294, "grad_norm": 0.5926265031879818, "learning_rate": 3.221383622302133e-06, "loss": 0.022246672213077544, "step": 40975 }, { "epoch": 0.3856941176470588, "grad_norm": 0.4967548173511809, "learning_rate": 3.221187089816498e-06, "loss": 0.019906945526599884, "step": 40980 }, { "epoch": 0.38574117647058825, "grad_norm": 0.36667416720416085, "learning_rate": 3.2209905932970594e-06, "loss": 0.015755750238895416, "step": 40985 }, { "epoch": 0.38578823529411765, "grad_norm": 0.5863664839195651, "learning_rate": 3.2207941327328483e-06, "loss": 0.024679554998874663, "step": 40990 }, { "epoch": 0.38583529411764705, "grad_norm": 0.5937314523813664, "learning_rate": 3.2205977081129013e-06, "loss": 0.020341190695762634, "step": 40995 }, { "epoch": 0.38588235294117645, "grad_norm": 0.5845635721686129, "learning_rate": 3.22040131942626e-06, "loss": 0.02183157503604889, "step": 41000 }, { "epoch": 0.3859294117647059, "grad_norm": 0.49009056278106616, "learning_rate": 3.2202049666619682e-06, "loss": 0.026634377241134644, "step": 41005 }, { "epoch": 0.3859764705882353, "grad_norm": 0.9190558535099881, "learning_rate": 3.220008649809078e-06, "loss": 0.020639306306838988, "step": 41010 }, { "epoch": 0.3860235294117647, "grad_norm": 0.4625690869218835, "learning_rate": 3.2198123688566417e-06, "loss": 0.020496860146522522, "step": 41015 }, { "epoch": 0.3860705882352941, "grad_norm": 0.471283191459471, "learning_rate": 3.2196161237937217e-06, "loss": 0.018004408478736876, "step": 41020 }, { "epoch": 0.3861176470588235, "grad_norm": 0.9078819325534953, "learning_rate": 3.2194199146093797e-06, "loss": 0.01915537714958191, "step": 41025 }, { "epoch": 0.38616470588235297, "grad_norm": 0.5784179631167976, "learning_rate": 3.219223741292686e-06, "loss": 0.020730018615722656, "step": 41030 }, { "epoch": 0.38621176470588237, "grad_norm": 0.44834047922009124, "learning_rate": 3.219027603832713e-06, "loss": 0.01738561540842056, "step": 41035 }, { "epoch": 0.38625882352941177, "grad_norm": 0.5824044084376954, "learning_rate": 3.218831502218539e-06, "loss": 0.0198546439409256, "step": 41040 }, { "epoch": 0.38630588235294117, "grad_norm": 0.7127009205407839, "learning_rate": 3.2186354364392473e-06, "loss": 0.01809276044368744, "step": 41045 }, { "epoch": 0.38635294117647057, "grad_norm": 0.5182110979564851, "learning_rate": 3.218439406483926e-06, "loss": 0.018252570927143098, "step": 41050 }, { "epoch": 0.3864, "grad_norm": 0.5368774171569736, "learning_rate": 3.2182434123416654e-06, "loss": 0.016688093543052673, "step": 41055 }, { "epoch": 0.3864470588235294, "grad_norm": 0.5436153484366988, "learning_rate": 3.218047454001564e-06, "loss": 0.021226751804351806, "step": 41060 }, { "epoch": 0.3864941176470588, "grad_norm": 0.6507836712385476, "learning_rate": 3.2178515314527224e-06, "loss": 0.021644625067710876, "step": 41065 }, { "epoch": 0.3865411764705882, "grad_norm": 1.1813400818384925, "learning_rate": 3.217655644684246e-06, "loss": 0.02142777740955353, "step": 41070 }, { "epoch": 0.3865882352941176, "grad_norm": 0.5608200300034353, "learning_rate": 3.2174597936852465e-06, "loss": 0.025388437509536742, "step": 41075 }, { "epoch": 0.3866352941176471, "grad_norm": 0.7115066932334198, "learning_rate": 3.2172639784448385e-06, "loss": 0.02060350775718689, "step": 41080 }, { "epoch": 0.3866823529411765, "grad_norm": 0.4964254567610677, "learning_rate": 3.217068198952143e-06, "loss": 0.019937659800052642, "step": 41085 }, { "epoch": 0.3867294117647059, "grad_norm": 0.43952332863291527, "learning_rate": 3.2168724551962827e-06, "loss": 0.013639548420906067, "step": 41090 }, { "epoch": 0.3867764705882353, "grad_norm": 0.6296879299602657, "learning_rate": 3.216676747166389e-06, "loss": 0.023595142364501952, "step": 41095 }, { "epoch": 0.38682352941176473, "grad_norm": 0.33409803836128765, "learning_rate": 3.216481074851594e-06, "loss": 0.01929789185523987, "step": 41100 }, { "epoch": 0.38687058823529413, "grad_norm": 0.3528313371258212, "learning_rate": 3.2162854382410375e-06, "loss": 0.023164266347885133, "step": 41105 }, { "epoch": 0.38691764705882353, "grad_norm": 0.5976403897623039, "learning_rate": 3.2160898373238612e-06, "loss": 0.01971094310283661, "step": 41110 }, { "epoch": 0.38696470588235293, "grad_norm": 0.5027771008488744, "learning_rate": 3.2158942720892146e-06, "loss": 0.018204909563064576, "step": 41115 }, { "epoch": 0.38701176470588233, "grad_norm": 0.5484152092686198, "learning_rate": 3.2156987425262475e-06, "loss": 0.022260919213294983, "step": 41120 }, { "epoch": 0.3870588235294118, "grad_norm": 0.5713906273276923, "learning_rate": 3.21550324862412e-06, "loss": 0.019768673181533813, "step": 41125 }, { "epoch": 0.3871058823529412, "grad_norm": 0.47944316494051387, "learning_rate": 3.215307790371991e-06, "loss": 0.020529605448246002, "step": 41130 }, { "epoch": 0.3871529411764706, "grad_norm": 0.7090191243092547, "learning_rate": 3.215112367759028e-06, "loss": 0.01869576722383499, "step": 41135 }, { "epoch": 0.3872, "grad_norm": 0.5504320922465729, "learning_rate": 3.2149169807744012e-06, "loss": 0.016554734110832213, "step": 41140 }, { "epoch": 0.3872470588235294, "grad_norm": 0.5702499953692124, "learning_rate": 3.2147216294072864e-06, "loss": 0.016181938350200653, "step": 41145 }, { "epoch": 0.38729411764705884, "grad_norm": 0.7676834939005145, "learning_rate": 3.214526313646864e-06, "loss": 0.022408513724803923, "step": 41150 }, { "epoch": 0.38734117647058824, "grad_norm": 0.5298572388689113, "learning_rate": 3.214331033482318e-06, "loss": 0.017798486351966857, "step": 41155 }, { "epoch": 0.38738823529411764, "grad_norm": 0.5759842801983432, "learning_rate": 3.214135788902837e-06, "loss": 0.019510886073112486, "step": 41160 }, { "epoch": 0.38743529411764704, "grad_norm": 0.48877920864672336, "learning_rate": 3.213940579897616e-06, "loss": 0.021163436770439147, "step": 41165 }, { "epoch": 0.38748235294117644, "grad_norm": 0.466283507850785, "learning_rate": 3.2137454064558532e-06, "loss": 0.019541241228580475, "step": 41170 }, { "epoch": 0.3875294117647059, "grad_norm": 0.6625891473096227, "learning_rate": 3.2135502685667502e-06, "loss": 0.02065449357032776, "step": 41175 }, { "epoch": 0.3875764705882353, "grad_norm": 0.526625525955391, "learning_rate": 3.213355166219516e-06, "loss": 0.01657336950302124, "step": 41180 }, { "epoch": 0.3876235294117647, "grad_norm": 0.49842511625601194, "learning_rate": 3.2131600994033624e-06, "loss": 0.02137090414762497, "step": 41185 }, { "epoch": 0.3876705882352941, "grad_norm": 0.8098009972507729, "learning_rate": 3.2129650681075066e-06, "loss": 0.031195232272148134, "step": 41190 }, { "epoch": 0.38771764705882356, "grad_norm": 0.3890667257318401, "learning_rate": 3.2127700723211685e-06, "loss": 0.016314104199409485, "step": 41195 }, { "epoch": 0.38776470588235296, "grad_norm": 0.6896888236859383, "learning_rate": 3.212575112033575e-06, "loss": 0.019506433606147768, "step": 41200 }, { "epoch": 0.38781176470588236, "grad_norm": 0.35443085292754256, "learning_rate": 3.2123801872339566e-06, "loss": 0.02260860800743103, "step": 41205 }, { "epoch": 0.38785882352941176, "grad_norm": 0.4639553908085115, "learning_rate": 3.2121852979115488e-06, "loss": 0.02006673514842987, "step": 41210 }, { "epoch": 0.38790588235294116, "grad_norm": 0.45001933490871526, "learning_rate": 3.2119904440555895e-06, "loss": 0.01827942728996277, "step": 41215 }, { "epoch": 0.3879529411764706, "grad_norm": 0.6032080731757286, "learning_rate": 3.2117956256553244e-06, "loss": 0.018323752284049987, "step": 41220 }, { "epoch": 0.388, "grad_norm": 0.4587675215014439, "learning_rate": 3.2116008427000018e-06, "loss": 0.018987736105918883, "step": 41225 }, { "epoch": 0.3880470588235294, "grad_norm": 0.7847187123359005, "learning_rate": 3.211406095178875e-06, "loss": 0.019676098227500917, "step": 41230 }, { "epoch": 0.3880941176470588, "grad_norm": 0.7180458181012898, "learning_rate": 3.2112113830812027e-06, "loss": 0.02476525753736496, "step": 41235 }, { "epoch": 0.3881411764705882, "grad_norm": 0.5930996746465291, "learning_rate": 3.211016706396245e-06, "loss": 0.021990090608596802, "step": 41240 }, { "epoch": 0.38818823529411767, "grad_norm": 0.449935292489658, "learning_rate": 3.2108220651132717e-06, "loss": 0.01874588578939438, "step": 41245 }, { "epoch": 0.38823529411764707, "grad_norm": 0.5933542249331646, "learning_rate": 3.2106274592215525e-06, "loss": 0.01952145844697952, "step": 41250 }, { "epoch": 0.38828235294117647, "grad_norm": 0.7545355908723471, "learning_rate": 3.2104328887103643e-06, "loss": 0.017712625861167907, "step": 41255 }, { "epoch": 0.38832941176470587, "grad_norm": 0.4600489890364938, "learning_rate": 3.2102383535689875e-06, "loss": 0.01770109832286835, "step": 41260 }, { "epoch": 0.38837647058823527, "grad_norm": 0.7991889165875592, "learning_rate": 3.2100438537867067e-06, "loss": 0.023390059173107148, "step": 41265 }, { "epoch": 0.3884235294117647, "grad_norm": 0.7970633371400552, "learning_rate": 3.2098493893528126e-06, "loss": 0.021743081510066986, "step": 41270 }, { "epoch": 0.3884705882352941, "grad_norm": 0.5523554535516558, "learning_rate": 3.2096549602565998e-06, "loss": 0.020265820622444152, "step": 41275 }, { "epoch": 0.3885176470588235, "grad_norm": 0.36994879694299165, "learning_rate": 3.2094605664873656e-06, "loss": 0.02010684013366699, "step": 41280 }, { "epoch": 0.3885647058823529, "grad_norm": 0.5362318310137121, "learning_rate": 3.209266208034414e-06, "loss": 0.024245390295982362, "step": 41285 }, { "epoch": 0.3886117647058824, "grad_norm": 0.4518788796832601, "learning_rate": 3.2090718848870535e-06, "loss": 0.023527640104293823, "step": 41290 }, { "epoch": 0.3886588235294118, "grad_norm": 0.701580431667989, "learning_rate": 3.208877597034596e-06, "loss": 0.020753386616706847, "step": 41295 }, { "epoch": 0.3887058823529412, "grad_norm": 0.5716408431834402, "learning_rate": 3.2086833444663586e-06, "loss": 0.019411852955818175, "step": 41300 }, { "epoch": 0.3887529411764706, "grad_norm": 0.6866906109720773, "learning_rate": 3.2084891271716617e-06, "loss": 0.022866055369377136, "step": 41305 }, { "epoch": 0.3888, "grad_norm": 0.513324609833172, "learning_rate": 3.2082949451398333e-06, "loss": 0.015063861012458801, "step": 41310 }, { "epoch": 0.38884705882352943, "grad_norm": 0.49995092142912956, "learning_rate": 3.208100798360202e-06, "loss": 0.021460501849651335, "step": 41315 }, { "epoch": 0.38889411764705883, "grad_norm": 0.4028569172122003, "learning_rate": 3.207906686822105e-06, "loss": 0.019934636354446412, "step": 41320 }, { "epoch": 0.38894117647058823, "grad_norm": 0.5077855309747912, "learning_rate": 3.207712610514879e-06, "loss": 0.016267721354961396, "step": 41325 }, { "epoch": 0.38898823529411763, "grad_norm": 0.4273010446368315, "learning_rate": 3.2075185694278703e-06, "loss": 0.02024715840816498, "step": 41330 }, { "epoch": 0.38903529411764703, "grad_norm": 0.5768512730969123, "learning_rate": 3.207324563550427e-06, "loss": 0.01729593575000763, "step": 41335 }, { "epoch": 0.3890823529411765, "grad_norm": 0.5816570629418074, "learning_rate": 3.2071305928719016e-06, "loss": 0.019862258434295656, "step": 41340 }, { "epoch": 0.3891294117647059, "grad_norm": 0.6739617895422596, "learning_rate": 3.2069366573816523e-06, "loss": 0.021934597194194792, "step": 41345 }, { "epoch": 0.3891764705882353, "grad_norm": 0.6325957002028303, "learning_rate": 3.2067427570690414e-06, "loss": 0.019181814789772034, "step": 41350 }, { "epoch": 0.3892235294117647, "grad_norm": 0.6185648545174811, "learning_rate": 3.2065488919234338e-06, "loss": 0.02169802486896515, "step": 41355 }, { "epoch": 0.3892705882352941, "grad_norm": 0.4546372609867451, "learning_rate": 3.2063550619342026e-06, "loss": 0.019683143496513365, "step": 41360 }, { "epoch": 0.38931764705882355, "grad_norm": 0.6696327792508827, "learning_rate": 3.206161267090723e-06, "loss": 0.019766396284103392, "step": 41365 }, { "epoch": 0.38936470588235295, "grad_norm": 0.3654852284885375, "learning_rate": 3.205967507382374e-06, "loss": 0.019260185956954955, "step": 41370 }, { "epoch": 0.38941176470588235, "grad_norm": 0.6029363797521843, "learning_rate": 3.205773782798541e-06, "loss": 0.026583397388458253, "step": 41375 }, { "epoch": 0.38945882352941175, "grad_norm": 0.5010090781239391, "learning_rate": 3.205580093328613e-06, "loss": 0.023440542817115783, "step": 41380 }, { "epoch": 0.3895058823529412, "grad_norm": 0.48905250905814934, "learning_rate": 3.2053864389619838e-06, "loss": 0.017828258872032165, "step": 41385 }, { "epoch": 0.3895529411764706, "grad_norm": 0.5288768294439721, "learning_rate": 3.2051928196880505e-06, "loss": 0.019383034110069274, "step": 41390 }, { "epoch": 0.3896, "grad_norm": 0.39989256490961916, "learning_rate": 3.2049992354962166e-06, "loss": 0.01596958339214325, "step": 41395 }, { "epoch": 0.3896470588235294, "grad_norm": 0.49106876980738157, "learning_rate": 3.2048056863758894e-06, "loss": 0.018201689422130584, "step": 41400 }, { "epoch": 0.3896941176470588, "grad_norm": 0.9683013806724755, "learning_rate": 3.2046121723164793e-06, "loss": 0.015348955988883972, "step": 41405 }, { "epoch": 0.38974117647058826, "grad_norm": 0.5126902852471495, "learning_rate": 3.2044186933074024e-06, "loss": 0.023281486332416536, "step": 41410 }, { "epoch": 0.38978823529411766, "grad_norm": 0.5070811753111706, "learning_rate": 3.20422524933808e-06, "loss": 0.018261510133743285, "step": 41415 }, { "epoch": 0.38983529411764706, "grad_norm": 0.4521324858079227, "learning_rate": 3.204031840397936e-06, "loss": 0.018302027881145478, "step": 41420 }, { "epoch": 0.38988235294117646, "grad_norm": 0.43691499722812926, "learning_rate": 3.2038384664764007e-06, "loss": 0.016824787855148314, "step": 41425 }, { "epoch": 0.38992941176470586, "grad_norm": 0.5053071925999449, "learning_rate": 3.203645127562907e-06, "loss": 0.028508645296096802, "step": 41430 }, { "epoch": 0.3899764705882353, "grad_norm": 0.844774051130855, "learning_rate": 3.2034518236468936e-06, "loss": 0.026148873567581176, "step": 41435 }, { "epoch": 0.3900235294117647, "grad_norm": 0.44841806739787843, "learning_rate": 3.2032585547178034e-06, "loss": 0.018890222907066344, "step": 41440 }, { "epoch": 0.3900705882352941, "grad_norm": 0.43396287861725924, "learning_rate": 3.2030653207650836e-06, "loss": 0.01824954301118851, "step": 41445 }, { "epoch": 0.3901176470588235, "grad_norm": 0.6567669727793489, "learning_rate": 3.2028721217781855e-06, "loss": 0.024504363536834717, "step": 41450 }, { "epoch": 0.3901647058823529, "grad_norm": 0.7164579982482963, "learning_rate": 3.202678957746566e-06, "loss": 0.01692585051059723, "step": 41455 }, { "epoch": 0.39021176470588237, "grad_norm": 0.533757826351515, "learning_rate": 3.202485828659684e-06, "loss": 0.01940298229455948, "step": 41460 }, { "epoch": 0.39025882352941177, "grad_norm": 0.49452830063964504, "learning_rate": 3.202292734507007e-06, "loss": 0.01928684115409851, "step": 41465 }, { "epoch": 0.39030588235294117, "grad_norm": 0.6700195156069223, "learning_rate": 3.202099675278003e-06, "loss": 0.026378756761550902, "step": 41470 }, { "epoch": 0.39035294117647057, "grad_norm": 0.5633474680507613, "learning_rate": 3.2019066509621456e-06, "loss": 0.021378064155578615, "step": 41475 }, { "epoch": 0.3904, "grad_norm": 0.5062035603691317, "learning_rate": 3.2017136615489143e-06, "loss": 0.019077733159065247, "step": 41480 }, { "epoch": 0.3904470588235294, "grad_norm": 0.48574791832681835, "learning_rate": 3.2015207070277903e-06, "loss": 0.014938536286354064, "step": 41485 }, { "epoch": 0.3904941176470588, "grad_norm": 0.6769599209597488, "learning_rate": 3.2013277873882624e-06, "loss": 0.018457278609275818, "step": 41490 }, { "epoch": 0.3905411764705882, "grad_norm": 0.5997686415466925, "learning_rate": 3.2011349026198214e-06, "loss": 0.026255837082862853, "step": 41495 }, { "epoch": 0.3905882352941176, "grad_norm": 0.4410557511909088, "learning_rate": 3.200942052711963e-06, "loss": 0.013839806616306304, "step": 41500 }, { "epoch": 0.3906352941176471, "grad_norm": 0.7395373908197721, "learning_rate": 3.200749237654189e-06, "loss": 0.022649618983268737, "step": 41505 }, { "epoch": 0.3906823529411765, "grad_norm": 0.5596691956968195, "learning_rate": 3.2005564574360037e-06, "loss": 0.020356786251068116, "step": 41510 }, { "epoch": 0.3907294117647059, "grad_norm": 0.5473788090049206, "learning_rate": 3.2003637120469163e-06, "loss": 0.018722918629646302, "step": 41515 }, { "epoch": 0.3907764705882353, "grad_norm": 0.5110976598424889, "learning_rate": 3.2001710014764404e-06, "loss": 0.021090346574783325, "step": 41520 }, { "epoch": 0.3908235294117647, "grad_norm": 1.0192080676797708, "learning_rate": 3.1999783257140944e-06, "loss": 0.027337646484375, "step": 41525 }, { "epoch": 0.39087058823529414, "grad_norm": 0.43974156149216886, "learning_rate": 3.1997856847494014e-06, "loss": 0.01635015308856964, "step": 41530 }, { "epoch": 0.39091764705882354, "grad_norm": 0.5884081558943327, "learning_rate": 3.199593078571888e-06, "loss": 0.020502644777297973, "step": 41535 }, { "epoch": 0.39096470588235294, "grad_norm": 0.6228893773832489, "learning_rate": 3.1994005071710855e-06, "loss": 0.023193559050559996, "step": 41540 }, { "epoch": 0.39101176470588234, "grad_norm": 0.7761584345898881, "learning_rate": 3.1992079705365303e-06, "loss": 0.01672148108482361, "step": 41545 }, { "epoch": 0.3910588235294118, "grad_norm": 0.3946912007750195, "learning_rate": 3.199015468657762e-06, "loss": 0.020485320687294008, "step": 41550 }, { "epoch": 0.3911058823529412, "grad_norm": 0.6396643972845492, "learning_rate": 3.198823001524326e-06, "loss": 0.024879968166351317, "step": 41555 }, { "epoch": 0.3911529411764706, "grad_norm": 0.8146738020021332, "learning_rate": 3.1986305691257713e-06, "loss": 0.022382462024688722, "step": 41560 }, { "epoch": 0.3912, "grad_norm": 0.532528164115859, "learning_rate": 3.198438171451651e-06, "loss": 0.01651606559753418, "step": 41565 }, { "epoch": 0.3912470588235294, "grad_norm": 0.5097401595908216, "learning_rate": 3.198245808491523e-06, "loss": 0.02155391275882721, "step": 41570 }, { "epoch": 0.39129411764705885, "grad_norm": 0.465484744219834, "learning_rate": 3.19805348023495e-06, "loss": 0.018715503811836242, "step": 41575 }, { "epoch": 0.39134117647058825, "grad_norm": 0.7210753435726233, "learning_rate": 3.1978611866714976e-06, "loss": 0.020625662803649903, "step": 41580 }, { "epoch": 0.39138823529411765, "grad_norm": 0.33270466562933826, "learning_rate": 3.1976689277907387e-06, "loss": 0.018110281229019164, "step": 41585 }, { "epoch": 0.39143529411764705, "grad_norm": 0.4556461901984142, "learning_rate": 3.197476703582247e-06, "loss": 0.018053102493286132, "step": 41590 }, { "epoch": 0.39148235294117645, "grad_norm": 0.45938173213218214, "learning_rate": 3.1972845140356027e-06, "loss": 0.021345245838165283, "step": 41595 }, { "epoch": 0.3915294117647059, "grad_norm": 0.564870019411771, "learning_rate": 3.197092359140391e-06, "loss": 0.01625586599111557, "step": 41600 }, { "epoch": 0.3915764705882353, "grad_norm": 0.5741932309008739, "learning_rate": 3.196900238886199e-06, "loss": 0.01721007823944092, "step": 41605 }, { "epoch": 0.3916235294117647, "grad_norm": 0.5327186720054771, "learning_rate": 3.196708153262621e-06, "loss": 0.01685943305492401, "step": 41610 }, { "epoch": 0.3916705882352941, "grad_norm": 0.7330122754614832, "learning_rate": 3.196516102259254e-06, "loss": 0.021044400334358216, "step": 41615 }, { "epoch": 0.3917176470588235, "grad_norm": 0.6553402359403724, "learning_rate": 3.1963240858656992e-06, "loss": 0.02106706202030182, "step": 41620 }, { "epoch": 0.39176470588235296, "grad_norm": 0.5289856866940618, "learning_rate": 3.196132104071563e-06, "loss": 0.020618492364883424, "step": 41625 }, { "epoch": 0.39181176470588236, "grad_norm": 0.48321817072776324, "learning_rate": 3.195940156866457e-06, "loss": 0.019232538342475892, "step": 41630 }, { "epoch": 0.39185882352941176, "grad_norm": 0.7524886600643705, "learning_rate": 3.1957482442399935e-06, "loss": 0.021241119503974913, "step": 41635 }, { "epoch": 0.39190588235294116, "grad_norm": 0.713406863344309, "learning_rate": 3.1955563661817947e-06, "loss": 0.021941518783569335, "step": 41640 }, { "epoch": 0.3919529411764706, "grad_norm": 0.6824423360784364, "learning_rate": 3.195364522681482e-06, "loss": 0.015805558860301973, "step": 41645 }, { "epoch": 0.392, "grad_norm": 0.38076502270846074, "learning_rate": 3.1951727137286837e-06, "loss": 0.02086389660835266, "step": 41650 }, { "epoch": 0.3920470588235294, "grad_norm": 0.44366427003966513, "learning_rate": 3.1949809393130315e-06, "loss": 0.018260936439037322, "step": 41655 }, { "epoch": 0.3920941176470588, "grad_norm": 0.4611828884202202, "learning_rate": 3.1947891994241647e-06, "loss": 0.01781712770462036, "step": 41660 }, { "epoch": 0.3921411764705882, "grad_norm": 0.5644217792565452, "learning_rate": 3.194597494051721e-06, "loss": 0.018799901008605957, "step": 41665 }, { "epoch": 0.39218823529411767, "grad_norm": 0.6430492841050961, "learning_rate": 3.194405823185348e-06, "loss": 0.01937679946422577, "step": 41670 }, { "epoch": 0.39223529411764707, "grad_norm": 0.5655549056429933, "learning_rate": 3.1942141868146948e-06, "loss": 0.021618345379829408, "step": 41675 }, { "epoch": 0.39228235294117647, "grad_norm": 0.6329825891234794, "learning_rate": 3.194022584929415e-06, "loss": 0.020475779473781586, "step": 41680 }, { "epoch": 0.39232941176470587, "grad_norm": 0.6841455366243531, "learning_rate": 3.1938310175191672e-06, "loss": 0.02047467827796936, "step": 41685 }, { "epoch": 0.39237647058823527, "grad_norm": 0.6533441009066921, "learning_rate": 3.1936394845736144e-06, "loss": 0.024009911715984343, "step": 41690 }, { "epoch": 0.3924235294117647, "grad_norm": 0.38037489174031214, "learning_rate": 3.193447986082423e-06, "loss": 0.01804984360933304, "step": 41695 }, { "epoch": 0.3924705882352941, "grad_norm": 0.5268131847675448, "learning_rate": 3.193256522035265e-06, "loss": 0.020030704140663148, "step": 41700 }, { "epoch": 0.3925176470588235, "grad_norm": 0.43000725364133124, "learning_rate": 3.193065092421816e-06, "loss": 0.02030121088027954, "step": 41705 }, { "epoch": 0.3925647058823529, "grad_norm": 0.5603140752127622, "learning_rate": 3.192873697231756e-06, "loss": 0.01676691472530365, "step": 41710 }, { "epoch": 0.3926117647058823, "grad_norm": 0.5677571054321646, "learning_rate": 3.1926823364547697e-06, "loss": 0.023880141973495483, "step": 41715 }, { "epoch": 0.3926588235294118, "grad_norm": 0.5203113606770416, "learning_rate": 3.192491010080545e-06, "loss": 0.02148028612136841, "step": 41720 }, { "epoch": 0.3927058823529412, "grad_norm": 0.6665243026028549, "learning_rate": 3.192299718098776e-06, "loss": 0.021674399077892304, "step": 41725 }, { "epoch": 0.3927529411764706, "grad_norm": 0.8234091342868933, "learning_rate": 3.1921084604991587e-06, "loss": 0.021355593204498292, "step": 41730 }, { "epoch": 0.3928, "grad_norm": 0.6701244298493415, "learning_rate": 3.191917237271396e-06, "loss": 0.020231756567955016, "step": 41735 }, { "epoch": 0.39284705882352944, "grad_norm": 0.3567305368681954, "learning_rate": 3.191726048405194e-06, "loss": 0.01784796267747879, "step": 41740 }, { "epoch": 0.39289411764705884, "grad_norm": 0.8363792402952565, "learning_rate": 3.1915348938902622e-06, "loss": 0.023203213512897492, "step": 41745 }, { "epoch": 0.39294117647058824, "grad_norm": 0.746315554708543, "learning_rate": 3.1913437737163156e-06, "loss": 0.021123526990413664, "step": 41750 }, { "epoch": 0.39298823529411764, "grad_norm": 0.4496070731885399, "learning_rate": 3.191152687873073e-06, "loss": 0.023975613713264465, "step": 41755 }, { "epoch": 0.39303529411764704, "grad_norm": 0.8845228165161261, "learning_rate": 3.190961636350258e-06, "loss": 0.017731139063835145, "step": 41760 }, { "epoch": 0.3930823529411765, "grad_norm": 0.579046767556056, "learning_rate": 3.1907706191375982e-06, "loss": 0.0171124666929245, "step": 41765 }, { "epoch": 0.3931294117647059, "grad_norm": 0.3678084853340966, "learning_rate": 3.190579636224825e-06, "loss": 0.015946966409683228, "step": 41770 }, { "epoch": 0.3931764705882353, "grad_norm": 0.4836719871699866, "learning_rate": 3.1903886876016743e-06, "loss": 0.02040519118309021, "step": 41775 }, { "epoch": 0.3932235294117647, "grad_norm": 0.5926544205338985, "learning_rate": 3.1901977732578875e-06, "loss": 0.019324350357055663, "step": 41780 }, { "epoch": 0.3932705882352941, "grad_norm": 0.4518649740517979, "learning_rate": 3.190006893183209e-06, "loss": 0.015277226269245148, "step": 41785 }, { "epoch": 0.39331764705882355, "grad_norm": 0.4219821115692686, "learning_rate": 3.1898160473673872e-06, "loss": 0.02010555863380432, "step": 41790 }, { "epoch": 0.39336470588235295, "grad_norm": 0.6490734626057424, "learning_rate": 3.189625235800176e-06, "loss": 0.021292787790298463, "step": 41795 }, { "epoch": 0.39341176470588235, "grad_norm": 0.6052421578405033, "learning_rate": 3.1894344584713338e-06, "loss": 0.016937264800071718, "step": 41800 }, { "epoch": 0.39345882352941175, "grad_norm": 0.5200132638392279, "learning_rate": 3.189243715370621e-06, "loss": 0.02068401426076889, "step": 41805 }, { "epoch": 0.39350588235294115, "grad_norm": 0.6234145604824678, "learning_rate": 3.1890530064878055e-06, "loss": 0.017351076006889343, "step": 41810 }, { "epoch": 0.3935529411764706, "grad_norm": 0.6144993183455569, "learning_rate": 3.1888623318126556e-06, "loss": 0.021570281684398653, "step": 41815 }, { "epoch": 0.3936, "grad_norm": 0.6027242753278089, "learning_rate": 3.1886716913349486e-06, "loss": 0.021493099629878998, "step": 41820 }, { "epoch": 0.3936470588235294, "grad_norm": 0.6416447857800407, "learning_rate": 3.1884810850444615e-06, "loss": 0.021085363626480103, "step": 41825 }, { "epoch": 0.3936941176470588, "grad_norm": 0.5689052280665908, "learning_rate": 3.188290512930979e-06, "loss": 0.01825261265039444, "step": 41830 }, { "epoch": 0.39374117647058826, "grad_norm": 0.4993142154405657, "learning_rate": 3.188099974984287e-06, "loss": 0.01980225443840027, "step": 41835 }, { "epoch": 0.39378823529411766, "grad_norm": 0.5759433520678101, "learning_rate": 3.18790947119418e-06, "loss": 0.01880858391523361, "step": 41840 }, { "epoch": 0.39383529411764706, "grad_norm": 0.6893862711484207, "learning_rate": 3.1877190015504518e-06, "loss": 0.021602493524551392, "step": 41845 }, { "epoch": 0.39388235294117646, "grad_norm": 0.6564959552125417, "learning_rate": 3.187528566042904e-06, "loss": 0.01958739459514618, "step": 41850 }, { "epoch": 0.39392941176470586, "grad_norm": 0.5235764426347815, "learning_rate": 3.1873381646613412e-06, "loss": 0.017092299461364747, "step": 41855 }, { "epoch": 0.3939764705882353, "grad_norm": 0.6596044161258094, "learning_rate": 3.1871477973955716e-06, "loss": 0.015150848031044006, "step": 41860 }, { "epoch": 0.3940235294117647, "grad_norm": 0.5335057753748619, "learning_rate": 3.186957464235409e-06, "loss": 0.024385172128677367, "step": 41865 }, { "epoch": 0.3940705882352941, "grad_norm": 0.9488569018318345, "learning_rate": 3.186767165170671e-06, "loss": 0.019081540405750275, "step": 41870 }, { "epoch": 0.3941176470588235, "grad_norm": 0.5470451256970775, "learning_rate": 3.186576900191179e-06, "loss": 0.02143191695213318, "step": 41875 }, { "epoch": 0.3941647058823529, "grad_norm": 0.3921115787742061, "learning_rate": 3.1863866692867583e-06, "loss": 0.01979459971189499, "step": 41880 }, { "epoch": 0.3942117647058824, "grad_norm": 0.40682759197974955, "learning_rate": 3.186196472447241e-06, "loss": 0.022381514310836792, "step": 41885 }, { "epoch": 0.3942588235294118, "grad_norm": 0.5628257381920083, "learning_rate": 3.18600630966246e-06, "loss": 0.02326972335577011, "step": 41890 }, { "epoch": 0.3943058823529412, "grad_norm": 0.7332595600902695, "learning_rate": 3.1858161809222544e-06, "loss": 0.020482420921325684, "step": 41895 }, { "epoch": 0.3943529411764706, "grad_norm": 0.38939939023201803, "learning_rate": 3.185626086216467e-06, "loss": 0.020075705647468568, "step": 41900 }, { "epoch": 0.3944, "grad_norm": 0.553774354249389, "learning_rate": 3.185436025534946e-06, "loss": 0.02159448266029358, "step": 41905 }, { "epoch": 0.39444705882352943, "grad_norm": 0.5212700552999953, "learning_rate": 3.185245998867541e-06, "loss": 0.016663309931755067, "step": 41910 }, { "epoch": 0.39449411764705883, "grad_norm": 0.7065396549562103, "learning_rate": 3.185056006204109e-06, "loss": 0.02200371026992798, "step": 41915 }, { "epoch": 0.39454117647058823, "grad_norm": 0.558191348828089, "learning_rate": 3.18486604753451e-06, "loss": 0.018001294136047362, "step": 41920 }, { "epoch": 0.39458823529411763, "grad_norm": 0.5377696186173319, "learning_rate": 3.1846761228486078e-06, "loss": 0.021697038412094118, "step": 41925 }, { "epoch": 0.3946352941176471, "grad_norm": 0.6331584582726218, "learning_rate": 3.18448623213627e-06, "loss": 0.01800633817911148, "step": 41930 }, { "epoch": 0.3946823529411765, "grad_norm": 0.49740238134335557, "learning_rate": 3.184296375387371e-06, "loss": 0.020995086431503295, "step": 41935 }, { "epoch": 0.3947294117647059, "grad_norm": 0.5956401703703733, "learning_rate": 3.1841065525917857e-06, "loss": 0.021089629828929903, "step": 41940 }, { "epoch": 0.3947764705882353, "grad_norm": 0.5697058778868652, "learning_rate": 3.1839167637393963e-06, "loss": 0.021086719632148743, "step": 41945 }, { "epoch": 0.3948235294117647, "grad_norm": 0.5385345744671791, "learning_rate": 3.1837270088200882e-06, "loss": 0.020621445775032044, "step": 41950 }, { "epoch": 0.39487058823529414, "grad_norm": 0.43267557407124535, "learning_rate": 3.18353728782375e-06, "loss": 0.022967976331710816, "step": 41955 }, { "epoch": 0.39491764705882354, "grad_norm": 0.39504561439515684, "learning_rate": 3.1833476007402763e-06, "loss": 0.017770859599113464, "step": 41960 }, { "epoch": 0.39496470588235294, "grad_norm": 0.5400435664750685, "learning_rate": 3.1831579475595647e-06, "loss": 0.019621589779853822, "step": 41965 }, { "epoch": 0.39501176470588234, "grad_norm": 0.5600017434615644, "learning_rate": 3.1829683282715174e-06, "loss": 0.018589328229427337, "step": 41970 }, { "epoch": 0.39505882352941174, "grad_norm": 0.6190372047528777, "learning_rate": 3.18277874286604e-06, "loss": 0.016435936093330383, "step": 41975 }, { "epoch": 0.3951058823529412, "grad_norm": 0.5729300534099341, "learning_rate": 3.182589191333044e-06, "loss": 0.018959802389144898, "step": 41980 }, { "epoch": 0.3951529411764706, "grad_norm": 0.7220921850586443, "learning_rate": 3.182399673662444e-06, "loss": 0.01777026802301407, "step": 41985 }, { "epoch": 0.3952, "grad_norm": 0.33727365793857006, "learning_rate": 3.1822101898441588e-06, "loss": 0.015539318323135376, "step": 41990 }, { "epoch": 0.3952470588235294, "grad_norm": 0.7828097950257221, "learning_rate": 3.182020739868112e-06, "loss": 0.01878681778907776, "step": 41995 }, { "epoch": 0.3952941176470588, "grad_norm": 0.4961848757060495, "learning_rate": 3.1818313237242292e-06, "loss": 0.017124392092227936, "step": 42000 }, { "epoch": 0.39534117647058825, "grad_norm": 1.209701168646078, "learning_rate": 3.1816419414024446e-06, "loss": 0.024919134378433228, "step": 42005 }, { "epoch": 0.39538823529411765, "grad_norm": 0.5875770307750688, "learning_rate": 3.1814525928926926e-06, "loss": 0.020340460538864135, "step": 42010 }, { "epoch": 0.39543529411764705, "grad_norm": 0.5694552833364883, "learning_rate": 3.1812632781849127e-06, "loss": 0.018777190148830412, "step": 42015 }, { "epoch": 0.39548235294117645, "grad_norm": 0.5659232614923458, "learning_rate": 3.1810739972690498e-06, "loss": 0.01805855631828308, "step": 42020 }, { "epoch": 0.3955294117647059, "grad_norm": 0.49690589214475245, "learning_rate": 3.1808847501350516e-06, "loss": 0.021645671129226683, "step": 42025 }, { "epoch": 0.3955764705882353, "grad_norm": 0.40822777856447473, "learning_rate": 3.1806955367728716e-06, "loss": 0.018435537815093994, "step": 42030 }, { "epoch": 0.3956235294117647, "grad_norm": 0.5013588851715496, "learning_rate": 3.180506357172465e-06, "loss": 0.02307565212249756, "step": 42035 }, { "epoch": 0.3956705882352941, "grad_norm": 0.5913921597928576, "learning_rate": 3.1803172113237946e-06, "loss": 0.021954666078090667, "step": 42040 }, { "epoch": 0.3957176470588235, "grad_norm": 0.4555781261509996, "learning_rate": 3.1801280992168237e-06, "loss": 0.020635107159614564, "step": 42045 }, { "epoch": 0.39576470588235296, "grad_norm": 0.5970857058777613, "learning_rate": 3.1799390208415233e-06, "loss": 0.024761824309825896, "step": 42050 }, { "epoch": 0.39581176470588236, "grad_norm": 0.515029079701791, "learning_rate": 3.1797499761878645e-06, "loss": 0.02224369943141937, "step": 42055 }, { "epoch": 0.39585882352941176, "grad_norm": 0.49455373130847147, "learning_rate": 3.1795609652458265e-06, "loss": 0.023003704845905304, "step": 42060 }, { "epoch": 0.39590588235294116, "grad_norm": 0.7018324445751117, "learning_rate": 3.179371988005391e-06, "loss": 0.022178223729133605, "step": 42065 }, { "epoch": 0.39595294117647056, "grad_norm": 0.5012281092908275, "learning_rate": 3.1791830444565434e-06, "loss": 0.023157948255538942, "step": 42070 }, { "epoch": 0.396, "grad_norm": 0.6357250555977699, "learning_rate": 3.1789941345892745e-06, "loss": 0.02149241268634796, "step": 42075 }, { "epoch": 0.3960470588235294, "grad_norm": 0.4834487104267128, "learning_rate": 3.178805258393577e-06, "loss": 0.01694713830947876, "step": 42080 }, { "epoch": 0.3960941176470588, "grad_norm": 0.4143183631100025, "learning_rate": 3.178616415859451e-06, "loss": 0.01512586772441864, "step": 42085 }, { "epoch": 0.3961411764705882, "grad_norm": 0.4513526193483185, "learning_rate": 3.1784276069768983e-06, "loss": 0.02142983376979828, "step": 42090 }, { "epoch": 0.3961882352941177, "grad_norm": 0.5583210748629084, "learning_rate": 3.1782388317359258e-06, "loss": 0.020984475314617158, "step": 42095 }, { "epoch": 0.3962352941176471, "grad_norm": 0.5222489041000149, "learning_rate": 3.1780500901265443e-06, "loss": 0.01824546903371811, "step": 42100 }, { "epoch": 0.3962823529411765, "grad_norm": 0.4218780670449627, "learning_rate": 3.177861382138768e-06, "loss": 0.013241647183895111, "step": 42105 }, { "epoch": 0.3963294117647059, "grad_norm": 0.6031282875559656, "learning_rate": 3.1776727077626184e-06, "loss": 0.018569639325141905, "step": 42110 }, { "epoch": 0.3963764705882353, "grad_norm": 0.4178960404339768, "learning_rate": 3.177484066988116e-06, "loss": 0.019242164492607117, "step": 42115 }, { "epoch": 0.39642352941176473, "grad_norm": 0.6203681517304777, "learning_rate": 3.17729545980529e-06, "loss": 0.021362662315368652, "step": 42120 }, { "epoch": 0.39647058823529413, "grad_norm": 0.47339015533117046, "learning_rate": 3.177106886204172e-06, "loss": 0.017822903394699097, "step": 42125 }, { "epoch": 0.39651764705882353, "grad_norm": 0.5216114248233041, "learning_rate": 3.176918346174797e-06, "loss": 0.01690783500671387, "step": 42130 }, { "epoch": 0.39656470588235293, "grad_norm": 0.7143171529268556, "learning_rate": 3.176729839707205e-06, "loss": 0.017576536536216734, "step": 42135 }, { "epoch": 0.39661176470588233, "grad_norm": 0.5674938968894622, "learning_rate": 3.1765413667914404e-06, "loss": 0.01934114545583725, "step": 42140 }, { "epoch": 0.3966588235294118, "grad_norm": 0.33207455772006406, "learning_rate": 3.1763529274175515e-06, "loss": 0.019038677215576172, "step": 42145 }, { "epoch": 0.3967058823529412, "grad_norm": 0.41276616236548475, "learning_rate": 3.1761645215755904e-06, "loss": 0.016561976075172423, "step": 42150 }, { "epoch": 0.3967529411764706, "grad_norm": 0.6786750468056598, "learning_rate": 3.175976149255613e-06, "loss": 0.02040908336639404, "step": 42155 }, { "epoch": 0.3968, "grad_norm": 0.4573733602495322, "learning_rate": 3.1757878104476804e-06, "loss": 0.01729254871606827, "step": 42160 }, { "epoch": 0.3968470588235294, "grad_norm": 0.6648824312363495, "learning_rate": 3.175599505141857e-06, "loss": 0.023441913723945617, "step": 42165 }, { "epoch": 0.39689411764705884, "grad_norm": 0.6973762060531082, "learning_rate": 3.1754112333282122e-06, "loss": 0.019257202744483948, "step": 42170 }, { "epoch": 0.39694117647058824, "grad_norm": 0.5425904846910129, "learning_rate": 3.175222994996818e-06, "loss": 0.01879277378320694, "step": 42175 }, { "epoch": 0.39698823529411764, "grad_norm": 0.5147152504548261, "learning_rate": 3.175034790137752e-06, "loss": 0.017348089814186098, "step": 42180 }, { "epoch": 0.39703529411764704, "grad_norm": 0.3091902225290286, "learning_rate": 3.1748466187410954e-06, "loss": 0.017725569009780884, "step": 42185 }, { "epoch": 0.3970823529411765, "grad_norm": 0.786489585618252, "learning_rate": 3.174658480796934e-06, "loss": 0.02096330523490906, "step": 42190 }, { "epoch": 0.3971294117647059, "grad_norm": 0.49292456225375064, "learning_rate": 3.1744703762953557e-06, "loss": 0.018951478600502013, "step": 42195 }, { "epoch": 0.3971764705882353, "grad_norm": 0.4786243330976554, "learning_rate": 3.174282305226454e-06, "loss": 0.016567865014076234, "step": 42200 }, { "epoch": 0.3972235294117647, "grad_norm": 0.8731720111545344, "learning_rate": 3.1740942675803293e-06, "loss": 0.021415716409683226, "step": 42205 }, { "epoch": 0.3972705882352941, "grad_norm": 0.6649460779567272, "learning_rate": 3.17390626334708e-06, "loss": 0.020269738137722017, "step": 42210 }, { "epoch": 0.39731764705882355, "grad_norm": 0.4209249206988062, "learning_rate": 3.1737182925168143e-06, "loss": 0.017194591462612152, "step": 42215 }, { "epoch": 0.39736470588235295, "grad_norm": 0.543939594793466, "learning_rate": 3.1735303550796396e-06, "loss": 0.017378367483615875, "step": 42220 }, { "epoch": 0.39741176470588235, "grad_norm": 0.6038311503065046, "learning_rate": 3.173342451025673e-06, "loss": 0.016720354557037354, "step": 42225 }, { "epoch": 0.39745882352941175, "grad_norm": 0.4717753474632812, "learning_rate": 3.17315458034503e-06, "loss": 0.019325459003448488, "step": 42230 }, { "epoch": 0.39750588235294115, "grad_norm": 0.7312271235456128, "learning_rate": 3.1729667430278336e-06, "loss": 0.018263107538223265, "step": 42235 }, { "epoch": 0.3975529411764706, "grad_norm": 0.5635978118188553, "learning_rate": 3.172778939064211e-06, "loss": 0.02047830820083618, "step": 42240 }, { "epoch": 0.3976, "grad_norm": 0.7063083815367878, "learning_rate": 3.1725911684442913e-06, "loss": 0.022731707990169527, "step": 42245 }, { "epoch": 0.3976470588235294, "grad_norm": 0.6282855773037037, "learning_rate": 3.17240343115821e-06, "loss": 0.02242860198020935, "step": 42250 }, { "epoch": 0.3976941176470588, "grad_norm": 0.557447592721349, "learning_rate": 3.1722157271961045e-06, "loss": 0.018630558252334596, "step": 42255 }, { "epoch": 0.3977411764705882, "grad_norm": 0.971880846827622, "learning_rate": 3.172028056548118e-06, "loss": 0.021848008036613464, "step": 42260 }, { "epoch": 0.39778823529411766, "grad_norm": 0.2789797558345737, "learning_rate": 3.1718404192043984e-06, "loss": 0.0166846364736557, "step": 42265 }, { "epoch": 0.39783529411764706, "grad_norm": 0.4980001445535331, "learning_rate": 3.171652815155095e-06, "loss": 0.02196243554353714, "step": 42270 }, { "epoch": 0.39788235294117646, "grad_norm": 0.5926113659639554, "learning_rate": 3.1714652443903634e-06, "loss": 0.027316167950630188, "step": 42275 }, { "epoch": 0.39792941176470586, "grad_norm": 0.5173580433062417, "learning_rate": 3.1712777069003616e-06, "loss": 0.017003840208053587, "step": 42280 }, { "epoch": 0.3979764705882353, "grad_norm": 0.26434428817876554, "learning_rate": 3.1710902026752532e-06, "loss": 0.016621263325214387, "step": 42285 }, { "epoch": 0.3980235294117647, "grad_norm": 0.515424207126345, "learning_rate": 3.1709027317052064e-06, "loss": 0.020671018958091737, "step": 42290 }, { "epoch": 0.3980705882352941, "grad_norm": 0.3973339463526301, "learning_rate": 3.170715293980391e-06, "loss": 0.016166552901268005, "step": 42295 }, { "epoch": 0.3981176470588235, "grad_norm": 0.4703993019624502, "learning_rate": 3.1705278894909825e-06, "loss": 0.01875235140323639, "step": 42300 }, { "epoch": 0.3981647058823529, "grad_norm": 0.5953408532972677, "learning_rate": 3.170340518227161e-06, "loss": 0.024138323962688446, "step": 42305 }, { "epoch": 0.3982117647058824, "grad_norm": 0.3615426925709525, "learning_rate": 3.170153180179108e-06, "loss": 0.019947442412376403, "step": 42310 }, { "epoch": 0.3982588235294118, "grad_norm": 0.495245098997897, "learning_rate": 3.1699658753370125e-06, "loss": 0.018272531032562257, "step": 42315 }, { "epoch": 0.3983058823529412, "grad_norm": 0.8220649276013863, "learning_rate": 3.169778603691066e-06, "loss": 0.01781320571899414, "step": 42320 }, { "epoch": 0.3983529411764706, "grad_norm": 0.5286037704836553, "learning_rate": 3.169591365231464e-06, "loss": 0.016148707270622252, "step": 42325 }, { "epoch": 0.3984, "grad_norm": 0.4077980291940822, "learning_rate": 3.1694041599484055e-06, "loss": 0.01737222820520401, "step": 42330 }, { "epoch": 0.39844705882352943, "grad_norm": 0.515918382070426, "learning_rate": 3.169216987832094e-06, "loss": 0.020158272981643677, "step": 42335 }, { "epoch": 0.39849411764705883, "grad_norm": 0.46354511760083456, "learning_rate": 3.1690298488727378e-06, "loss": 0.0187936469912529, "step": 42340 }, { "epoch": 0.39854117647058823, "grad_norm": 0.6868599703681407, "learning_rate": 3.168842743060548e-06, "loss": 0.01650262176990509, "step": 42345 }, { "epoch": 0.39858823529411763, "grad_norm": 0.7151869807527529, "learning_rate": 3.168655670385741e-06, "loss": 0.019232334196567537, "step": 42350 }, { "epoch": 0.39863529411764703, "grad_norm": 0.4256791494782136, "learning_rate": 3.1684686308385365e-06, "loss": 0.018042242527008055, "step": 42355 }, { "epoch": 0.3986823529411765, "grad_norm": 0.5846723573707027, "learning_rate": 3.1682816244091585e-06, "loss": 0.017527176439762114, "step": 42360 }, { "epoch": 0.3987294117647059, "grad_norm": 0.5525075425481324, "learning_rate": 3.168094651087834e-06, "loss": 0.018997229635715485, "step": 42365 }, { "epoch": 0.3987764705882353, "grad_norm": 0.5831790747036627, "learning_rate": 3.1679077108647965e-06, "loss": 0.018108390271663666, "step": 42370 }, { "epoch": 0.3988235294117647, "grad_norm": 0.6065782232522026, "learning_rate": 3.1677208037302804e-06, "loss": 0.017292430996894835, "step": 42375 }, { "epoch": 0.39887058823529414, "grad_norm": 0.4142017637415547, "learning_rate": 3.1675339296745274e-06, "loss": 0.021142521500587465, "step": 42380 }, { "epoch": 0.39891764705882354, "grad_norm": 0.52247599313888, "learning_rate": 3.16734708868778e-06, "loss": 0.026446539163589477, "step": 42385 }, { "epoch": 0.39896470588235294, "grad_norm": 0.7115471901787942, "learning_rate": 3.1671602807602865e-06, "loss": 0.02575441300868988, "step": 42390 }, { "epoch": 0.39901176470588234, "grad_norm": 0.4459671327505883, "learning_rate": 3.1669735058822994e-06, "loss": 0.015433531999588013, "step": 42395 }, { "epoch": 0.39905882352941174, "grad_norm": 0.7322105279431271, "learning_rate": 3.1667867640440743e-06, "loss": 0.021903897821903228, "step": 42400 }, { "epoch": 0.3991058823529412, "grad_norm": 0.575332440907642, "learning_rate": 3.166600055235872e-06, "loss": 0.024373206496238708, "step": 42405 }, { "epoch": 0.3991529411764706, "grad_norm": 0.5060840537765268, "learning_rate": 3.166413379447957e-06, "loss": 0.01557396948337555, "step": 42410 }, { "epoch": 0.3992, "grad_norm": 0.7632852028858538, "learning_rate": 3.1662267366705967e-06, "loss": 0.024221545457839964, "step": 42415 }, { "epoch": 0.3992470588235294, "grad_norm": 0.7767385089382004, "learning_rate": 3.1660401268940627e-06, "loss": 0.015225824713706971, "step": 42420 }, { "epoch": 0.3992941176470588, "grad_norm": 0.4108019709217938, "learning_rate": 3.165853550108633e-06, "loss": 0.015261001884937286, "step": 42425 }, { "epoch": 0.39934117647058825, "grad_norm": 0.6931840603021416, "learning_rate": 3.165667006304586e-06, "loss": 0.02529406249523163, "step": 42430 }, { "epoch": 0.39938823529411766, "grad_norm": 0.4517459979681108, "learning_rate": 3.1654804954722074e-06, "loss": 0.021879157423973082, "step": 42435 }, { "epoch": 0.39943529411764706, "grad_norm": 0.5137908412995322, "learning_rate": 3.1652940176017845e-06, "loss": 0.018200233578681946, "step": 42440 }, { "epoch": 0.39948235294117646, "grad_norm": 0.5265769594205744, "learning_rate": 3.165107572683609e-06, "loss": 0.014688098430633545, "step": 42445 }, { "epoch": 0.39952941176470586, "grad_norm": 0.579770595804092, "learning_rate": 3.164921160707979e-06, "loss": 0.02823966443538666, "step": 42450 }, { "epoch": 0.3995764705882353, "grad_norm": 0.4929980391145356, "learning_rate": 3.1647347816651926e-06, "loss": 0.020594803988933562, "step": 42455 }, { "epoch": 0.3996235294117647, "grad_norm": 0.6154957512003087, "learning_rate": 3.1645484355455557e-06, "loss": 0.022703315317630767, "step": 42460 }, { "epoch": 0.3996705882352941, "grad_norm": 0.6171740590397656, "learning_rate": 3.1643621223393754e-06, "loss": 0.020662620663642883, "step": 42465 }, { "epoch": 0.3997176470588235, "grad_norm": 0.6636100195238565, "learning_rate": 3.164175842036965e-06, "loss": 0.02234945446252823, "step": 42470 }, { "epoch": 0.39976470588235297, "grad_norm": 0.612341728097491, "learning_rate": 3.1639895946286398e-06, "loss": 0.017289216816425323, "step": 42475 }, { "epoch": 0.39981176470588237, "grad_norm": 0.4777362673662915, "learning_rate": 3.163803380104721e-06, "loss": 0.02072660028934479, "step": 42480 }, { "epoch": 0.39985882352941177, "grad_norm": 0.819132715301926, "learning_rate": 3.163617198455531e-06, "loss": 0.02753605544567108, "step": 42485 }, { "epoch": 0.39990588235294117, "grad_norm": 0.5830532069984269, "learning_rate": 3.1634310496713993e-06, "loss": 0.026652726531028747, "step": 42490 }, { "epoch": 0.39995294117647057, "grad_norm": 0.43485981300022414, "learning_rate": 3.163244933742657e-06, "loss": 0.015293711423873901, "step": 42495 }, { "epoch": 0.4, "grad_norm": 0.5602284731054609, "learning_rate": 3.1630588506596433e-06, "loss": 0.022714880108833314, "step": 42500 }, { "epoch": 0.4000470588235294, "grad_norm": 0.7614494530125266, "learning_rate": 3.1628728004126946e-06, "loss": 0.018103151023387908, "step": 42505 }, { "epoch": 0.4000941176470588, "grad_norm": 0.5758848843109319, "learning_rate": 3.162686782992157e-06, "loss": 0.019651681184768677, "step": 42510 }, { "epoch": 0.4001411764705882, "grad_norm": 0.7541883947449761, "learning_rate": 3.1625007983883772e-06, "loss": 0.018912342190742493, "step": 42515 }, { "epoch": 0.4001882352941176, "grad_norm": 0.6784336025792126, "learning_rate": 3.1623148465917093e-06, "loss": 0.027367568016052245, "step": 42520 }, { "epoch": 0.4002352941176471, "grad_norm": 0.7665500796627013, "learning_rate": 3.162128927592507e-06, "loss": 0.018155258893966675, "step": 42525 }, { "epoch": 0.4002823529411765, "grad_norm": 0.6645848274878805, "learning_rate": 3.1619430413811313e-06, "loss": 0.019766807556152344, "step": 42530 }, { "epoch": 0.4003294117647059, "grad_norm": 0.6914287117105643, "learning_rate": 3.1617571879479474e-06, "loss": 0.018336784839630128, "step": 42535 }, { "epoch": 0.4003764705882353, "grad_norm": 0.5703182544060481, "learning_rate": 3.161571367283321e-06, "loss": 0.016847053170204164, "step": 42540 }, { "epoch": 0.4004235294117647, "grad_norm": 0.4493493700428692, "learning_rate": 3.1613855793776253e-06, "loss": 0.013422995805740356, "step": 42545 }, { "epoch": 0.40047058823529413, "grad_norm": 0.49751135593836926, "learning_rate": 3.1611998242212362e-06, "loss": 0.02025912404060364, "step": 42550 }, { "epoch": 0.40051764705882353, "grad_norm": 0.8871178529411804, "learning_rate": 3.1610141018045328e-06, "loss": 0.0183961421251297, "step": 42555 }, { "epoch": 0.40056470588235293, "grad_norm": 0.34197135862518446, "learning_rate": 3.1608284121178994e-06, "loss": 0.016356763243675233, "step": 42560 }, { "epoch": 0.40061176470588233, "grad_norm": 0.617913963800611, "learning_rate": 3.160642755151723e-06, "loss": 0.019586315751075743, "step": 42565 }, { "epoch": 0.4006588235294118, "grad_norm": 0.4623712600881634, "learning_rate": 3.160457130896396e-06, "loss": 0.01712421178817749, "step": 42570 }, { "epoch": 0.4007058823529412, "grad_norm": 0.5948006219457953, "learning_rate": 3.1602715393423146e-06, "loss": 0.01725873053073883, "step": 42575 }, { "epoch": 0.4007529411764706, "grad_norm": 0.632134614146037, "learning_rate": 3.160085980479876e-06, "loss": 0.02064448595046997, "step": 42580 }, { "epoch": 0.4008, "grad_norm": 0.5160352868383774, "learning_rate": 3.159900454299486e-06, "loss": 0.016640058159828185, "step": 42585 }, { "epoch": 0.4008470588235294, "grad_norm": 0.7305489341460368, "learning_rate": 3.159714960791551e-06, "loss": 0.02293437272310257, "step": 42590 }, { "epoch": 0.40089411764705885, "grad_norm": 0.5816159670173685, "learning_rate": 3.159529499946483e-06, "loss": 0.025658011436462402, "step": 42595 }, { "epoch": 0.40094117647058825, "grad_norm": 0.491959142441714, "learning_rate": 3.1593440717546965e-06, "loss": 0.01596134006977081, "step": 42600 }, { "epoch": 0.40098823529411765, "grad_norm": 0.9592248041085222, "learning_rate": 3.159158676206611e-06, "loss": 0.02129124253988266, "step": 42605 }, { "epoch": 0.40103529411764705, "grad_norm": 0.49950962485982375, "learning_rate": 3.1589733132926505e-06, "loss": 0.02263598144054413, "step": 42610 }, { "epoch": 0.40108235294117645, "grad_norm": 0.4876872198925118, "learning_rate": 3.158787983003241e-06, "loss": 0.018096305429935455, "step": 42615 }, { "epoch": 0.4011294117647059, "grad_norm": 0.5699461108831212, "learning_rate": 3.1586026853288144e-06, "loss": 0.018565939366817476, "step": 42620 }, { "epoch": 0.4011764705882353, "grad_norm": 0.602913969955854, "learning_rate": 3.158417420259805e-06, "loss": 0.02569858431816101, "step": 42625 }, { "epoch": 0.4012235294117647, "grad_norm": 0.5914820184202185, "learning_rate": 3.1582321877866523e-06, "loss": 0.0216755673289299, "step": 42630 }, { "epoch": 0.4012705882352941, "grad_norm": 0.44783897482077395, "learning_rate": 3.158046987899799e-06, "loss": 0.016227900981903076, "step": 42635 }, { "epoch": 0.40131764705882356, "grad_norm": 0.6304141762525393, "learning_rate": 3.157861820589691e-06, "loss": 0.01468966007232666, "step": 42640 }, { "epoch": 0.40136470588235296, "grad_norm": 0.7520891085095828, "learning_rate": 3.1576766858467804e-06, "loss": 0.022486811876296996, "step": 42645 }, { "epoch": 0.40141176470588236, "grad_norm": 0.46832902701160956, "learning_rate": 3.157491583661521e-06, "loss": 0.01818876564502716, "step": 42650 }, { "epoch": 0.40145882352941176, "grad_norm": 0.4537286847065088, "learning_rate": 3.157306514024371e-06, "loss": 0.02075047791004181, "step": 42655 }, { "epoch": 0.40150588235294116, "grad_norm": 0.45093409987871796, "learning_rate": 3.157121476925794e-06, "loss": 0.020501026511192323, "step": 42660 }, { "epoch": 0.4015529411764706, "grad_norm": 0.44253992647905627, "learning_rate": 3.1569364723562552e-06, "loss": 0.017000123858451843, "step": 42665 }, { "epoch": 0.4016, "grad_norm": 0.41741298897969903, "learning_rate": 3.1567515003062255e-06, "loss": 0.018392971158027648, "step": 42670 }, { "epoch": 0.4016470588235294, "grad_norm": 0.3621696895402032, "learning_rate": 3.1565665607661784e-06, "loss": 0.015230253338813782, "step": 42675 }, { "epoch": 0.4016941176470588, "grad_norm": 0.5247235022257783, "learning_rate": 3.1563816537265927e-06, "loss": 0.018590930104255676, "step": 42680 }, { "epoch": 0.4017411764705882, "grad_norm": 0.47889048622395053, "learning_rate": 3.1561967791779496e-06, "loss": 0.017885249853134156, "step": 42685 }, { "epoch": 0.40178823529411767, "grad_norm": 0.4823745583430749, "learning_rate": 3.1560119371107355e-06, "loss": 0.01664663553237915, "step": 42690 }, { "epoch": 0.40183529411764707, "grad_norm": 0.5644553979902414, "learning_rate": 3.1558271275154405e-06, "loss": 0.014770308136940002, "step": 42695 }, { "epoch": 0.40188235294117647, "grad_norm": 0.6401872290634688, "learning_rate": 3.1556423503825574e-06, "loss": 0.022159546613693237, "step": 42700 }, { "epoch": 0.40192941176470587, "grad_norm": 0.4971424853613981, "learning_rate": 3.155457605702585e-06, "loss": 0.02073171138763428, "step": 42705 }, { "epoch": 0.40197647058823527, "grad_norm": 0.9989786629963272, "learning_rate": 3.155272893466023e-06, "loss": 0.01677105724811554, "step": 42710 }, { "epoch": 0.4020235294117647, "grad_norm": 0.5249669807740451, "learning_rate": 3.155088213663379e-06, "loss": 0.0178224578499794, "step": 42715 }, { "epoch": 0.4020705882352941, "grad_norm": 0.5677063086666722, "learning_rate": 3.1549035662851592e-06, "loss": 0.020721065998077392, "step": 42720 }, { "epoch": 0.4021176470588235, "grad_norm": 0.5970871629525021, "learning_rate": 3.1547189513218796e-06, "loss": 0.01942518204450607, "step": 42725 }, { "epoch": 0.4021647058823529, "grad_norm": 0.5814244048359727, "learning_rate": 3.1545343687640557e-06, "loss": 0.02147056758403778, "step": 42730 }, { "epoch": 0.4022117647058824, "grad_norm": 0.5148956102511241, "learning_rate": 3.1543498186022088e-06, "loss": 0.02166019380092621, "step": 42735 }, { "epoch": 0.4022588235294118, "grad_norm": 0.29280206886108584, "learning_rate": 3.1541653008268635e-06, "loss": 0.015042012929916382, "step": 42740 }, { "epoch": 0.4023058823529412, "grad_norm": 0.3944332713912674, "learning_rate": 3.1539808154285493e-06, "loss": 0.021193179488182067, "step": 42745 }, { "epoch": 0.4023529411764706, "grad_norm": 0.633430163722607, "learning_rate": 3.1537963623977975e-06, "loss": 0.025788351893424988, "step": 42750 }, { "epoch": 0.4024, "grad_norm": 0.3957781120812361, "learning_rate": 3.1536119417251455e-06, "loss": 0.018952006101608278, "step": 42755 }, { "epoch": 0.40244705882352944, "grad_norm": 0.5954946424741425, "learning_rate": 3.153427553401133e-06, "loss": 0.017834380269050598, "step": 42760 }, { "epoch": 0.40249411764705884, "grad_norm": 0.7213307655289907, "learning_rate": 3.153243197416304e-06, "loss": 0.019921377301216125, "step": 42765 }, { "epoch": 0.40254117647058824, "grad_norm": 0.44192433208425125, "learning_rate": 3.1530588737612066e-06, "loss": 0.022297552227973937, "step": 42770 }, { "epoch": 0.40258823529411764, "grad_norm": 0.7378256219168801, "learning_rate": 3.1528745824263932e-06, "loss": 0.02285514771938324, "step": 42775 }, { "epoch": 0.40263529411764704, "grad_norm": 0.4548367265224287, "learning_rate": 3.15269032340242e-06, "loss": 0.015805886685848237, "step": 42780 }, { "epoch": 0.4026823529411765, "grad_norm": 0.44344524426921444, "learning_rate": 3.1525060966798447e-06, "loss": 0.015722054243087768, "step": 42785 }, { "epoch": 0.4027294117647059, "grad_norm": 0.6947094163457772, "learning_rate": 3.152321902249233e-06, "loss": 0.019862991571426392, "step": 42790 }, { "epoch": 0.4027764705882353, "grad_norm": 0.39106968460189157, "learning_rate": 3.1521377401011505e-06, "loss": 0.015348854660987853, "step": 42795 }, { "epoch": 0.4028235294117647, "grad_norm": 0.4726241369875303, "learning_rate": 3.1519536102261694e-06, "loss": 0.014786487817764283, "step": 42800 }, { "epoch": 0.4028705882352941, "grad_norm": 0.36632077796615065, "learning_rate": 3.1517695126148646e-06, "loss": 0.016660323739051817, "step": 42805 }, { "epoch": 0.40291764705882355, "grad_norm": 0.6810504584880327, "learning_rate": 3.1515854472578147e-06, "loss": 0.01894295811653137, "step": 42810 }, { "epoch": 0.40296470588235295, "grad_norm": 0.5811467606458536, "learning_rate": 3.151401414145602e-06, "loss": 0.018871778249740602, "step": 42815 }, { "epoch": 0.40301176470588235, "grad_norm": 0.5134514272445873, "learning_rate": 3.1512174132688143e-06, "loss": 0.013590371608734131, "step": 42820 }, { "epoch": 0.40305882352941175, "grad_norm": 0.46913379171123126, "learning_rate": 3.151033444618041e-06, "loss": 0.019746050238609314, "step": 42825 }, { "epoch": 0.4031058823529412, "grad_norm": 0.6970020348810232, "learning_rate": 3.150849508183878e-06, "loss": 0.02007162421941757, "step": 42830 }, { "epoch": 0.4031529411764706, "grad_norm": 0.5350882074061222, "learning_rate": 3.150665603956921e-06, "loss": 0.017071790993213654, "step": 42835 }, { "epoch": 0.4032, "grad_norm": 0.6620221758212531, "learning_rate": 3.1504817319277743e-06, "loss": 0.01983284652233124, "step": 42840 }, { "epoch": 0.4032470588235294, "grad_norm": 0.6409792315492241, "learning_rate": 3.150297892087042e-06, "loss": 0.017999152839183807, "step": 42845 }, { "epoch": 0.4032941176470588, "grad_norm": 1.3111934108083936, "learning_rate": 3.1501140844253346e-06, "loss": 0.022773824632167816, "step": 42850 }, { "epoch": 0.40334117647058826, "grad_norm": 0.46814490862032554, "learning_rate": 3.1499303089332655e-06, "loss": 0.021520990133285522, "step": 42855 }, { "epoch": 0.40338823529411766, "grad_norm": 0.5855447329172155, "learning_rate": 3.149746565601452e-06, "loss": 0.018463873863220216, "step": 42860 }, { "epoch": 0.40343529411764706, "grad_norm": 0.532056340353021, "learning_rate": 3.1495628544205155e-06, "loss": 0.019981646537780763, "step": 42865 }, { "epoch": 0.40348235294117646, "grad_norm": 0.7071138587418097, "learning_rate": 3.14937917538108e-06, "loss": 0.020031002163887025, "step": 42870 }, { "epoch": 0.40352941176470586, "grad_norm": 0.6467321436184005, "learning_rate": 3.1491955284737753e-06, "loss": 0.01852550357580185, "step": 42875 }, { "epoch": 0.4035764705882353, "grad_norm": 0.7325033334295414, "learning_rate": 3.149011913689234e-06, "loss": 0.020507660508155823, "step": 42880 }, { "epoch": 0.4036235294117647, "grad_norm": 0.5371594916465424, "learning_rate": 3.1488283310180924e-06, "loss": 0.02142465263605118, "step": 42885 }, { "epoch": 0.4036705882352941, "grad_norm": 0.4870708636702842, "learning_rate": 3.14864478045099e-06, "loss": 0.02169654369354248, "step": 42890 }, { "epoch": 0.4037176470588235, "grad_norm": 0.40266705741300013, "learning_rate": 3.148461261978572e-06, "loss": 0.01762719601392746, "step": 42895 }, { "epoch": 0.4037647058823529, "grad_norm": 0.7146568716292421, "learning_rate": 3.148277775591485e-06, "loss": 0.021196907758712767, "step": 42900 }, { "epoch": 0.40381176470588237, "grad_norm": 0.5009538410357447, "learning_rate": 3.148094321280383e-06, "loss": 0.021195557713508607, "step": 42905 }, { "epoch": 0.40385882352941177, "grad_norm": 0.6151524334500171, "learning_rate": 3.147910899035919e-06, "loss": 0.01810588538646698, "step": 42910 }, { "epoch": 0.40390588235294117, "grad_norm": 0.4324343922749134, "learning_rate": 3.147727508848754e-06, "loss": 0.015323685109615326, "step": 42915 }, { "epoch": 0.40395294117647057, "grad_norm": 0.5535826772536346, "learning_rate": 3.1475441507095512e-06, "loss": 0.022917202115058898, "step": 42920 }, { "epoch": 0.404, "grad_norm": 0.5005462814839229, "learning_rate": 3.147360824608976e-06, "loss": 0.018406975269317626, "step": 42925 }, { "epoch": 0.4040470588235294, "grad_norm": 0.3162328113065856, "learning_rate": 3.1471775305377005e-06, "loss": 0.021216964721679686, "step": 42930 }, { "epoch": 0.4040941176470588, "grad_norm": 0.4986365083895856, "learning_rate": 3.146994268486399e-06, "loss": 0.018670696020126342, "step": 42935 }, { "epoch": 0.4041411764705882, "grad_norm": 0.41769505688699277, "learning_rate": 3.14681103844575e-06, "loss": 0.018786582350730895, "step": 42940 }, { "epoch": 0.4041882352941176, "grad_norm": 0.624547407566033, "learning_rate": 3.146627840406435e-06, "loss": 0.016853564977645875, "step": 42945 }, { "epoch": 0.4042352941176471, "grad_norm": 0.47684284374435365, "learning_rate": 3.1464446743591417e-06, "loss": 0.02310345768928528, "step": 42950 }, { "epoch": 0.4042823529411765, "grad_norm": 1.285951924632565, "learning_rate": 3.1462615402945566e-06, "loss": 0.017564594745635986, "step": 42955 }, { "epoch": 0.4043294117647059, "grad_norm": 0.6779012832547768, "learning_rate": 3.1460784382033766e-06, "loss": 0.020082426071166993, "step": 42960 }, { "epoch": 0.4043764705882353, "grad_norm": 0.5492134320272926, "learning_rate": 3.1458953680762973e-06, "loss": 0.01907922625541687, "step": 42965 }, { "epoch": 0.4044235294117647, "grad_norm": 0.4782252645375096, "learning_rate": 3.145712329904021e-06, "loss": 0.017953771352767944, "step": 42970 }, { "epoch": 0.40447058823529414, "grad_norm": 0.6162550260252957, "learning_rate": 3.145529323677251e-06, "loss": 0.01942310631275177, "step": 42975 }, { "epoch": 0.40451764705882354, "grad_norm": 0.4640888011314463, "learning_rate": 3.1453463493866977e-06, "loss": 0.021366459131240845, "step": 42980 }, { "epoch": 0.40456470588235294, "grad_norm": 0.6138317920102009, "learning_rate": 3.1451634070230722e-06, "loss": 0.01907203793525696, "step": 42985 }, { "epoch": 0.40461176470588234, "grad_norm": 0.5314592909275594, "learning_rate": 3.144980496577092e-06, "loss": 0.01922810971736908, "step": 42990 }, { "epoch": 0.40465882352941174, "grad_norm": 0.521803959514531, "learning_rate": 3.1447976180394755e-06, "loss": 0.01980891227722168, "step": 42995 }, { "epoch": 0.4047058823529412, "grad_norm": 0.5490258864316211, "learning_rate": 3.1446147714009484e-06, "loss": 0.01990817189216614, "step": 43000 }, { "epoch": 0.4047529411764706, "grad_norm": 0.5046320381556442, "learning_rate": 3.1444319566522368e-06, "loss": 0.018567217886447905, "step": 43005 }, { "epoch": 0.4048, "grad_norm": 0.5034232755510081, "learning_rate": 3.144249173784073e-06, "loss": 0.02059776782989502, "step": 43010 }, { "epoch": 0.4048470588235294, "grad_norm": 0.6235673081363656, "learning_rate": 3.144066422787192e-06, "loss": 0.019593289494514464, "step": 43015 }, { "epoch": 0.40489411764705885, "grad_norm": 0.36875605192923766, "learning_rate": 3.143883703652332e-06, "loss": 0.01565377414226532, "step": 43020 }, { "epoch": 0.40494117647058825, "grad_norm": 2.420249610231474, "learning_rate": 3.1437010163702368e-06, "loss": 0.019468624889850617, "step": 43025 }, { "epoch": 0.40498823529411765, "grad_norm": 0.520013004474067, "learning_rate": 3.143518360931652e-06, "loss": 0.01640394926071167, "step": 43030 }, { "epoch": 0.40503529411764705, "grad_norm": 0.4249482096940631, "learning_rate": 3.1433357373273287e-06, "loss": 0.019108128547668458, "step": 43035 }, { "epoch": 0.40508235294117645, "grad_norm": 0.519589740428941, "learning_rate": 3.1431531455480197e-06, "loss": 0.017576465010643007, "step": 43040 }, { "epoch": 0.4051294117647059, "grad_norm": 0.6465726261560185, "learning_rate": 3.142970585584484e-06, "loss": 0.015263521671295166, "step": 43045 }, { "epoch": 0.4051764705882353, "grad_norm": 0.3744632712077603, "learning_rate": 3.1427880574274814e-06, "loss": 0.016310980916023253, "step": 43050 }, { "epoch": 0.4052235294117647, "grad_norm": 0.6007906017854593, "learning_rate": 3.142605561067779e-06, "loss": 0.015335191786289216, "step": 43055 }, { "epoch": 0.4052705882352941, "grad_norm": 0.4541988888619293, "learning_rate": 3.1424230964961446e-06, "loss": 0.029803696274757385, "step": 43060 }, { "epoch": 0.4053176470588235, "grad_norm": 0.41799982662530444, "learning_rate": 3.1422406637033516e-06, "loss": 0.020060761272907256, "step": 43065 }, { "epoch": 0.40536470588235296, "grad_norm": 0.646327589322886, "learning_rate": 3.1420582626801755e-06, "loss": 0.017675222456455232, "step": 43070 }, { "epoch": 0.40541176470588236, "grad_norm": 0.5051606031939969, "learning_rate": 3.1418758934173976e-06, "loss": 0.020181167125701904, "step": 43075 }, { "epoch": 0.40545882352941176, "grad_norm": 0.46455443193821727, "learning_rate": 3.1416935559058013e-06, "loss": 0.01549759805202484, "step": 43080 }, { "epoch": 0.40550588235294116, "grad_norm": 0.35699651932459936, "learning_rate": 3.141511250136175e-06, "loss": 0.017954140901565552, "step": 43085 }, { "epoch": 0.40555294117647056, "grad_norm": 0.5938269860204887, "learning_rate": 3.14132897609931e-06, "loss": 0.017879295349121093, "step": 43090 }, { "epoch": 0.4056, "grad_norm": 0.7906821179858621, "learning_rate": 3.1411467337860003e-06, "loss": 0.020401567220687866, "step": 43095 }, { "epoch": 0.4056470588235294, "grad_norm": 0.5921228298068557, "learning_rate": 3.1409645231870463e-06, "loss": 0.018900471925735473, "step": 43100 }, { "epoch": 0.4056941176470588, "grad_norm": 0.5153963886304493, "learning_rate": 3.14078234429325e-06, "loss": 0.01778201311826706, "step": 43105 }, { "epoch": 0.4057411764705882, "grad_norm": 0.6106843222061504, "learning_rate": 3.140600197095418e-06, "loss": 0.02229967713356018, "step": 43110 }, { "epoch": 0.40578823529411767, "grad_norm": 0.48481723440677865, "learning_rate": 3.140418081584361e-06, "loss": 0.02084997296333313, "step": 43115 }, { "epoch": 0.40583529411764707, "grad_norm": 0.48346446114760044, "learning_rate": 3.1402359977508922e-06, "loss": 0.017390406131744383, "step": 43120 }, { "epoch": 0.40588235294117647, "grad_norm": 0.5824837935940991, "learning_rate": 3.1400539455858287e-06, "loss": 0.0253696471452713, "step": 43125 }, { "epoch": 0.40592941176470587, "grad_norm": 0.609707029636917, "learning_rate": 3.139871925079993e-06, "loss": 0.020557281374931336, "step": 43130 }, { "epoch": 0.40597647058823527, "grad_norm": 0.8592247109328854, "learning_rate": 3.1396899362242093e-06, "loss": 0.021865302324295045, "step": 43135 }, { "epoch": 0.4060235294117647, "grad_norm": 0.34977808768130175, "learning_rate": 3.1395079790093076e-06, "loss": 0.015052224695682525, "step": 43140 }, { "epoch": 0.4060705882352941, "grad_norm": 0.5822283600506459, "learning_rate": 3.1393260534261183e-06, "loss": 0.026160192489624024, "step": 43145 }, { "epoch": 0.4061176470588235, "grad_norm": 0.6295351981558639, "learning_rate": 3.1391441594654797e-06, "loss": 0.017958715558052063, "step": 43150 }, { "epoch": 0.4061647058823529, "grad_norm": 0.4840616378489217, "learning_rate": 3.1389622971182304e-06, "loss": 0.01714555025100708, "step": 43155 }, { "epoch": 0.40621176470588233, "grad_norm": 0.5037695247479111, "learning_rate": 3.1387804663752153e-06, "loss": 0.01652040481567383, "step": 43160 }, { "epoch": 0.4062588235294118, "grad_norm": 0.49997335681696803, "learning_rate": 3.1385986672272804e-06, "loss": 0.015139900147914886, "step": 43165 }, { "epoch": 0.4063058823529412, "grad_norm": 0.42909871375128245, "learning_rate": 3.1384168996652773e-06, "loss": 0.01874555051326752, "step": 43170 }, { "epoch": 0.4063529411764706, "grad_norm": 0.675435653833004, "learning_rate": 3.1382351636800616e-06, "loss": 0.02197125107049942, "step": 43175 }, { "epoch": 0.4064, "grad_norm": 0.42742451442502266, "learning_rate": 3.1380534592624908e-06, "loss": 0.017153210937976837, "step": 43180 }, { "epoch": 0.40644705882352944, "grad_norm": 0.4756136302730706, "learning_rate": 3.137871786403427e-06, "loss": 0.016840991377830506, "step": 43185 }, { "epoch": 0.40649411764705884, "grad_norm": 0.3649648634551372, "learning_rate": 3.1376901450937362e-06, "loss": 0.020093894004821776, "step": 43190 }, { "epoch": 0.40654117647058824, "grad_norm": 0.4648905759719302, "learning_rate": 3.137508535324289e-06, "loss": 0.019546309113502504, "step": 43195 }, { "epoch": 0.40658823529411764, "grad_norm": 0.836766761895802, "learning_rate": 3.1373269570859576e-06, "loss": 0.01971845477819443, "step": 43200 }, { "epoch": 0.40663529411764704, "grad_norm": 0.6080166614374863, "learning_rate": 3.137145410369619e-06, "loss": 0.016384290158748628, "step": 43205 }, { "epoch": 0.4066823529411765, "grad_norm": 0.5595740571156692, "learning_rate": 3.136963895166155e-06, "loss": 0.022512930631637573, "step": 43210 }, { "epoch": 0.4067294117647059, "grad_norm": 0.5701023908371219, "learning_rate": 3.136782411466449e-06, "loss": 0.015443414449691772, "step": 43215 }, { "epoch": 0.4067764705882353, "grad_norm": 0.48428499622633536, "learning_rate": 3.136600959261389e-06, "loss": 0.020168282091617584, "step": 43220 }, { "epoch": 0.4068235294117647, "grad_norm": 0.6975152761909648, "learning_rate": 3.1364195385418674e-06, "loss": 0.019230303168296815, "step": 43225 }, { "epoch": 0.4068705882352941, "grad_norm": 0.6626529822386479, "learning_rate": 3.1362381492987794e-06, "loss": 0.021601441502571105, "step": 43230 }, { "epoch": 0.40691764705882355, "grad_norm": 0.7217846799612663, "learning_rate": 3.1360567915230234e-06, "loss": 0.019777436554431916, "step": 43235 }, { "epoch": 0.40696470588235295, "grad_norm": 0.6646052976523701, "learning_rate": 3.135875465205503e-06, "loss": 0.02123182564973831, "step": 43240 }, { "epoch": 0.40701176470588235, "grad_norm": 0.7414652191828478, "learning_rate": 3.1356941703371248e-06, "loss": 0.022142428159713744, "step": 43245 }, { "epoch": 0.40705882352941175, "grad_norm": 0.550613988932491, "learning_rate": 3.1355129069087985e-06, "loss": 0.01708795130252838, "step": 43250 }, { "epoch": 0.40710588235294115, "grad_norm": 0.620001547746924, "learning_rate": 3.135331674911438e-06, "loss": 0.019821518659591676, "step": 43255 }, { "epoch": 0.4071529411764706, "grad_norm": 0.5094911795016315, "learning_rate": 3.135150474335962e-06, "loss": 0.020219722390174867, "step": 43260 }, { "epoch": 0.4072, "grad_norm": 0.7443715623025813, "learning_rate": 3.13496930517329e-06, "loss": 0.019303347170352935, "step": 43265 }, { "epoch": 0.4072470588235294, "grad_norm": 0.471647933653019, "learning_rate": 3.1347881674143478e-06, "loss": 0.020743663609027862, "step": 43270 }, { "epoch": 0.4072941176470588, "grad_norm": 0.4874711354956438, "learning_rate": 3.1346070610500636e-06, "loss": 0.022049324214458467, "step": 43275 }, { "epoch": 0.40734117647058826, "grad_norm": 0.5031000920542078, "learning_rate": 3.1344259860713698e-06, "loss": 0.01943016201257706, "step": 43280 }, { "epoch": 0.40738823529411766, "grad_norm": 0.5304682418884374, "learning_rate": 3.1342449424692022e-06, "loss": 0.017753291130065917, "step": 43285 }, { "epoch": 0.40743529411764706, "grad_norm": 0.5604432036455269, "learning_rate": 3.1340639302345007e-06, "loss": 0.020906031131744385, "step": 43290 }, { "epoch": 0.40748235294117646, "grad_norm": 0.573573092979053, "learning_rate": 3.1338829493582078e-06, "loss": 0.01883702278137207, "step": 43295 }, { "epoch": 0.40752941176470586, "grad_norm": 0.5287505806581488, "learning_rate": 3.1337019998312713e-06, "loss": 0.01821589320898056, "step": 43300 }, { "epoch": 0.4075764705882353, "grad_norm": 0.4438867738291609, "learning_rate": 3.133521081644641e-06, "loss": 0.0146861732006073, "step": 43305 }, { "epoch": 0.4076235294117647, "grad_norm": 0.6665310806222591, "learning_rate": 3.1333401947892713e-06, "loss": 0.017633384466171263, "step": 43310 }, { "epoch": 0.4076705882352941, "grad_norm": 0.5673613490041225, "learning_rate": 3.13315933925612e-06, "loss": 0.017915031313896178, "step": 43315 }, { "epoch": 0.4077176470588235, "grad_norm": 0.5687662153426254, "learning_rate": 3.1329785150361484e-06, "loss": 0.01671176254749298, "step": 43320 }, { "epoch": 0.4077647058823529, "grad_norm": 0.5776581445654906, "learning_rate": 3.132797722120322e-06, "loss": 0.0247947096824646, "step": 43325 }, { "epoch": 0.4078117647058824, "grad_norm": 0.6223010732621932, "learning_rate": 3.1326169604996097e-06, "loss": 0.0197604238986969, "step": 43330 }, { "epoch": 0.4078588235294118, "grad_norm": 0.5884733682271388, "learning_rate": 3.1324362301649836e-06, "loss": 0.019021223485469817, "step": 43335 }, { "epoch": 0.4079058823529412, "grad_norm": 0.37667757432445564, "learning_rate": 3.1322555311074197e-06, "loss": 0.016871190071105956, "step": 43340 }, { "epoch": 0.4079529411764706, "grad_norm": 0.534446343364336, "learning_rate": 3.1320748633178987e-06, "loss": 0.019973510503768922, "step": 43345 }, { "epoch": 0.408, "grad_norm": 0.38286772119469115, "learning_rate": 3.131894226787403e-06, "loss": 0.018129965662956236, "step": 43350 }, { "epoch": 0.40804705882352943, "grad_norm": 0.6362064089723842, "learning_rate": 3.1317136215069193e-06, "loss": 0.019225239753723145, "step": 43355 }, { "epoch": 0.40809411764705883, "grad_norm": 0.4454988953679768, "learning_rate": 3.1315330474674395e-06, "loss": 0.019749616086483002, "step": 43360 }, { "epoch": 0.40814117647058823, "grad_norm": 0.6072172995194782, "learning_rate": 3.1313525046599563e-06, "loss": 0.021473892033100128, "step": 43365 }, { "epoch": 0.40818823529411763, "grad_norm": 0.5618598592271538, "learning_rate": 3.1311719930754687e-06, "loss": 0.01850341558456421, "step": 43370 }, { "epoch": 0.4082352941176471, "grad_norm": 0.5168565071385969, "learning_rate": 3.130991512704979e-06, "loss": 0.01727650910615921, "step": 43375 }, { "epoch": 0.4082823529411765, "grad_norm": 0.8264753993063705, "learning_rate": 3.13081106353949e-06, "loss": 0.021770080924034117, "step": 43380 }, { "epoch": 0.4083294117647059, "grad_norm": 0.3816972876346585, "learning_rate": 3.130630645570013e-06, "loss": 0.017236551642417906, "step": 43385 }, { "epoch": 0.4083764705882353, "grad_norm": 0.5500768414914495, "learning_rate": 3.130450258787559e-06, "loss": 0.02028414309024811, "step": 43390 }, { "epoch": 0.4084235294117647, "grad_norm": 0.8788596548653491, "learning_rate": 3.130269903183144e-06, "loss": 0.02394433617591858, "step": 43395 }, { "epoch": 0.40847058823529414, "grad_norm": 0.4412661184498618, "learning_rate": 3.1300895787477886e-06, "loss": 0.019350966811180113, "step": 43400 }, { "epoch": 0.40851764705882354, "grad_norm": 0.8401195594343265, "learning_rate": 3.1299092854725156e-06, "loss": 0.020529018342494966, "step": 43405 }, { "epoch": 0.40856470588235294, "grad_norm": 0.6628377638524798, "learning_rate": 3.129729023348352e-06, "loss": 0.019393345713615416, "step": 43410 }, { "epoch": 0.40861176470588234, "grad_norm": 0.5525082198891325, "learning_rate": 3.1295487923663276e-06, "loss": 0.020780101418495178, "step": 43415 }, { "epoch": 0.40865882352941174, "grad_norm": 0.6804823026045586, "learning_rate": 3.129368592517478e-06, "loss": 0.02003040611743927, "step": 43420 }, { "epoch": 0.4087058823529412, "grad_norm": 0.5516335064197276, "learning_rate": 3.12918842379284e-06, "loss": 0.019421643018722533, "step": 43425 }, { "epoch": 0.4087529411764706, "grad_norm": 0.6881493466767934, "learning_rate": 3.129008286183455e-06, "loss": 0.023651137948036194, "step": 43430 }, { "epoch": 0.4088, "grad_norm": 0.5426065258309307, "learning_rate": 3.128828179680368e-06, "loss": 0.019096174836158754, "step": 43435 }, { "epoch": 0.4088470588235294, "grad_norm": 1.8904851820091997, "learning_rate": 3.128648104274628e-06, "loss": 0.019395989179611207, "step": 43440 }, { "epoch": 0.4088941176470588, "grad_norm": 0.622828551402021, "learning_rate": 3.1284680599572875e-06, "loss": 0.019573327898979188, "step": 43445 }, { "epoch": 0.40894117647058825, "grad_norm": 0.6694447020079061, "learning_rate": 3.128288046719401e-06, "loss": 0.02818286418914795, "step": 43450 }, { "epoch": 0.40898823529411765, "grad_norm": 0.5055711285481133, "learning_rate": 3.128108064552029e-06, "loss": 0.018939393758773803, "step": 43455 }, { "epoch": 0.40903529411764705, "grad_norm": 0.30076402682491804, "learning_rate": 3.1279281134462347e-06, "loss": 0.01850189417600632, "step": 43460 }, { "epoch": 0.40908235294117645, "grad_norm": 1.0431560574119738, "learning_rate": 3.127748193393084e-06, "loss": 0.026967328786849976, "step": 43465 }, { "epoch": 0.4091294117647059, "grad_norm": 0.7688846221517145, "learning_rate": 3.1275683043836468e-06, "loss": 0.0176441490650177, "step": 43470 }, { "epoch": 0.4091764705882353, "grad_norm": 0.6696969059785465, "learning_rate": 3.1273884464089976e-06, "loss": 0.026887011528015137, "step": 43475 }, { "epoch": 0.4092235294117647, "grad_norm": 0.5189886021306862, "learning_rate": 3.127208619460214e-06, "loss": 0.016509231925010682, "step": 43480 }, { "epoch": 0.4092705882352941, "grad_norm": 0.45848398626135484, "learning_rate": 3.1270288235283765e-06, "loss": 0.01705198585987091, "step": 43485 }, { "epoch": 0.4093176470588235, "grad_norm": 0.9877967241585642, "learning_rate": 3.1268490586045695e-06, "loss": 0.024153247475624084, "step": 43490 }, { "epoch": 0.40936470588235296, "grad_norm": 1.1348059985434125, "learning_rate": 3.1266693246798818e-06, "loss": 0.019982315599918365, "step": 43495 }, { "epoch": 0.40941176470588236, "grad_norm": 0.6389986992155486, "learning_rate": 3.1264896217454042e-06, "loss": 0.017980489134788512, "step": 43500 }, { "epoch": 0.40945882352941176, "grad_norm": 0.7148440510178119, "learning_rate": 3.1263099497922333e-06, "loss": 0.019658556580543517, "step": 43505 }, { "epoch": 0.40950588235294116, "grad_norm": 0.568172065917739, "learning_rate": 3.126130308811467e-06, "loss": 0.022146794199943542, "step": 43510 }, { "epoch": 0.40955294117647056, "grad_norm": 0.29892145155494887, "learning_rate": 3.1259506987942085e-06, "loss": 0.017778414487838744, "step": 43515 }, { "epoch": 0.4096, "grad_norm": 0.5676423440785808, "learning_rate": 3.125771119731563e-06, "loss": 0.020046603679656983, "step": 43520 }, { "epoch": 0.4096470588235294, "grad_norm": 0.624710683788257, "learning_rate": 3.1255915716146408e-06, "loss": 0.01957182139158249, "step": 43525 }, { "epoch": 0.4096941176470588, "grad_norm": 0.5205070333919376, "learning_rate": 3.1254120544345547e-06, "loss": 0.021206888556480407, "step": 43530 }, { "epoch": 0.4097411764705882, "grad_norm": 0.4398355189020223, "learning_rate": 3.1252325681824227e-06, "loss": 0.017202073335647584, "step": 43535 }, { "epoch": 0.4097882352941176, "grad_norm": 0.5767858256690781, "learning_rate": 3.1250531128493632e-06, "loss": 0.018895290791988373, "step": 43540 }, { "epoch": 0.4098352941176471, "grad_norm": 0.43099428440118903, "learning_rate": 3.124873688426502e-06, "loss": 0.017784896492958068, "step": 43545 }, { "epoch": 0.4098823529411765, "grad_norm": 0.5608004608879613, "learning_rate": 3.1246942949049647e-06, "loss": 0.0201934814453125, "step": 43550 }, { "epoch": 0.4099294117647059, "grad_norm": 0.6663203993858118, "learning_rate": 3.124514932275884e-06, "loss": 0.0285702645778656, "step": 43555 }, { "epoch": 0.4099764705882353, "grad_norm": 0.5531388213947834, "learning_rate": 3.1243356005303937e-06, "loss": 0.01948305368423462, "step": 43560 }, { "epoch": 0.41002352941176473, "grad_norm": 0.5605447059757469, "learning_rate": 3.1241562996596325e-06, "loss": 0.022298617660999297, "step": 43565 }, { "epoch": 0.41007058823529413, "grad_norm": 0.6126254165798923, "learning_rate": 3.1239770296547417e-06, "loss": 0.016681846976280213, "step": 43570 }, { "epoch": 0.41011764705882353, "grad_norm": 0.45872529762384173, "learning_rate": 3.1237977905068665e-06, "loss": 0.017152345180511473, "step": 43575 }, { "epoch": 0.41016470588235293, "grad_norm": 0.597031940290669, "learning_rate": 3.123618582207157e-06, "loss": 0.018974103033542633, "step": 43580 }, { "epoch": 0.41021176470588233, "grad_norm": 0.5909256036137138, "learning_rate": 3.1234394047467635e-06, "loss": 0.01600879430770874, "step": 43585 }, { "epoch": 0.4102588235294118, "grad_norm": 0.5189244862045705, "learning_rate": 3.1232602581168442e-06, "loss": 0.01751495599746704, "step": 43590 }, { "epoch": 0.4103058823529412, "grad_norm": 0.496056033050624, "learning_rate": 3.1230811423085565e-06, "loss": 0.020407797396183015, "step": 43595 }, { "epoch": 0.4103529411764706, "grad_norm": 0.6032124157192132, "learning_rate": 3.1229020573130654e-06, "loss": 0.02378915697336197, "step": 43600 }, { "epoch": 0.4104, "grad_norm": 0.45129447503098713, "learning_rate": 3.122723003121536e-06, "loss": 0.02013028860092163, "step": 43605 }, { "epoch": 0.4104470588235294, "grad_norm": 0.5520147394375734, "learning_rate": 3.1225439797251395e-06, "loss": 0.02087036371231079, "step": 43610 }, { "epoch": 0.41049411764705884, "grad_norm": 0.8553831374971342, "learning_rate": 3.1223649871150484e-06, "loss": 0.021802179515361786, "step": 43615 }, { "epoch": 0.41054117647058824, "grad_norm": 0.4497613649234338, "learning_rate": 3.1221860252824414e-06, "loss": 0.02191693037748337, "step": 43620 }, { "epoch": 0.41058823529411764, "grad_norm": 0.5583985658291054, "learning_rate": 3.122007094218499e-06, "loss": 0.014783751964569092, "step": 43625 }, { "epoch": 0.41063529411764704, "grad_norm": 0.3765028548854831, "learning_rate": 3.1218281939144047e-06, "loss": 0.019135738909244537, "step": 43630 }, { "epoch": 0.41068235294117644, "grad_norm": 0.5411215192497747, "learning_rate": 3.1216493243613465e-06, "loss": 0.02297690510749817, "step": 43635 }, { "epoch": 0.4107294117647059, "grad_norm": 0.3604502707892546, "learning_rate": 3.1214704855505164e-06, "loss": 0.018147604167461397, "step": 43640 }, { "epoch": 0.4107764705882353, "grad_norm": 0.6971511505234478, "learning_rate": 3.1212916774731084e-06, "loss": 0.022242659330368043, "step": 43645 }, { "epoch": 0.4108235294117647, "grad_norm": 0.5112969711720953, "learning_rate": 3.121112900120322e-06, "loss": 0.0216021791100502, "step": 43650 }, { "epoch": 0.4108705882352941, "grad_norm": 0.3826279157739177, "learning_rate": 3.1209341534833587e-06, "loss": 0.025150907039642335, "step": 43655 }, { "epoch": 0.41091764705882355, "grad_norm": 0.5121147701208023, "learning_rate": 3.1207554375534245e-06, "loss": 0.017461055517196657, "step": 43660 }, { "epoch": 0.41096470588235295, "grad_norm": 0.6028062193037875, "learning_rate": 3.1205767523217274e-06, "loss": 0.019213846325874327, "step": 43665 }, { "epoch": 0.41101176470588235, "grad_norm": 0.6391689683842032, "learning_rate": 3.120398097779481e-06, "loss": 0.021040743589401244, "step": 43670 }, { "epoch": 0.41105882352941175, "grad_norm": 0.4364010590978147, "learning_rate": 3.1202194739179e-06, "loss": 0.017284269630908965, "step": 43675 }, { "epoch": 0.41110588235294115, "grad_norm": 0.6957350020590852, "learning_rate": 3.120040880728205e-06, "loss": 0.02134379595518112, "step": 43680 }, { "epoch": 0.4111529411764706, "grad_norm": 0.410065430119529, "learning_rate": 3.1198623182016195e-06, "loss": 0.01927029937505722, "step": 43685 }, { "epoch": 0.4112, "grad_norm": 0.6281169572266982, "learning_rate": 3.11968378632937e-06, "loss": 0.024130991101264952, "step": 43690 }, { "epoch": 0.4112470588235294, "grad_norm": 0.5211231819654467, "learning_rate": 3.1195052851026853e-06, "loss": 0.019149112701416015, "step": 43695 }, { "epoch": 0.4112941176470588, "grad_norm": 0.6885396568675101, "learning_rate": 3.1193268145128e-06, "loss": 0.01727433204650879, "step": 43700 }, { "epoch": 0.4113411764705882, "grad_norm": 0.6139890372343626, "learning_rate": 3.119148374550953e-06, "loss": 0.02180219888687134, "step": 43705 }, { "epoch": 0.41138823529411767, "grad_norm": 0.3396168547133698, "learning_rate": 3.1189699652083814e-06, "loss": 0.017802152037620544, "step": 43710 }, { "epoch": 0.41143529411764707, "grad_norm": 0.5160078618031914, "learning_rate": 3.1187915864763322e-06, "loss": 0.01952454149723053, "step": 43715 }, { "epoch": 0.41148235294117647, "grad_norm": 0.46883792456844225, "learning_rate": 3.1186132383460515e-06, "loss": 0.023257309198379518, "step": 43720 }, { "epoch": 0.41152941176470587, "grad_norm": 0.635717051239018, "learning_rate": 3.1184349208087916e-06, "loss": 0.016955664753913878, "step": 43725 }, { "epoch": 0.4115764705882353, "grad_norm": 0.5930106563079681, "learning_rate": 3.118256633855806e-06, "loss": 0.023420429229736327, "step": 43730 }, { "epoch": 0.4116235294117647, "grad_norm": 0.46314683777462357, "learning_rate": 3.1180783774783547e-06, "loss": 0.02137710154056549, "step": 43735 }, { "epoch": 0.4116705882352941, "grad_norm": 0.4753087693166412, "learning_rate": 3.1179001516676975e-06, "loss": 0.017008012533187865, "step": 43740 }, { "epoch": 0.4117176470588235, "grad_norm": 0.8027149191188042, "learning_rate": 3.1177219564151006e-06, "loss": 0.017966876924037933, "step": 43745 }, { "epoch": 0.4117647058823529, "grad_norm": 0.4156634059600188, "learning_rate": 3.117543791711833e-06, "loss": 0.014232063293457031, "step": 43750 }, { "epoch": 0.4118117647058824, "grad_norm": 0.714460107720315, "learning_rate": 3.117365657549165e-06, "loss": 0.017895418405532836, "step": 43755 }, { "epoch": 0.4118588235294118, "grad_norm": 0.6494908921423701, "learning_rate": 3.1171875539183743e-06, "loss": 0.026739978790283205, "step": 43760 }, { "epoch": 0.4119058823529412, "grad_norm": 0.7835382788691679, "learning_rate": 3.1170094808107398e-06, "loss": 0.019064533710479736, "step": 43765 }, { "epoch": 0.4119529411764706, "grad_norm": 1.5701937169403586, "learning_rate": 3.116831438217543e-06, "loss": 0.020197397470474242, "step": 43770 }, { "epoch": 0.412, "grad_norm": 0.5148762280940942, "learning_rate": 3.116653426130071e-06, "loss": 0.02118488848209381, "step": 43775 }, { "epoch": 0.41204705882352943, "grad_norm": 0.5994109820566668, "learning_rate": 3.116475444539613e-06, "loss": 0.018112370371818544, "step": 43780 }, { "epoch": 0.41209411764705883, "grad_norm": 0.6213216156509804, "learning_rate": 3.116297493437462e-06, "loss": 0.018659673631191254, "step": 43785 }, { "epoch": 0.41214117647058823, "grad_norm": 0.7153179804349381, "learning_rate": 3.1161195728149147e-06, "loss": 0.019289882481098176, "step": 43790 }, { "epoch": 0.41218823529411763, "grad_norm": 0.47090006643137466, "learning_rate": 3.115941682663271e-06, "loss": 0.0187448650598526, "step": 43795 }, { "epoch": 0.41223529411764703, "grad_norm": 0.627926296463686, "learning_rate": 3.1157638229738348e-06, "loss": 0.019582003355026245, "step": 43800 }, { "epoch": 0.4122823529411765, "grad_norm": 0.6314909795277094, "learning_rate": 3.1155859937379124e-06, "loss": 0.020919063687324525, "step": 43805 }, { "epoch": 0.4123294117647059, "grad_norm": 0.5720456691887178, "learning_rate": 3.1154081949468157e-06, "loss": 0.01980791687965393, "step": 43810 }, { "epoch": 0.4123764705882353, "grad_norm": 0.3644790653537594, "learning_rate": 3.115230426591857e-06, "loss": 0.017885348200798033, "step": 43815 }, { "epoch": 0.4124235294117647, "grad_norm": 1.2091598012760216, "learning_rate": 3.115052688664355e-06, "loss": 0.01821855306625366, "step": 43820 }, { "epoch": 0.41247058823529414, "grad_norm": 0.5183665470871229, "learning_rate": 3.1148749811556294e-06, "loss": 0.016256698966026308, "step": 43825 }, { "epoch": 0.41251764705882354, "grad_norm": 0.4426997114584739, "learning_rate": 3.114697304057005e-06, "loss": 0.019581618905067443, "step": 43830 }, { "epoch": 0.41256470588235294, "grad_norm": 0.4903313639635492, "learning_rate": 3.1145196573598103e-06, "loss": 0.020243313908576966, "step": 43835 }, { "epoch": 0.41261176470588234, "grad_norm": 0.7578621937409277, "learning_rate": 3.1143420410553755e-06, "loss": 0.022793540358543397, "step": 43840 }, { "epoch": 0.41265882352941174, "grad_norm": 0.521016599880484, "learning_rate": 3.1141644551350358e-06, "loss": 0.01853048801422119, "step": 43845 }, { "epoch": 0.4127058823529412, "grad_norm": 0.5704060766743293, "learning_rate": 3.1139868995901297e-06, "loss": 0.021060740947723387, "step": 43850 }, { "epoch": 0.4127529411764706, "grad_norm": 0.5009529781882412, "learning_rate": 3.1138093744119986e-06, "loss": 0.017405292391777037, "step": 43855 }, { "epoch": 0.4128, "grad_norm": 0.6115918989653241, "learning_rate": 3.113631879591987e-06, "loss": 0.017617160081863405, "step": 43860 }, { "epoch": 0.4128470588235294, "grad_norm": 0.5748675969693218, "learning_rate": 3.113454415121444e-06, "loss": 0.018051612377166747, "step": 43865 }, { "epoch": 0.4128941176470588, "grad_norm": 0.48050331998932966, "learning_rate": 3.1132769809917217e-06, "loss": 0.021780383586883546, "step": 43870 }, { "epoch": 0.41294117647058826, "grad_norm": 0.6109812290405064, "learning_rate": 3.113099577194176e-06, "loss": 0.021808615326881407, "step": 43875 }, { "epoch": 0.41298823529411766, "grad_norm": 0.4201388585881071, "learning_rate": 3.112922203720164e-06, "loss": 0.020212388038635253, "step": 43880 }, { "epoch": 0.41303529411764706, "grad_norm": 0.469370323007157, "learning_rate": 3.1127448605610504e-06, "loss": 0.02157369703054428, "step": 43885 }, { "epoch": 0.41308235294117646, "grad_norm": 0.7734782856310661, "learning_rate": 3.112567547708199e-06, "loss": 0.020057857036590576, "step": 43890 }, { "epoch": 0.41312941176470586, "grad_norm": 0.481425906080323, "learning_rate": 3.1123902651529803e-06, "loss": 0.017928168177604675, "step": 43895 }, { "epoch": 0.4131764705882353, "grad_norm": 0.7112536443385921, "learning_rate": 3.1122130128867662e-06, "loss": 0.019981464743614195, "step": 43900 }, { "epoch": 0.4132235294117647, "grad_norm": 0.6447018402821016, "learning_rate": 3.1120357909009335e-06, "loss": 0.023714932799339294, "step": 43905 }, { "epoch": 0.4132705882352941, "grad_norm": 0.5603321846239595, "learning_rate": 3.111858599186861e-06, "loss": 0.020216977596282958, "step": 43910 }, { "epoch": 0.4133176470588235, "grad_norm": 0.584519287934479, "learning_rate": 3.1116814377359324e-06, "loss": 0.020372170209884643, "step": 43915 }, { "epoch": 0.41336470588235297, "grad_norm": 0.5707647740537166, "learning_rate": 3.1115043065395332e-06, "loss": 0.017217296361923217, "step": 43920 }, { "epoch": 0.41341176470588237, "grad_norm": 0.5272674732969629, "learning_rate": 3.1113272055890547e-06, "loss": 0.024983586370944978, "step": 43925 }, { "epoch": 0.41345882352941177, "grad_norm": 0.9941834262687818, "learning_rate": 3.111150134875889e-06, "loss": 0.020468056201934814, "step": 43930 }, { "epoch": 0.41350588235294117, "grad_norm": 0.5344819647081713, "learning_rate": 3.110973094391433e-06, "loss": 0.01851568967103958, "step": 43935 }, { "epoch": 0.41355294117647057, "grad_norm": 0.4661149763046863, "learning_rate": 3.1107960841270864e-06, "loss": 0.019236303865909576, "step": 43940 }, { "epoch": 0.4136, "grad_norm": 0.5351368258133544, "learning_rate": 3.110619104074254e-06, "loss": 0.02526627779006958, "step": 43945 }, { "epoch": 0.4136470588235294, "grad_norm": 0.34042298294069895, "learning_rate": 3.110442154224342e-06, "loss": 0.01824883818626404, "step": 43950 }, { "epoch": 0.4136941176470588, "grad_norm": 0.5351607246950503, "learning_rate": 3.1102652345687606e-06, "loss": 0.021032480895519255, "step": 43955 }, { "epoch": 0.4137411764705882, "grad_norm": 0.8445363654469502, "learning_rate": 3.1100883450989238e-06, "loss": 0.020277217030525208, "step": 43960 }, { "epoch": 0.4137882352941176, "grad_norm": 0.5372616433863795, "learning_rate": 3.109911485806249e-06, "loss": 0.014374186098575593, "step": 43965 }, { "epoch": 0.4138352941176471, "grad_norm": 0.4425555371639793, "learning_rate": 3.1097346566821575e-06, "loss": 0.024635571241378783, "step": 43970 }, { "epoch": 0.4138823529411765, "grad_norm": 0.6014548767514658, "learning_rate": 3.109557857718072e-06, "loss": 0.017880849540233612, "step": 43975 }, { "epoch": 0.4139294117647059, "grad_norm": 0.5723575685239866, "learning_rate": 3.1093810889054205e-06, "loss": 0.020751366019248964, "step": 43980 }, { "epoch": 0.4139764705882353, "grad_norm": 0.46030282847398263, "learning_rate": 3.109204350235634e-06, "loss": 0.019951224327087402, "step": 43985 }, { "epoch": 0.4140235294117647, "grad_norm": 0.45177680643485424, "learning_rate": 3.1090276417001473e-06, "loss": 0.015255317091941833, "step": 43990 }, { "epoch": 0.41407058823529413, "grad_norm": 0.5106684852176648, "learning_rate": 3.1088509632903973e-06, "loss": 0.018887796998023988, "step": 43995 }, { "epoch": 0.41411764705882353, "grad_norm": 0.3625879289693825, "learning_rate": 3.1086743149978253e-06, "loss": 0.01554497480392456, "step": 44000 }, { "epoch": 0.41416470588235293, "grad_norm": 0.8223215584394578, "learning_rate": 3.1084976968138763e-06, "loss": 0.01996655762195587, "step": 44005 }, { "epoch": 0.41421176470588233, "grad_norm": 0.7089050660643678, "learning_rate": 3.108321108729998e-06, "loss": 0.018645861744880678, "step": 44010 }, { "epoch": 0.4142588235294118, "grad_norm": 0.46708338789622533, "learning_rate": 3.108144550737641e-06, "loss": 0.0171785831451416, "step": 44015 }, { "epoch": 0.4143058823529412, "grad_norm": 0.35375771587585614, "learning_rate": 3.107968022828261e-06, "loss": 0.016436439752578736, "step": 44020 }, { "epoch": 0.4143529411764706, "grad_norm": 0.5440242749515487, "learning_rate": 3.107791524993316e-06, "loss": 0.030664542317390443, "step": 44025 }, { "epoch": 0.4144, "grad_norm": 0.6313333360335025, "learning_rate": 3.1076150572242676e-06, "loss": 0.019197911024093628, "step": 44030 }, { "epoch": 0.4144470588235294, "grad_norm": 0.5555897857078694, "learning_rate": 3.1074386195125796e-06, "loss": 0.02125924527645111, "step": 44035 }, { "epoch": 0.41449411764705885, "grad_norm": 0.5155921388438305, "learning_rate": 3.1072622118497207e-06, "loss": 0.02036992311477661, "step": 44040 }, { "epoch": 0.41454117647058825, "grad_norm": 0.6155256928852433, "learning_rate": 3.1070858342271636e-06, "loss": 0.019620686769485474, "step": 44045 }, { "epoch": 0.41458823529411765, "grad_norm": 0.5237996245529005, "learning_rate": 3.1069094866363832e-06, "loss": 0.01738229990005493, "step": 44050 }, { "epoch": 0.41463529411764705, "grad_norm": 0.37752140404286044, "learning_rate": 3.1067331690688568e-06, "loss": 0.018052175641059875, "step": 44055 }, { "epoch": 0.41468235294117645, "grad_norm": 0.43227818358311393, "learning_rate": 3.106556881516067e-06, "loss": 0.014883872866630555, "step": 44060 }, { "epoch": 0.4147294117647059, "grad_norm": 0.5041733880661818, "learning_rate": 3.106380623969499e-06, "loss": 0.014252877235412598, "step": 44065 }, { "epoch": 0.4147764705882353, "grad_norm": 0.8258011771735416, "learning_rate": 3.106204396420641e-06, "loss": 0.021069705486297607, "step": 44070 }, { "epoch": 0.4148235294117647, "grad_norm": 0.3480106357192835, "learning_rate": 3.1060281988609853e-06, "loss": 0.019233906269073488, "step": 44075 }, { "epoch": 0.4148705882352941, "grad_norm": 0.49027560190555647, "learning_rate": 3.105852031282028e-06, "loss": 0.017793862521648406, "step": 44080 }, { "epoch": 0.4149176470588235, "grad_norm": 0.46228291009039724, "learning_rate": 3.105675893675267e-06, "loss": 0.016239255666732788, "step": 44085 }, { "epoch": 0.41496470588235296, "grad_norm": 0.7828847066900615, "learning_rate": 3.1054997860322046e-06, "loss": 0.01672028601169586, "step": 44090 }, { "epoch": 0.41501176470588236, "grad_norm": 0.5987098298208887, "learning_rate": 3.105323708344346e-06, "loss": 0.01611112356185913, "step": 44095 }, { "epoch": 0.41505882352941176, "grad_norm": 0.6967217229521548, "learning_rate": 3.1051476606032003e-06, "loss": 0.020207667350769044, "step": 44100 }, { "epoch": 0.41510588235294116, "grad_norm": 0.40056965020411855, "learning_rate": 3.1049716428002803e-06, "loss": 0.020396402478218077, "step": 44105 }, { "epoch": 0.4151529411764706, "grad_norm": 1.0274755292120095, "learning_rate": 3.104795654927101e-06, "loss": 0.01916767954826355, "step": 44110 }, { "epoch": 0.4152, "grad_norm": 0.3506648681346248, "learning_rate": 3.1046196969751807e-06, "loss": 0.014885994791984557, "step": 44115 }, { "epoch": 0.4152470588235294, "grad_norm": 0.6022586493814781, "learning_rate": 3.1044437689360437e-06, "loss": 0.021082204580307008, "step": 44120 }, { "epoch": 0.4152941176470588, "grad_norm": 0.6707216006820259, "learning_rate": 3.104267870801213e-06, "loss": 0.018763864040374757, "step": 44125 }, { "epoch": 0.4153411764705882, "grad_norm": 0.6724472178692871, "learning_rate": 3.1040920025622203e-06, "loss": 0.01876378506422043, "step": 44130 }, { "epoch": 0.41538823529411767, "grad_norm": 0.5642827208777682, "learning_rate": 3.1039161642105963e-06, "loss": 0.021237486600875856, "step": 44135 }, { "epoch": 0.41543529411764707, "grad_norm": 0.4667271984916046, "learning_rate": 3.103740355737877e-06, "loss": 0.0186850905418396, "step": 44140 }, { "epoch": 0.41548235294117647, "grad_norm": 0.4341845894091617, "learning_rate": 3.1035645771356023e-06, "loss": 0.014189939200878143, "step": 44145 }, { "epoch": 0.41552941176470587, "grad_norm": 0.7481834919977206, "learning_rate": 3.1033888283953144e-06, "loss": 0.01833527535200119, "step": 44150 }, { "epoch": 0.41557647058823527, "grad_norm": 0.5007810907915328, "learning_rate": 3.1032131095085584e-06, "loss": 0.029985970258712767, "step": 44155 }, { "epoch": 0.4156235294117647, "grad_norm": 0.5898293392206383, "learning_rate": 3.103037420466884e-06, "loss": 0.018534326553344728, "step": 44160 }, { "epoch": 0.4156705882352941, "grad_norm": 0.7872749262087005, "learning_rate": 3.1028617612618438e-06, "loss": 0.020717096328735352, "step": 44165 }, { "epoch": 0.4157176470588235, "grad_norm": 0.6487168856618835, "learning_rate": 3.1026861318849937e-06, "loss": 0.01885518729686737, "step": 44170 }, { "epoch": 0.4157647058823529, "grad_norm": 0.6709837163122744, "learning_rate": 3.1025105323278933e-06, "loss": 0.02112395763397217, "step": 44175 }, { "epoch": 0.4158117647058823, "grad_norm": 0.5453147625391235, "learning_rate": 3.1023349625821042e-06, "loss": 0.0159505695104599, "step": 44180 }, { "epoch": 0.4158588235294118, "grad_norm": 0.7754355122772185, "learning_rate": 3.1021594226391928e-06, "loss": 0.022546109557151795, "step": 44185 }, { "epoch": 0.4159058823529412, "grad_norm": 0.33227365968567674, "learning_rate": 3.1019839124907287e-06, "loss": 0.017230457067489623, "step": 44190 }, { "epoch": 0.4159529411764706, "grad_norm": 0.7481265586809335, "learning_rate": 3.1018084321282838e-06, "loss": 0.037469005584716795, "step": 44195 }, { "epoch": 0.416, "grad_norm": 0.532508993280877, "learning_rate": 3.101632981543434e-06, "loss": 0.017540359497070314, "step": 44200 }, { "epoch": 0.41604705882352944, "grad_norm": 0.6033879494062985, "learning_rate": 3.1014575607277598e-06, "loss": 0.014517191052436828, "step": 44205 }, { "epoch": 0.41609411764705884, "grad_norm": 0.5095362223734777, "learning_rate": 3.101282169672842e-06, "loss": 0.021086922287940978, "step": 44210 }, { "epoch": 0.41614117647058824, "grad_norm": 0.6529702792724298, "learning_rate": 3.101106808370269e-06, "loss": 0.022120276093482973, "step": 44215 }, { "epoch": 0.41618823529411764, "grad_norm": 0.5801076098549399, "learning_rate": 3.1009314768116266e-06, "loss": 0.021828463673591612, "step": 44220 }, { "epoch": 0.41623529411764704, "grad_norm": 0.5291625436994163, "learning_rate": 3.100756174988511e-06, "loss": 0.02227437198162079, "step": 44225 }, { "epoch": 0.4162823529411765, "grad_norm": 0.8914129406835655, "learning_rate": 3.1005809028925148e-06, "loss": 0.014440411329269409, "step": 44230 }, { "epoch": 0.4163294117647059, "grad_norm": 0.6081687076778014, "learning_rate": 3.1004056605152398e-06, "loss": 0.019492700695991516, "step": 44235 }, { "epoch": 0.4163764705882353, "grad_norm": 0.5658435724820732, "learning_rate": 3.1002304478482874e-06, "loss": 0.017613333463668824, "step": 44240 }, { "epoch": 0.4164235294117647, "grad_norm": 0.9008229679010339, "learning_rate": 3.100055264883263e-06, "loss": 0.018917804956436156, "step": 44245 }, { "epoch": 0.4164705882352941, "grad_norm": 0.9526838310409321, "learning_rate": 3.0998801116117777e-06, "loss": 0.01772363781929016, "step": 44250 }, { "epoch": 0.41651764705882355, "grad_norm": 0.627934277217258, "learning_rate": 3.099704988025442e-06, "loss": 0.023569712042808534, "step": 44255 }, { "epoch": 0.41656470588235295, "grad_norm": 0.49156199605469825, "learning_rate": 3.0995298941158724e-06, "loss": 0.018421745300292967, "step": 44260 }, { "epoch": 0.41661176470588235, "grad_norm": 1.1205706608113821, "learning_rate": 3.099354829874689e-06, "loss": 0.016930760443210603, "step": 44265 }, { "epoch": 0.41665882352941175, "grad_norm": 0.41509157029527916, "learning_rate": 3.099179795293512e-06, "loss": 0.022882811725139618, "step": 44270 }, { "epoch": 0.4167058823529412, "grad_norm": 0.43762117350747604, "learning_rate": 3.0990047903639693e-06, "loss": 0.014907291531562806, "step": 44275 }, { "epoch": 0.4167529411764706, "grad_norm": 0.7851558175969373, "learning_rate": 3.09882981507769e-06, "loss": 0.025947552919387818, "step": 44280 }, { "epoch": 0.4168, "grad_norm": 0.6258240557259016, "learning_rate": 3.0986548694263046e-06, "loss": 0.022716917097568512, "step": 44285 }, { "epoch": 0.4168470588235294, "grad_norm": 0.5026573587998718, "learning_rate": 3.0984799534014505e-06, "loss": 0.01886652410030365, "step": 44290 }, { "epoch": 0.4168941176470588, "grad_norm": 0.6086908694574238, "learning_rate": 3.098305066994766e-06, "loss": 0.016409844160079956, "step": 44295 }, { "epoch": 0.41694117647058826, "grad_norm": 0.550214492430881, "learning_rate": 3.0981302101978937e-06, "loss": 0.020176005363464356, "step": 44300 }, { "epoch": 0.41698823529411766, "grad_norm": 0.5727709344775135, "learning_rate": 3.0979553830024787e-06, "loss": 0.019556218385696413, "step": 44305 }, { "epoch": 0.41703529411764706, "grad_norm": 0.5798142011647277, "learning_rate": 3.097780585400171e-06, "loss": 0.01920537054538727, "step": 44310 }, { "epoch": 0.41708235294117646, "grad_norm": 0.616395674088584, "learning_rate": 3.0976058173826213e-06, "loss": 0.022083309292793275, "step": 44315 }, { "epoch": 0.41712941176470586, "grad_norm": 0.3843072556434601, "learning_rate": 3.0974310789414857e-06, "loss": 0.020499391853809355, "step": 44320 }, { "epoch": 0.4171764705882353, "grad_norm": 0.7168473469095586, "learning_rate": 3.0972563700684236e-06, "loss": 0.024340219795703888, "step": 44325 }, { "epoch": 0.4172235294117647, "grad_norm": 0.5367101210281784, "learning_rate": 3.097081690755097e-06, "loss": 0.02239903062582016, "step": 44330 }, { "epoch": 0.4172705882352941, "grad_norm": 0.45591602343720666, "learning_rate": 3.0969070409931708e-06, "loss": 0.02160872519016266, "step": 44335 }, { "epoch": 0.4173176470588235, "grad_norm": 0.47962568433493347, "learning_rate": 3.0967324207743133e-06, "loss": 0.0218619704246521, "step": 44340 }, { "epoch": 0.4173647058823529, "grad_norm": 0.5049767326145004, "learning_rate": 3.096557830090197e-06, "loss": 0.016449254751205445, "step": 44345 }, { "epoch": 0.41741176470588237, "grad_norm": 0.7865291045379227, "learning_rate": 3.0963832689324973e-06, "loss": 0.02228439450263977, "step": 44350 }, { "epoch": 0.41745882352941177, "grad_norm": 0.5648544580918811, "learning_rate": 3.096208737292893e-06, "loss": 0.019668780267238617, "step": 44355 }, { "epoch": 0.41750588235294117, "grad_norm": 0.5312772173112299, "learning_rate": 3.096034235163065e-06, "loss": 0.01619846522808075, "step": 44360 }, { "epoch": 0.41755294117647057, "grad_norm": 0.6650655052093437, "learning_rate": 3.0958597625346987e-06, "loss": 0.018781414628028868, "step": 44365 }, { "epoch": 0.4176, "grad_norm": 0.6410080821327802, "learning_rate": 3.0956853193994824e-06, "loss": 0.02638271450996399, "step": 44370 }, { "epoch": 0.4176470588235294, "grad_norm": 0.7257121343965925, "learning_rate": 3.0955109057491082e-06, "loss": 0.016761782765388488, "step": 44375 }, { "epoch": 0.4176941176470588, "grad_norm": 0.6246913811002655, "learning_rate": 3.0953365215752714e-06, "loss": 0.020878666639328004, "step": 44380 }, { "epoch": 0.4177411764705882, "grad_norm": 0.5552798200659529, "learning_rate": 3.0951621668696684e-06, "loss": 0.015096598863601684, "step": 44385 }, { "epoch": 0.4177882352941176, "grad_norm": 0.5851976413078962, "learning_rate": 3.0949878416240027e-06, "loss": 0.023807641863822938, "step": 44390 }, { "epoch": 0.4178352941176471, "grad_norm": 0.5317541414581809, "learning_rate": 3.094813545829978e-06, "loss": 0.01943669319152832, "step": 44395 }, { "epoch": 0.4178823529411765, "grad_norm": 0.42835111526617414, "learning_rate": 3.0946392794793023e-06, "loss": 0.018210840225219727, "step": 44400 }, { "epoch": 0.4179294117647059, "grad_norm": 0.7489588980076196, "learning_rate": 3.0944650425636867e-06, "loss": 0.025723275542259217, "step": 44405 }, { "epoch": 0.4179764705882353, "grad_norm": 0.5417831533082187, "learning_rate": 3.094290835074847e-06, "loss": 0.019181498885154726, "step": 44410 }, { "epoch": 0.4180235294117647, "grad_norm": 0.7617917141650143, "learning_rate": 3.0941166570045e-06, "loss": 0.01851056218147278, "step": 44415 }, { "epoch": 0.41807058823529414, "grad_norm": 0.7450647637089287, "learning_rate": 3.0939425083443665e-06, "loss": 0.031705647706985474, "step": 44420 }, { "epoch": 0.41811764705882354, "grad_norm": 0.4481885910183483, "learning_rate": 3.0937683890861715e-06, "loss": 0.014667756855487823, "step": 44425 }, { "epoch": 0.41816470588235294, "grad_norm": 0.4706516468861712, "learning_rate": 3.0935942992216424e-06, "loss": 0.015444251894950866, "step": 44430 }, { "epoch": 0.41821176470588234, "grad_norm": 0.4750531919035614, "learning_rate": 3.0934202387425093e-06, "loss": 0.020902860164642333, "step": 44435 }, { "epoch": 0.41825882352941174, "grad_norm": 0.6519454913489616, "learning_rate": 3.093246207640508e-06, "loss": 0.021644578874111177, "step": 44440 }, { "epoch": 0.4183058823529412, "grad_norm": 0.5440481839950315, "learning_rate": 3.0930722059073743e-06, "loss": 0.02106400728225708, "step": 44445 }, { "epoch": 0.4183529411764706, "grad_norm": 0.4232102222553102, "learning_rate": 3.09289823353485e-06, "loss": 0.01760580241680145, "step": 44450 }, { "epoch": 0.4184, "grad_norm": 0.5959947142353055, "learning_rate": 3.0927242905146774e-06, "loss": 0.01733544170856476, "step": 44455 }, { "epoch": 0.4184470588235294, "grad_norm": 0.7611532968455508, "learning_rate": 3.0925503768386056e-06, "loss": 0.020844292640686036, "step": 44460 }, { "epoch": 0.41849411764705885, "grad_norm": 0.6729196236896069, "learning_rate": 3.0923764924983833e-06, "loss": 0.01970677524805069, "step": 44465 }, { "epoch": 0.41854117647058825, "grad_norm": 0.5167898565388443, "learning_rate": 3.0922026374857657e-06, "loss": 0.014633375406265258, "step": 44470 }, { "epoch": 0.41858823529411765, "grad_norm": 0.41522944822712266, "learning_rate": 3.0920288117925075e-06, "loss": 0.017984017729759216, "step": 44475 }, { "epoch": 0.41863529411764705, "grad_norm": 0.6005396298442538, "learning_rate": 3.091855015410371e-06, "loss": 0.019152772426605225, "step": 44480 }, { "epoch": 0.41868235294117645, "grad_norm": 0.6243048524430138, "learning_rate": 3.091681248331118e-06, "loss": 0.029195791482925414, "step": 44485 }, { "epoch": 0.4187294117647059, "grad_norm": 0.7690390558901401, "learning_rate": 3.0915075105465157e-06, "loss": 0.020075549185276032, "step": 44490 }, { "epoch": 0.4187764705882353, "grad_norm": 0.7359541170021884, "learning_rate": 3.0913338020483346e-06, "loss": 0.01670750230550766, "step": 44495 }, { "epoch": 0.4188235294117647, "grad_norm": 0.47162852849230336, "learning_rate": 3.0911601228283463e-06, "loss": 0.017984730005264283, "step": 44500 }, { "epoch": 0.4188705882352941, "grad_norm": 0.6159176279381906, "learning_rate": 3.0909864728783285e-06, "loss": 0.024162012338638305, "step": 44505 }, { "epoch": 0.4189176470588235, "grad_norm": 0.3798548205072832, "learning_rate": 3.0908128521900595e-06, "loss": 0.016090673208236695, "step": 44510 }, { "epoch": 0.41896470588235296, "grad_norm": 0.47223623545268706, "learning_rate": 3.0906392607553232e-06, "loss": 0.02035318613052368, "step": 44515 }, { "epoch": 0.41901176470588236, "grad_norm": 0.5344664472751741, "learning_rate": 3.090465698565905e-06, "loss": 0.022463229298591614, "step": 44520 }, { "epoch": 0.41905882352941176, "grad_norm": 0.4291079192455412, "learning_rate": 3.0902921656135938e-06, "loss": 0.019304549694061278, "step": 44525 }, { "epoch": 0.41910588235294116, "grad_norm": 0.5315647543085753, "learning_rate": 3.090118661890182e-06, "loss": 0.019182109832763673, "step": 44530 }, { "epoch": 0.41915294117647056, "grad_norm": 0.5430038050900449, "learning_rate": 3.089945187387467e-06, "loss": 0.02071021944284439, "step": 44535 }, { "epoch": 0.4192, "grad_norm": 0.6555373745479559, "learning_rate": 3.0897717420972456e-06, "loss": 0.025004738569259645, "step": 44540 }, { "epoch": 0.4192470588235294, "grad_norm": 0.493318009259209, "learning_rate": 3.0895983260113203e-06, "loss": 0.02565166652202606, "step": 44545 }, { "epoch": 0.4192941176470588, "grad_norm": 0.6670869121694994, "learning_rate": 3.089424939121498e-06, "loss": 0.019934949278831483, "step": 44550 }, { "epoch": 0.4193411764705882, "grad_norm": 0.588471333052394, "learning_rate": 3.0892515814195856e-06, "loss": 0.01643517017364502, "step": 44555 }, { "epoch": 0.4193882352941177, "grad_norm": 0.4156996590299708, "learning_rate": 3.089078252897395e-06, "loss": 0.02183714359998703, "step": 44560 }, { "epoch": 0.4194352941176471, "grad_norm": 0.3147368041547579, "learning_rate": 3.088904953546742e-06, "loss": 0.015855154395103453, "step": 44565 }, { "epoch": 0.4194823529411765, "grad_norm": 0.4459402266308388, "learning_rate": 3.0887316833594444e-06, "loss": 0.02153543382883072, "step": 44570 }, { "epoch": 0.4195294117647059, "grad_norm": 0.6487151013327019, "learning_rate": 3.088558442327323e-06, "loss": 0.01984439790248871, "step": 44575 }, { "epoch": 0.4195764705882353, "grad_norm": 0.4140776414227676, "learning_rate": 3.0883852304422037e-06, "loss": 0.018340511620044707, "step": 44580 }, { "epoch": 0.41962352941176473, "grad_norm": 0.4940140076944016, "learning_rate": 3.0882120476959126e-06, "loss": 0.014591503143310546, "step": 44585 }, { "epoch": 0.41967058823529413, "grad_norm": 0.3918932372830083, "learning_rate": 3.088038894080283e-06, "loss": 0.01541694551706314, "step": 44590 }, { "epoch": 0.41971764705882353, "grad_norm": 0.4833977291945294, "learning_rate": 3.087865769587147e-06, "loss": 0.023937350511550902, "step": 44595 }, { "epoch": 0.41976470588235293, "grad_norm": 0.46241669357214077, "learning_rate": 3.0876926742083443e-06, "loss": 0.017140956223011018, "step": 44600 }, { "epoch": 0.41981176470588233, "grad_norm": 0.5921965710229934, "learning_rate": 3.0875196079357132e-06, "loss": 0.023068690299987794, "step": 44605 }, { "epoch": 0.4198588235294118, "grad_norm": 0.44521792397143484, "learning_rate": 3.0873465707610987e-06, "loss": 0.018287119269371033, "step": 44610 }, { "epoch": 0.4199058823529412, "grad_norm": 0.5014717585245977, "learning_rate": 3.087173562676348e-06, "loss": 0.019341695308685302, "step": 44615 }, { "epoch": 0.4199529411764706, "grad_norm": 0.6004822663618742, "learning_rate": 3.087000583673312e-06, "loss": 0.017960450053215025, "step": 44620 }, { "epoch": 0.42, "grad_norm": 0.4988258509918467, "learning_rate": 3.086827633743842e-06, "loss": 0.018324613571166992, "step": 44625 }, { "epoch": 0.4200470588235294, "grad_norm": 0.3644573462425256, "learning_rate": 3.0866547128797962e-06, "loss": 0.015087440609931946, "step": 44630 }, { "epoch": 0.42009411764705884, "grad_norm": 0.5675299964241495, "learning_rate": 3.086481821073034e-06, "loss": 0.01945580095052719, "step": 44635 }, { "epoch": 0.42014117647058824, "grad_norm": 0.5657365965087234, "learning_rate": 3.0863089583154195e-06, "loss": 0.015491855144500733, "step": 44640 }, { "epoch": 0.42018823529411764, "grad_norm": 0.5163457242403687, "learning_rate": 3.086136124598817e-06, "loss": 0.01703445762395859, "step": 44645 }, { "epoch": 0.42023529411764704, "grad_norm": 0.5697887888375731, "learning_rate": 3.0859633199150973e-06, "loss": 0.02098168432712555, "step": 44650 }, { "epoch": 0.4202823529411765, "grad_norm": 0.36005542354156195, "learning_rate": 3.0857905442561323e-06, "loss": 0.015360254049301147, "step": 44655 }, { "epoch": 0.4203294117647059, "grad_norm": 0.6525843407078099, "learning_rate": 3.0856177976137984e-06, "loss": 0.01566007435321808, "step": 44660 }, { "epoch": 0.4203764705882353, "grad_norm": 0.7964724862880704, "learning_rate": 3.085445079979975e-06, "loss": 0.022956496477127074, "step": 44665 }, { "epoch": 0.4204235294117647, "grad_norm": 0.5727584226459117, "learning_rate": 3.085272391346542e-06, "loss": 0.01853795051574707, "step": 44670 }, { "epoch": 0.4204705882352941, "grad_norm": 0.5126766420021971, "learning_rate": 3.0850997317053877e-06, "loss": 0.019530048966407774, "step": 44675 }, { "epoch": 0.42051764705882355, "grad_norm": 0.8142100542665153, "learning_rate": 3.0849271010483985e-06, "loss": 0.020313245058059693, "step": 44680 }, { "epoch": 0.42056470588235295, "grad_norm": 0.808230274716227, "learning_rate": 3.0847544993674665e-06, "loss": 0.029263341426849367, "step": 44685 }, { "epoch": 0.42061176470588235, "grad_norm": 0.6289224917065905, "learning_rate": 3.084581926654487e-06, "loss": 0.017018625140190126, "step": 44690 }, { "epoch": 0.42065882352941175, "grad_norm": 0.6872578064482571, "learning_rate": 3.084409382901358e-06, "loss": 0.018851402401924133, "step": 44695 }, { "epoch": 0.42070588235294115, "grad_norm": 0.9127257629614908, "learning_rate": 3.0842368680999803e-06, "loss": 0.022948071360588074, "step": 44700 }, { "epoch": 0.4207529411764706, "grad_norm": 0.5960995602649698, "learning_rate": 3.0840643822422578e-06, "loss": 0.020708304643630982, "step": 44705 }, { "epoch": 0.4208, "grad_norm": 0.43174489568586, "learning_rate": 3.0838919253200993e-06, "loss": 0.018067872524261473, "step": 44710 }, { "epoch": 0.4208470588235294, "grad_norm": 0.5606256074743213, "learning_rate": 3.0837194973254154e-06, "loss": 0.02712244391441345, "step": 44715 }, { "epoch": 0.4208941176470588, "grad_norm": 0.5419134675293634, "learning_rate": 3.083547098250119e-06, "loss": 0.017050260305404664, "step": 44720 }, { "epoch": 0.42094117647058826, "grad_norm": 0.5799302987032434, "learning_rate": 3.083374728086127e-06, "loss": 0.016117179393768312, "step": 44725 }, { "epoch": 0.42098823529411766, "grad_norm": 0.4306614819188263, "learning_rate": 3.0832023868253607e-06, "loss": 0.016162893176078795, "step": 44730 }, { "epoch": 0.42103529411764706, "grad_norm": 0.8227503295223735, "learning_rate": 3.083030074459743e-06, "loss": 0.0194924995303154, "step": 44735 }, { "epoch": 0.42108235294117646, "grad_norm": 0.5664679794503773, "learning_rate": 3.0828577909812006e-06, "loss": 0.01735653877258301, "step": 44740 }, { "epoch": 0.42112941176470586, "grad_norm": 0.5691073989073584, "learning_rate": 3.0826855363816627e-06, "loss": 0.019750872254371644, "step": 44745 }, { "epoch": 0.4211764705882353, "grad_norm": 0.4983036469321763, "learning_rate": 3.0825133106530623e-06, "loss": 0.016139866411685945, "step": 44750 }, { "epoch": 0.4212235294117647, "grad_norm": 0.6820583077878214, "learning_rate": 3.082341113787336e-06, "loss": 0.02815162539482117, "step": 44755 }, { "epoch": 0.4212705882352941, "grad_norm": 0.42477563600254664, "learning_rate": 3.0821689457764224e-06, "loss": 0.024948860704898834, "step": 44760 }, { "epoch": 0.4213176470588235, "grad_norm": 0.3684576357157455, "learning_rate": 3.0819968066122637e-06, "loss": 0.02017129957675934, "step": 44765 }, { "epoch": 0.4213647058823529, "grad_norm": 0.4364512621707983, "learning_rate": 3.0818246962868054e-06, "loss": 0.015162813663482665, "step": 44770 }, { "epoch": 0.4214117647058824, "grad_norm": 0.41373123190421535, "learning_rate": 3.0816526147919956e-06, "loss": 0.020214331150054932, "step": 44775 }, { "epoch": 0.4214588235294118, "grad_norm": 0.3608149258877938, "learning_rate": 3.0814805621197875e-06, "loss": 0.020367439091205596, "step": 44780 }, { "epoch": 0.4215058823529412, "grad_norm": 0.5998148554485864, "learning_rate": 3.0813085382621346e-06, "loss": 0.01702393889427185, "step": 44785 }, { "epoch": 0.4215529411764706, "grad_norm": 0.6596000139318979, "learning_rate": 3.0811365432109956e-06, "loss": 0.017735320329666137, "step": 44790 }, { "epoch": 0.4216, "grad_norm": 0.6634398660971491, "learning_rate": 3.0809645769583314e-06, "loss": 0.01765611469745636, "step": 44795 }, { "epoch": 0.42164705882352943, "grad_norm": 0.8484401404787842, "learning_rate": 3.0807926394961062e-06, "loss": 0.01770898252725601, "step": 44800 }, { "epoch": 0.42169411764705883, "grad_norm": 0.520696309192114, "learning_rate": 3.0806207308162876e-06, "loss": 0.020605652034282683, "step": 44805 }, { "epoch": 0.42174117647058823, "grad_norm": 0.5016116453367477, "learning_rate": 3.0804488509108464e-06, "loss": 0.021380992233753206, "step": 44810 }, { "epoch": 0.42178823529411763, "grad_norm": 0.40381140397877985, "learning_rate": 3.0802769997717564e-06, "loss": 0.017740491032600402, "step": 44815 }, { "epoch": 0.4218352941176471, "grad_norm": 0.5563901135950537, "learning_rate": 3.0801051773909934e-06, "loss": 0.021228265762329102, "step": 44820 }, { "epoch": 0.4218823529411765, "grad_norm": 0.748026057263045, "learning_rate": 3.079933383760539e-06, "loss": 0.0240869402885437, "step": 44825 }, { "epoch": 0.4219294117647059, "grad_norm": 0.7387204022325459, "learning_rate": 3.0797616188723745e-06, "loss": 0.021703021228313447, "step": 44830 }, { "epoch": 0.4219764705882353, "grad_norm": 0.6292483396191825, "learning_rate": 3.0795898827184886e-06, "loss": 0.01896376609802246, "step": 44835 }, { "epoch": 0.4220235294117647, "grad_norm": 0.5973112612754712, "learning_rate": 3.0794181752908682e-06, "loss": 0.018906936049461365, "step": 44840 }, { "epoch": 0.42207058823529414, "grad_norm": 0.5432465840452713, "learning_rate": 3.0792464965815065e-06, "loss": 0.02274134159088135, "step": 44845 }, { "epoch": 0.42211764705882354, "grad_norm": 0.4398389462594797, "learning_rate": 3.0790748465824e-06, "loss": 0.019958245754241943, "step": 44850 }, { "epoch": 0.42216470588235294, "grad_norm": 0.48456303304514803, "learning_rate": 3.078903225285547e-06, "loss": 0.022227923572063445, "step": 44855 }, { "epoch": 0.42221176470588234, "grad_norm": 0.525269596993917, "learning_rate": 3.0787316326829485e-06, "loss": 0.015035295486450195, "step": 44860 }, { "epoch": 0.42225882352941174, "grad_norm": 0.47588976704653607, "learning_rate": 3.078560068766611e-06, "loss": 0.018310104310512543, "step": 44865 }, { "epoch": 0.4223058823529412, "grad_norm": 0.4246629266649576, "learning_rate": 3.0783885335285416e-06, "loss": 0.021109268069267273, "step": 44870 }, { "epoch": 0.4223529411764706, "grad_norm": 0.47772374699282644, "learning_rate": 3.078217026960752e-06, "loss": 0.020202924311161042, "step": 44875 }, { "epoch": 0.4224, "grad_norm": 0.5439444453855683, "learning_rate": 3.0780455490552557e-06, "loss": 0.01676923930644989, "step": 44880 }, { "epoch": 0.4224470588235294, "grad_norm": 0.49489264793831766, "learning_rate": 3.0778740998040714e-06, "loss": 0.02423006147146225, "step": 44885 }, { "epoch": 0.4224941176470588, "grad_norm": 0.5662484819544531, "learning_rate": 3.0777026791992186e-06, "loss": 0.01906956881284714, "step": 44890 }, { "epoch": 0.42254117647058825, "grad_norm": 0.5985386841885441, "learning_rate": 3.0775312872327213e-06, "loss": 0.018953265249729158, "step": 44895 }, { "epoch": 0.42258823529411765, "grad_norm": 0.30067157164977587, "learning_rate": 3.0773599238966068e-06, "loss": 0.02061903178691864, "step": 44900 }, { "epoch": 0.42263529411764705, "grad_norm": 0.4354856734579078, "learning_rate": 3.077188589182904e-06, "loss": 0.014687350392341614, "step": 44905 }, { "epoch": 0.42268235294117645, "grad_norm": 0.3757021526344575, "learning_rate": 3.077017283083647e-06, "loss": 0.016467097401618957, "step": 44910 }, { "epoch": 0.4227294117647059, "grad_norm": 0.6966270061097661, "learning_rate": 3.076846005590871e-06, "loss": 0.024933722615242005, "step": 44915 }, { "epoch": 0.4227764705882353, "grad_norm": 0.6333286196406525, "learning_rate": 3.076674756696616e-06, "loss": 0.017195402085781096, "step": 44920 }, { "epoch": 0.4228235294117647, "grad_norm": 0.45956324845880503, "learning_rate": 3.0765035363929236e-06, "loss": 0.021128803491592407, "step": 44925 }, { "epoch": 0.4228705882352941, "grad_norm": 0.535632104882839, "learning_rate": 3.0763323446718395e-06, "loss": 0.01914929151535034, "step": 44930 }, { "epoch": 0.4229176470588235, "grad_norm": 0.7431274996643953, "learning_rate": 3.0761611815254116e-06, "loss": 0.021667499840259553, "step": 44935 }, { "epoch": 0.42296470588235296, "grad_norm": 0.6091327678261752, "learning_rate": 3.075990046945693e-06, "loss": 0.018522103130817414, "step": 44940 }, { "epoch": 0.42301176470588236, "grad_norm": 0.5615211696322316, "learning_rate": 3.0758189409247367e-06, "loss": 0.017902135848999023, "step": 44945 }, { "epoch": 0.42305882352941176, "grad_norm": 0.5262492394474135, "learning_rate": 3.075647863454602e-06, "loss": 0.01912868022918701, "step": 44950 }, { "epoch": 0.42310588235294116, "grad_norm": 0.5408875471501141, "learning_rate": 3.0754768145273487e-06, "loss": 0.021429887413978575, "step": 44955 }, { "epoch": 0.42315294117647057, "grad_norm": 0.6106548110278827, "learning_rate": 3.0753057941350417e-06, "loss": 0.018264731764793395, "step": 44960 }, { "epoch": 0.4232, "grad_norm": 0.5155521643928024, "learning_rate": 3.0751348022697468e-06, "loss": 0.018150216341018675, "step": 44965 }, { "epoch": 0.4232470588235294, "grad_norm": 1.0079732082551678, "learning_rate": 3.0749638389235354e-06, "loss": 0.020419391989707946, "step": 44970 }, { "epoch": 0.4232941176470588, "grad_norm": 0.2979539241112418, "learning_rate": 3.0747929040884792e-06, "loss": 0.018179154396057128, "step": 44975 }, { "epoch": 0.4233411764705882, "grad_norm": 0.41994138644760254, "learning_rate": 3.074621997756656e-06, "loss": 0.014563915133476258, "step": 44980 }, { "epoch": 0.4233882352941176, "grad_norm": 0.5624132212910105, "learning_rate": 3.074451119920145e-06, "loss": 0.02516351342201233, "step": 44985 }, { "epoch": 0.4234352941176471, "grad_norm": 1.060099849524118, "learning_rate": 3.0742802705710283e-06, "loss": 0.020420783758163454, "step": 44990 }, { "epoch": 0.4234823529411765, "grad_norm": 0.6290942568396123, "learning_rate": 3.0741094497013905e-06, "loss": 0.016152223944664, "step": 44995 }, { "epoch": 0.4235294117647059, "grad_norm": 0.37194299858773655, "learning_rate": 3.0739386573033223e-06, "loss": 0.016754341125488282, "step": 45000 }, { "epoch": 0.4235764705882353, "grad_norm": 0.44585731695254727, "learning_rate": 3.0737678933689146e-06, "loss": 0.018457686901092528, "step": 45005 }, { "epoch": 0.42362352941176473, "grad_norm": 0.47421251889364413, "learning_rate": 3.073597157890261e-06, "loss": 0.021105653047561644, "step": 45010 }, { "epoch": 0.42367058823529413, "grad_norm": 0.4912824851433146, "learning_rate": 3.073426450859461e-06, "loss": 0.016889998316764833, "step": 45015 }, { "epoch": 0.42371764705882353, "grad_norm": 0.7034161008818003, "learning_rate": 3.0732557722686144e-06, "loss": 0.019016548991203308, "step": 45020 }, { "epoch": 0.42376470588235293, "grad_norm": 1.5283401541663308, "learning_rate": 3.0730851221098266e-06, "loss": 0.01933809220790863, "step": 45025 }, { "epoch": 0.42381176470588233, "grad_norm": 0.721156645651825, "learning_rate": 3.0729145003752025e-06, "loss": 0.02340330630540848, "step": 45030 }, { "epoch": 0.4238588235294118, "grad_norm": 0.6133802164894051, "learning_rate": 3.0727439070568545e-06, "loss": 0.022672319412231447, "step": 45035 }, { "epoch": 0.4239058823529412, "grad_norm": 0.6276951530345175, "learning_rate": 3.072573342146894e-06, "loss": 0.02544514834880829, "step": 45040 }, { "epoch": 0.4239529411764706, "grad_norm": 0.3584294962813475, "learning_rate": 3.0724028056374384e-06, "loss": 0.01917101740837097, "step": 45045 }, { "epoch": 0.424, "grad_norm": 0.4613703199035135, "learning_rate": 3.072232297520607e-06, "loss": 0.01851360946893692, "step": 45050 }, { "epoch": 0.4240470588235294, "grad_norm": 0.4592186729901209, "learning_rate": 3.0720618177885213e-06, "loss": 0.018297716975212097, "step": 45055 }, { "epoch": 0.42409411764705884, "grad_norm": 0.47050608717999404, "learning_rate": 3.0718913664333077e-06, "loss": 0.02196422517299652, "step": 45060 }, { "epoch": 0.42414117647058824, "grad_norm": 0.5572790397095793, "learning_rate": 3.0717209434470936e-06, "loss": 0.019192616641521453, "step": 45065 }, { "epoch": 0.42418823529411764, "grad_norm": 0.4006351284379872, "learning_rate": 3.071550548822012e-06, "loss": 0.019909484684467314, "step": 45070 }, { "epoch": 0.42423529411764704, "grad_norm": 0.5122738797582295, "learning_rate": 3.0713801825501973e-06, "loss": 0.020105546712875365, "step": 45075 }, { "epoch": 0.42428235294117644, "grad_norm": 0.6258464911457219, "learning_rate": 3.071209844623786e-06, "loss": 0.01608353853225708, "step": 45080 }, { "epoch": 0.4243294117647059, "grad_norm": 0.5579408966877536, "learning_rate": 3.071039535034919e-06, "loss": 0.021500128507614135, "step": 45085 }, { "epoch": 0.4243764705882353, "grad_norm": 0.6780844655643649, "learning_rate": 3.0708692537757423e-06, "loss": 0.02183780074119568, "step": 45090 }, { "epoch": 0.4244235294117647, "grad_norm": 0.6010424689823197, "learning_rate": 3.0706990008384e-06, "loss": 0.02100537419319153, "step": 45095 }, { "epoch": 0.4244705882352941, "grad_norm": 0.6506577441450173, "learning_rate": 3.0705287762150425e-06, "loss": 0.025515791773796082, "step": 45100 }, { "epoch": 0.42451764705882356, "grad_norm": 0.31163824244955207, "learning_rate": 3.070358579897824e-06, "loss": 0.016495706140995027, "step": 45105 }, { "epoch": 0.42456470588235296, "grad_norm": 0.5295603195895411, "learning_rate": 3.0701884118789e-06, "loss": 0.020581568777561187, "step": 45110 }, { "epoch": 0.42461176470588236, "grad_norm": 0.47095163725229977, "learning_rate": 3.0700182721504284e-06, "loss": 0.019591303169727327, "step": 45115 }, { "epoch": 0.42465882352941176, "grad_norm": 0.452559617011622, "learning_rate": 3.0698481607045726e-06, "loss": 0.01961490511894226, "step": 45120 }, { "epoch": 0.42470588235294116, "grad_norm": 0.5025779881794026, "learning_rate": 3.0696780775334965e-06, "loss": 0.015647101402282714, "step": 45125 }, { "epoch": 0.4247529411764706, "grad_norm": 0.6010203761193995, "learning_rate": 3.06950802262937e-06, "loss": 0.017974305152893066, "step": 45130 }, { "epoch": 0.4248, "grad_norm": 0.4299254354901611, "learning_rate": 3.0693379959843623e-06, "loss": 0.021659138798713683, "step": 45135 }, { "epoch": 0.4248470588235294, "grad_norm": 0.6441434263761934, "learning_rate": 3.0691679975906484e-06, "loss": 0.022052542865276338, "step": 45140 }, { "epoch": 0.4248941176470588, "grad_norm": 0.7111766152642471, "learning_rate": 3.0689980274404062e-06, "loss": 0.01892942488193512, "step": 45145 }, { "epoch": 0.4249411764705882, "grad_norm": 0.651396732787429, "learning_rate": 3.068828085525815e-06, "loss": 0.020137175917625427, "step": 45150 }, { "epoch": 0.42498823529411767, "grad_norm": 0.46982535131753644, "learning_rate": 3.0686581718390585e-06, "loss": 0.01723325252532959, "step": 45155 }, { "epoch": 0.42503529411764707, "grad_norm": 0.3740886386556058, "learning_rate": 3.068488286372323e-06, "loss": 0.013580010831356048, "step": 45160 }, { "epoch": 0.42508235294117647, "grad_norm": 0.595357124588579, "learning_rate": 3.0683184291177975e-06, "loss": 0.0153481125831604, "step": 45165 }, { "epoch": 0.42512941176470587, "grad_norm": 0.6617898006092219, "learning_rate": 3.0681486000676754e-06, "loss": 0.037547171115875244, "step": 45170 }, { "epoch": 0.42517647058823527, "grad_norm": 0.4348607039501189, "learning_rate": 3.0679787992141515e-06, "loss": 0.019936418533325194, "step": 45175 }, { "epoch": 0.4252235294117647, "grad_norm": 0.42867015705742934, "learning_rate": 3.0678090265494233e-06, "loss": 0.01569366306066513, "step": 45180 }, { "epoch": 0.4252705882352941, "grad_norm": 0.6058235461280593, "learning_rate": 3.067639282065693e-06, "loss": 0.021631236374378204, "step": 45185 }, { "epoch": 0.4253176470588235, "grad_norm": 0.8651022099062639, "learning_rate": 3.0674695657551658e-06, "loss": 0.02033707797527313, "step": 45190 }, { "epoch": 0.4253647058823529, "grad_norm": 0.5977719131426746, "learning_rate": 3.067299877610049e-06, "loss": 0.017545145750045777, "step": 45195 }, { "epoch": 0.4254117647058824, "grad_norm": 0.6495260685065183, "learning_rate": 3.0671302176225515e-06, "loss": 0.018788620829582214, "step": 45200 }, { "epoch": 0.4254588235294118, "grad_norm": 0.4552730498487952, "learning_rate": 3.0669605857848888e-06, "loss": 0.02025885581970215, "step": 45205 }, { "epoch": 0.4255058823529412, "grad_norm": 0.540459174230999, "learning_rate": 3.066790982089276e-06, "loss": 0.013848638534545899, "step": 45210 }, { "epoch": 0.4255529411764706, "grad_norm": 0.5331660796886458, "learning_rate": 3.0666214065279344e-06, "loss": 0.01574823409318924, "step": 45215 }, { "epoch": 0.4256, "grad_norm": 0.649309625386535, "learning_rate": 3.066451859093084e-06, "loss": 0.018131911754608154, "step": 45220 }, { "epoch": 0.42564705882352943, "grad_norm": 0.350433850731837, "learning_rate": 3.066282339776952e-06, "loss": 0.022140076756477355, "step": 45225 }, { "epoch": 0.42569411764705883, "grad_norm": 0.3369057331539446, "learning_rate": 3.0661128485717678e-06, "loss": 0.02211373895406723, "step": 45230 }, { "epoch": 0.42574117647058823, "grad_norm": 0.7432477339725946, "learning_rate": 3.0659433854697614e-06, "loss": 0.023339594900608062, "step": 45235 }, { "epoch": 0.42578823529411763, "grad_norm": 0.6183439184069085, "learning_rate": 3.0657739504631675e-06, "loss": 0.01682870239019394, "step": 45240 }, { "epoch": 0.42583529411764703, "grad_norm": 0.6134652594307328, "learning_rate": 3.0656045435442246e-06, "loss": 0.02051329016685486, "step": 45245 }, { "epoch": 0.4258823529411765, "grad_norm": 0.4647432809103112, "learning_rate": 3.0654351647051727e-06, "loss": 0.016433730721473694, "step": 45250 }, { "epoch": 0.4259294117647059, "grad_norm": 0.6531404703191735, "learning_rate": 3.065265813938255e-06, "loss": 0.019052344560623168, "step": 45255 }, { "epoch": 0.4259764705882353, "grad_norm": 0.7836921631533564, "learning_rate": 3.0650964912357197e-06, "loss": 0.019267107546329498, "step": 45260 }, { "epoch": 0.4260235294117647, "grad_norm": 0.43602466468927126, "learning_rate": 3.064927196589815e-06, "loss": 0.017552924156188966, "step": 45265 }, { "epoch": 0.42607058823529415, "grad_norm": 0.5054360783373691, "learning_rate": 3.0647579299927934e-06, "loss": 0.02056290805339813, "step": 45270 }, { "epoch": 0.42611764705882355, "grad_norm": 0.5942774396964376, "learning_rate": 3.064588691436911e-06, "loss": 0.019283846020698547, "step": 45275 }, { "epoch": 0.42616470588235295, "grad_norm": 0.6834502085232737, "learning_rate": 3.064419480914426e-06, "loss": 0.015933702886104583, "step": 45280 }, { "epoch": 0.42621176470588235, "grad_norm": 0.5759777194161199, "learning_rate": 3.0642502984176007e-06, "loss": 0.0237153559923172, "step": 45285 }, { "epoch": 0.42625882352941175, "grad_norm": 0.45745191427438736, "learning_rate": 3.064081143938699e-06, "loss": 0.02326148748397827, "step": 45290 }, { "epoch": 0.4263058823529412, "grad_norm": 0.5930459995367175, "learning_rate": 3.063912017469989e-06, "loss": 0.016229946911334992, "step": 45295 }, { "epoch": 0.4263529411764706, "grad_norm": 0.5243254254941869, "learning_rate": 3.0637429190037404e-06, "loss": 0.017623767256736755, "step": 45300 }, { "epoch": 0.4264, "grad_norm": 0.6029014010166943, "learning_rate": 3.063573848532227e-06, "loss": 0.01996614634990692, "step": 45305 }, { "epoch": 0.4264470588235294, "grad_norm": 0.5752340145172463, "learning_rate": 3.0634048060477257e-06, "loss": 0.018098530173301697, "step": 45310 }, { "epoch": 0.4264941176470588, "grad_norm": 0.5975036844069498, "learning_rate": 3.063235791542516e-06, "loss": 0.014980828762054444, "step": 45315 }, { "epoch": 0.42654117647058826, "grad_norm": 0.6093712812102657, "learning_rate": 3.0630668050088796e-06, "loss": 0.026623743772506713, "step": 45320 }, { "epoch": 0.42658823529411766, "grad_norm": 0.4236656466713619, "learning_rate": 3.0628978464391035e-06, "loss": 0.01400202363729477, "step": 45325 }, { "epoch": 0.42663529411764706, "grad_norm": 0.45425261074493095, "learning_rate": 3.0627289158254746e-06, "loss": 0.016944818198680878, "step": 45330 }, { "epoch": 0.42668235294117646, "grad_norm": 0.5608570803905922, "learning_rate": 3.0625600131602847e-06, "loss": 0.019948087632656097, "step": 45335 }, { "epoch": 0.42672941176470586, "grad_norm": 0.6704372204839133, "learning_rate": 3.062391138435828e-06, "loss": 0.021121099591255188, "step": 45340 }, { "epoch": 0.4267764705882353, "grad_norm": 0.6893699788635833, "learning_rate": 3.062222291644403e-06, "loss": 0.017656879127025606, "step": 45345 }, { "epoch": 0.4268235294117647, "grad_norm": 0.7070849062614033, "learning_rate": 3.0620534727783086e-06, "loss": 0.016824701428413393, "step": 45350 }, { "epoch": 0.4268705882352941, "grad_norm": 0.4362374933688803, "learning_rate": 3.061884681829849e-06, "loss": 0.013919830322265625, "step": 45355 }, { "epoch": 0.4269176470588235, "grad_norm": 1.382940545146696, "learning_rate": 3.0617159187913293e-06, "loss": 0.01849326491355896, "step": 45360 }, { "epoch": 0.42696470588235297, "grad_norm": 0.5245249354528069, "learning_rate": 3.06154718365506e-06, "loss": 0.015949271619319916, "step": 45365 }, { "epoch": 0.42701176470588237, "grad_norm": 0.5587114947116181, "learning_rate": 3.0613784764133537e-06, "loss": 0.020024338364601137, "step": 45370 }, { "epoch": 0.42705882352941177, "grad_norm": 0.5877923376928408, "learning_rate": 3.0612097970585243e-06, "loss": 0.016876474022865295, "step": 45375 }, { "epoch": 0.42710588235294117, "grad_norm": 0.46360614270826955, "learning_rate": 3.0610411455828896e-06, "loss": 0.016856904327869415, "step": 45380 }, { "epoch": 0.42715294117647057, "grad_norm": 0.902263542550468, "learning_rate": 3.0608725219787724e-06, "loss": 0.019756218791007994, "step": 45385 }, { "epoch": 0.4272, "grad_norm": 0.458897897189675, "learning_rate": 3.060703926238495e-06, "loss": 0.022483569383621217, "step": 45390 }, { "epoch": 0.4272470588235294, "grad_norm": 0.6575673809209202, "learning_rate": 3.060535358354386e-06, "loss": 0.016217818856239317, "step": 45395 }, { "epoch": 0.4272941176470588, "grad_norm": 0.5051440320095966, "learning_rate": 3.0603668183187746e-06, "loss": 0.020157809555530547, "step": 45400 }, { "epoch": 0.4273411764705882, "grad_norm": 0.497176155272469, "learning_rate": 3.0601983061239937e-06, "loss": 0.01963183581829071, "step": 45405 }, { "epoch": 0.4273882352941176, "grad_norm": 0.5928754536134601, "learning_rate": 3.060029821762379e-06, "loss": 0.014382897317409516, "step": 45410 }, { "epoch": 0.4274352941176471, "grad_norm": 0.6811584652414953, "learning_rate": 3.0598613652262692e-06, "loss": 0.01474834680557251, "step": 45415 }, { "epoch": 0.4274823529411765, "grad_norm": 0.5422683142795829, "learning_rate": 3.0596929365080075e-06, "loss": 0.0195296049118042, "step": 45420 }, { "epoch": 0.4275294117647059, "grad_norm": 0.5155852776307057, "learning_rate": 3.0595245355999363e-06, "loss": 0.017553335428237914, "step": 45425 }, { "epoch": 0.4275764705882353, "grad_norm": 0.5334512675353991, "learning_rate": 3.059356162494406e-06, "loss": 0.021933123469352722, "step": 45430 }, { "epoch": 0.4276235294117647, "grad_norm": 0.6066957419633233, "learning_rate": 3.0591878171837643e-06, "loss": 0.02302469313144684, "step": 45435 }, { "epoch": 0.42767058823529414, "grad_norm": 0.5570220760344886, "learning_rate": 3.059019499660367e-06, "loss": 0.016643762588500977, "step": 45440 }, { "epoch": 0.42771764705882354, "grad_norm": 0.6382071581745445, "learning_rate": 3.0588512099165697e-06, "loss": 0.019204142689704894, "step": 45445 }, { "epoch": 0.42776470588235294, "grad_norm": 0.37581073930024883, "learning_rate": 3.0586829479447324e-06, "loss": 0.015650829672813414, "step": 45450 }, { "epoch": 0.42781176470588234, "grad_norm": 0.4231333375467312, "learning_rate": 3.0585147137372167e-06, "loss": 0.021394968032836914, "step": 45455 }, { "epoch": 0.4278588235294118, "grad_norm": 0.5984603553614026, "learning_rate": 3.0583465072863893e-06, "loss": 0.022117030620574952, "step": 45460 }, { "epoch": 0.4279058823529412, "grad_norm": 0.658318062897854, "learning_rate": 3.0581783285846174e-06, "loss": 0.022796881198883057, "step": 45465 }, { "epoch": 0.4279529411764706, "grad_norm": 0.4495867589444157, "learning_rate": 3.0580101776242715e-06, "loss": 0.020966728031635285, "step": 45470 }, { "epoch": 0.428, "grad_norm": 0.5567840296322354, "learning_rate": 3.0578420543977278e-06, "loss": 0.019306266307830812, "step": 45475 }, { "epoch": 0.4280470588235294, "grad_norm": 0.5702065634538094, "learning_rate": 3.0576739588973613e-06, "loss": 0.020868581533432008, "step": 45480 }, { "epoch": 0.42809411764705885, "grad_norm": 0.9592687428737451, "learning_rate": 3.0575058911155538e-06, "loss": 0.019682124257087708, "step": 45485 }, { "epoch": 0.42814117647058825, "grad_norm": 0.5599060348409745, "learning_rate": 3.057337851044687e-06, "loss": 0.016047185659408568, "step": 45490 }, { "epoch": 0.42818823529411765, "grad_norm": 0.4043183209582694, "learning_rate": 3.0571698386771476e-06, "loss": 0.018557049334049225, "step": 45495 }, { "epoch": 0.42823529411764705, "grad_norm": 0.4757839352381904, "learning_rate": 3.0570018540053243e-06, "loss": 0.017000414431095123, "step": 45500 }, { "epoch": 0.42828235294117645, "grad_norm": 0.49179860375061585, "learning_rate": 3.0568338970216084e-06, "loss": 0.024046576023101805, "step": 45505 }, { "epoch": 0.4283294117647059, "grad_norm": 0.7735084020079572, "learning_rate": 3.0566659677183943e-06, "loss": 0.018096916377544403, "step": 45510 }, { "epoch": 0.4283764705882353, "grad_norm": 0.6189795922219531, "learning_rate": 3.056498066088081e-06, "loss": 0.018835310637950898, "step": 45515 }, { "epoch": 0.4284235294117647, "grad_norm": 0.4443818763668928, "learning_rate": 3.0563301921230676e-06, "loss": 0.019479578733444212, "step": 45520 }, { "epoch": 0.4284705882352941, "grad_norm": 0.3730042916492732, "learning_rate": 3.056162345815758e-06, "loss": 0.01572714149951935, "step": 45525 }, { "epoch": 0.4285176470588235, "grad_norm": 0.7206615974568771, "learning_rate": 3.055994527158559e-06, "loss": 0.02011828273534775, "step": 45530 }, { "epoch": 0.42856470588235296, "grad_norm": 0.718871654454515, "learning_rate": 3.055826736143879e-06, "loss": 0.016658440232276917, "step": 45535 }, { "epoch": 0.42861176470588236, "grad_norm": 0.5798653907893323, "learning_rate": 3.0556589727641314e-06, "loss": 0.015213602781295776, "step": 45540 }, { "epoch": 0.42865882352941176, "grad_norm": 0.433873363386548, "learning_rate": 3.0554912370117302e-06, "loss": 0.018588219583034516, "step": 45545 }, { "epoch": 0.42870588235294116, "grad_norm": 0.7939431291473463, "learning_rate": 3.055323528879094e-06, "loss": 0.02344624400138855, "step": 45550 }, { "epoch": 0.4287529411764706, "grad_norm": 0.5321082211536623, "learning_rate": 3.0551558483586434e-06, "loss": 0.01761908531188965, "step": 45555 }, { "epoch": 0.4288, "grad_norm": 0.49122939001916105, "learning_rate": 3.054988195442803e-06, "loss": 0.016642946004867553, "step": 45560 }, { "epoch": 0.4288470588235294, "grad_norm": 0.46977319268991724, "learning_rate": 3.054820570123998e-06, "loss": 0.02086370587348938, "step": 45565 }, { "epoch": 0.4288941176470588, "grad_norm": 0.6225433015068196, "learning_rate": 3.05465297239466e-06, "loss": 0.01837928593158722, "step": 45570 }, { "epoch": 0.4289411764705882, "grad_norm": 0.6794564313264044, "learning_rate": 3.0544854022472205e-06, "loss": 0.019830669462680816, "step": 45575 }, { "epoch": 0.42898823529411767, "grad_norm": 0.3183019230097288, "learning_rate": 3.0543178596741148e-06, "loss": 0.018959128856658937, "step": 45580 }, { "epoch": 0.42903529411764707, "grad_norm": 0.49282562509638433, "learning_rate": 3.0541503446677815e-06, "loss": 0.017162083089351653, "step": 45585 }, { "epoch": 0.42908235294117647, "grad_norm": 0.47656331479219866, "learning_rate": 3.0539828572206625e-06, "loss": 0.015018971264362335, "step": 45590 }, { "epoch": 0.42912941176470587, "grad_norm": 0.4209711075684494, "learning_rate": 3.053815397325201e-06, "loss": 0.019848909974098206, "step": 45595 }, { "epoch": 0.42917647058823527, "grad_norm": 0.6675557976327093, "learning_rate": 3.053647964973845e-06, "loss": 0.017850257456302643, "step": 45600 }, { "epoch": 0.4292235294117647, "grad_norm": 0.42256929153055345, "learning_rate": 3.053480560159044e-06, "loss": 0.01620439738035202, "step": 45605 }, { "epoch": 0.4292705882352941, "grad_norm": 0.39391159852762375, "learning_rate": 3.053313182873251e-06, "loss": 0.01784195303916931, "step": 45610 }, { "epoch": 0.4293176470588235, "grad_norm": 0.49234183188181707, "learning_rate": 3.053145833108921e-06, "loss": 0.017706418037414552, "step": 45615 }, { "epoch": 0.4293647058823529, "grad_norm": 0.5019146398999443, "learning_rate": 3.052978510858515e-06, "loss": 0.023129728436470032, "step": 45620 }, { "epoch": 0.4294117647058823, "grad_norm": 0.603971178119846, "learning_rate": 3.052811216114491e-06, "loss": 0.01733955442905426, "step": 45625 }, { "epoch": 0.4294588235294118, "grad_norm": 0.33426889523111414, "learning_rate": 3.0526439488693167e-06, "loss": 0.019747784733772276, "step": 45630 }, { "epoch": 0.4295058823529412, "grad_norm": 0.7691446660555674, "learning_rate": 3.052476709115458e-06, "loss": 0.024236467480659486, "step": 45635 }, { "epoch": 0.4295529411764706, "grad_norm": 0.4125040657862359, "learning_rate": 3.052309496845385e-06, "loss": 0.018823722004890443, "step": 45640 }, { "epoch": 0.4296, "grad_norm": 0.44840949970432514, "learning_rate": 3.052142312051572e-06, "loss": 0.020576106011867525, "step": 45645 }, { "epoch": 0.42964705882352944, "grad_norm": 0.4897838566475643, "learning_rate": 3.051975154726493e-06, "loss": 0.019246420264244078, "step": 45650 }, { "epoch": 0.42969411764705884, "grad_norm": 0.5966420677583008, "learning_rate": 3.0518080248626286e-06, "loss": 0.01977360248565674, "step": 45655 }, { "epoch": 0.42974117647058824, "grad_norm": 0.4446013148997002, "learning_rate": 3.05164092245246e-06, "loss": 0.016281022131443022, "step": 45660 }, { "epoch": 0.42978823529411764, "grad_norm": 0.5337671414094017, "learning_rate": 3.051473847488472e-06, "loss": 0.013594548404216766, "step": 45665 }, { "epoch": 0.42983529411764704, "grad_norm": 0.35490389806366957, "learning_rate": 3.051306799963152e-06, "loss": 0.017921897768974304, "step": 45670 }, { "epoch": 0.4298823529411765, "grad_norm": 0.46823872769648056, "learning_rate": 3.05113977986899e-06, "loss": 0.016348424553871154, "step": 45675 }, { "epoch": 0.4299294117647059, "grad_norm": 0.36804737563910417, "learning_rate": 3.0509727871984803e-06, "loss": 0.016992537677288054, "step": 45680 }, { "epoch": 0.4299764705882353, "grad_norm": 0.6206308094170889, "learning_rate": 3.0508058219441185e-06, "loss": 0.019705159962177275, "step": 45685 }, { "epoch": 0.4300235294117647, "grad_norm": 0.48713727772651466, "learning_rate": 3.050638884098403e-06, "loss": 0.016045120358467103, "step": 45690 }, { "epoch": 0.4300705882352941, "grad_norm": 0.4996095233029882, "learning_rate": 3.0504719736538376e-06, "loss": 0.024479743838310242, "step": 45695 }, { "epoch": 0.43011764705882355, "grad_norm": 0.3898375498422633, "learning_rate": 3.0503050906029243e-06, "loss": 0.016514548659324647, "step": 45700 }, { "epoch": 0.43016470588235295, "grad_norm": 0.5588163004596595, "learning_rate": 3.050138234938173e-06, "loss": 0.022998699545860292, "step": 45705 }, { "epoch": 0.43021176470588235, "grad_norm": 0.4921257719889077, "learning_rate": 3.0499714066520937e-06, "loss": 0.017084145545959474, "step": 45710 }, { "epoch": 0.43025882352941175, "grad_norm": 0.38578617608164834, "learning_rate": 3.0498046057371995e-06, "loss": 0.01567872315645218, "step": 45715 }, { "epoch": 0.43030588235294115, "grad_norm": 0.6923822939616936, "learning_rate": 3.0496378321860063e-06, "loss": 0.018049827218055724, "step": 45720 }, { "epoch": 0.4303529411764706, "grad_norm": 0.5939022285284542, "learning_rate": 3.0494710859910343e-06, "loss": 0.015763068199157716, "step": 45725 }, { "epoch": 0.4304, "grad_norm": 0.49638136846674225, "learning_rate": 3.0493043671448046e-06, "loss": 0.02081429958343506, "step": 45730 }, { "epoch": 0.4304470588235294, "grad_norm": 0.6386389756219492, "learning_rate": 3.049137675639843e-06, "loss": 0.015957306325435638, "step": 45735 }, { "epoch": 0.4304941176470588, "grad_norm": 0.42868707545571755, "learning_rate": 3.048971011468676e-06, "loss": 0.014998742938041687, "step": 45740 }, { "epoch": 0.43054117647058826, "grad_norm": 0.8510016551004428, "learning_rate": 3.048804374623834e-06, "loss": 0.025603416562080383, "step": 45745 }, { "epoch": 0.43058823529411766, "grad_norm": 0.5434222510239738, "learning_rate": 3.0486377650978522e-06, "loss": 0.018690289556980134, "step": 45750 }, { "epoch": 0.43063529411764706, "grad_norm": 0.6249926939980854, "learning_rate": 3.048471182883265e-06, "loss": 0.017384400963783263, "step": 45755 }, { "epoch": 0.43068235294117646, "grad_norm": 0.6553069725340082, "learning_rate": 3.0483046279726133e-06, "loss": 0.02128264755010605, "step": 45760 }, { "epoch": 0.43072941176470586, "grad_norm": 0.40622669483485446, "learning_rate": 3.0481381003584375e-06, "loss": 0.018505582213401796, "step": 45765 }, { "epoch": 0.4307764705882353, "grad_norm": 0.7097511493519859, "learning_rate": 3.047971600033284e-06, "loss": 0.01772385835647583, "step": 45770 }, { "epoch": 0.4308235294117647, "grad_norm": 0.47165282069985953, "learning_rate": 3.047805126989698e-06, "loss": 0.018194699287414552, "step": 45775 }, { "epoch": 0.4308705882352941, "grad_norm": 0.7735080597615953, "learning_rate": 3.047638681220233e-06, "loss": 0.0206582173705101, "step": 45780 }, { "epoch": 0.4309176470588235, "grad_norm": 0.43100172982182383, "learning_rate": 3.04747226271744e-06, "loss": 0.021989183127880098, "step": 45785 }, { "epoch": 0.4309647058823529, "grad_norm": 0.7615885853885301, "learning_rate": 3.0473058714738767e-06, "loss": 0.014738443493843078, "step": 45790 }, { "epoch": 0.43101176470588237, "grad_norm": 0.526559008190437, "learning_rate": 3.0471395074821014e-06, "loss": 0.01430516242980957, "step": 45795 }, { "epoch": 0.43105882352941177, "grad_norm": 0.48086799791576135, "learning_rate": 3.0469731707346762e-06, "loss": 0.019825673103332518, "step": 45800 }, { "epoch": 0.43110588235294117, "grad_norm": 0.8646028980752715, "learning_rate": 3.0468068612241664e-06, "loss": 0.019887682795524598, "step": 45805 }, { "epoch": 0.43115294117647057, "grad_norm": 0.7969338377195514, "learning_rate": 3.046640578943139e-06, "loss": 0.027165764570236207, "step": 45810 }, { "epoch": 0.4312, "grad_norm": 0.6754242515207421, "learning_rate": 3.0464743238841644e-06, "loss": 0.026871633529663087, "step": 45815 }, { "epoch": 0.43124705882352943, "grad_norm": 0.5348064476606621, "learning_rate": 3.0463080960398166e-06, "loss": 0.020014627277851103, "step": 45820 }, { "epoch": 0.43129411764705883, "grad_norm": 0.38372542290441936, "learning_rate": 3.0461418954026705e-06, "loss": 0.014744585752487183, "step": 45825 }, { "epoch": 0.43134117647058823, "grad_norm": 0.5766391901072939, "learning_rate": 3.0459757219653063e-06, "loss": 0.023996224999427794, "step": 45830 }, { "epoch": 0.43138823529411763, "grad_norm": 0.4982918345459259, "learning_rate": 3.0458095757203056e-06, "loss": 0.026518818736076356, "step": 45835 }, { "epoch": 0.4314352941176471, "grad_norm": 0.5458777477356586, "learning_rate": 3.045643456660252e-06, "loss": 0.017947319149971008, "step": 45840 }, { "epoch": 0.4314823529411765, "grad_norm": 0.6469297242733743, "learning_rate": 3.045477364777734e-06, "loss": 0.02353675216436386, "step": 45845 }, { "epoch": 0.4315294117647059, "grad_norm": 0.7808272927254699, "learning_rate": 3.045311300065341e-06, "loss": 0.026718750596046448, "step": 45850 }, { "epoch": 0.4315764705882353, "grad_norm": 0.33129606173151566, "learning_rate": 3.045145262515667e-06, "loss": 0.015229122340679168, "step": 45855 }, { "epoch": 0.4316235294117647, "grad_norm": 0.47479901199397345, "learning_rate": 3.044979252121307e-06, "loss": 0.02146364450454712, "step": 45860 }, { "epoch": 0.43167058823529414, "grad_norm": 0.6129175619564415, "learning_rate": 3.0448132688748604e-06, "loss": 0.019870033860206603, "step": 45865 }, { "epoch": 0.43171764705882354, "grad_norm": 0.5592187300375336, "learning_rate": 3.04464731276893e-06, "loss": 0.02252902537584305, "step": 45870 }, { "epoch": 0.43176470588235294, "grad_norm": 0.5130056710676607, "learning_rate": 3.0444813837961167e-06, "loss": 0.016345678269863127, "step": 45875 }, { "epoch": 0.43181176470588234, "grad_norm": 0.5958779781969052, "learning_rate": 3.0443154819490313e-06, "loss": 0.017244914174079896, "step": 45880 }, { "epoch": 0.43185882352941174, "grad_norm": 0.4135533723134313, "learning_rate": 3.0441496072202818e-06, "loss": 0.014331358671188354, "step": 45885 }, { "epoch": 0.4319058823529412, "grad_norm": 0.6693893425730952, "learning_rate": 3.043983759602482e-06, "loss": 0.02207636833190918, "step": 45890 }, { "epoch": 0.4319529411764706, "grad_norm": 0.4204209364685436, "learning_rate": 3.0438179390882472e-06, "loss": 0.021215984225273134, "step": 45895 }, { "epoch": 0.432, "grad_norm": 0.44280307625112364, "learning_rate": 3.0436521456701955e-06, "loss": 0.021074922382831575, "step": 45900 }, { "epoch": 0.4320470588235294, "grad_norm": 0.4935386694594081, "learning_rate": 3.043486379340949e-06, "loss": 0.01659482717514038, "step": 45905 }, { "epoch": 0.43209411764705885, "grad_norm": 0.8202829669997977, "learning_rate": 3.043320640093132e-06, "loss": 0.019418655335903166, "step": 45910 }, { "epoch": 0.43214117647058825, "grad_norm": 0.6907820525156648, "learning_rate": 3.04315492791937e-06, "loss": 0.021323634684085845, "step": 45915 }, { "epoch": 0.43218823529411765, "grad_norm": 0.6558316331951217, "learning_rate": 3.042989242812295e-06, "loss": 0.018880876898765563, "step": 45920 }, { "epoch": 0.43223529411764705, "grad_norm": 0.5424318179513576, "learning_rate": 3.0428235847645365e-06, "loss": 0.01889667809009552, "step": 45925 }, { "epoch": 0.43228235294117645, "grad_norm": 0.7386489064009398, "learning_rate": 3.0426579537687323e-06, "loss": 0.019942180812358858, "step": 45930 }, { "epoch": 0.4323294117647059, "grad_norm": 0.5216277419398856, "learning_rate": 3.0424923498175195e-06, "loss": 0.01782170534133911, "step": 45935 }, { "epoch": 0.4323764705882353, "grad_norm": 0.5203184608296687, "learning_rate": 3.04232677290354e-06, "loss": 0.01553742289543152, "step": 45940 }, { "epoch": 0.4324235294117647, "grad_norm": 0.3940039905967517, "learning_rate": 3.0421612230194368e-06, "loss": 0.01934625655412674, "step": 45945 }, { "epoch": 0.4324705882352941, "grad_norm": 0.5163154956703777, "learning_rate": 3.0419957001578564e-06, "loss": 0.02498241513967514, "step": 45950 }, { "epoch": 0.4325176470588235, "grad_norm": 0.6348770477286378, "learning_rate": 3.041830204311448e-06, "loss": 0.021509578824043273, "step": 45955 }, { "epoch": 0.43256470588235296, "grad_norm": 0.5801242920032686, "learning_rate": 3.041664735472865e-06, "loss": 0.020848619937896728, "step": 45960 }, { "epoch": 0.43261176470588236, "grad_norm": 0.40126171159672885, "learning_rate": 3.0414992936347613e-06, "loss": 0.02150484323501587, "step": 45965 }, { "epoch": 0.43265882352941176, "grad_norm": 0.3909391614734122, "learning_rate": 3.0413338787897946e-06, "loss": 0.02019509971141815, "step": 45970 }, { "epoch": 0.43270588235294116, "grad_norm": 0.6495899170315163, "learning_rate": 3.0411684909306262e-06, "loss": 0.01861507594585419, "step": 45975 }, { "epoch": 0.43275294117647056, "grad_norm": 0.6745307452001059, "learning_rate": 3.041003130049919e-06, "loss": 0.017928844690322875, "step": 45980 }, { "epoch": 0.4328, "grad_norm": 0.6053408317156572, "learning_rate": 3.0408377961403386e-06, "loss": 0.021751239895820618, "step": 45985 }, { "epoch": 0.4328470588235294, "grad_norm": 0.3898936300790177, "learning_rate": 3.0406724891945546e-06, "loss": 0.016234397888183594, "step": 45990 }, { "epoch": 0.4328941176470588, "grad_norm": 0.6880424617180319, "learning_rate": 3.040507209205239e-06, "loss": 0.02260538935661316, "step": 45995 }, { "epoch": 0.4329411764705882, "grad_norm": 0.7138524348203232, "learning_rate": 3.040341956165066e-06, "loss": 0.021901363134384157, "step": 46000 }, { "epoch": 0.4329882352941177, "grad_norm": 0.42384810568374226, "learning_rate": 3.040176730066713e-06, "loss": 0.01764885485172272, "step": 46005 }, { "epoch": 0.4330352941176471, "grad_norm": 0.7451677459360049, "learning_rate": 3.040011530902859e-06, "loss": 0.019834786653518677, "step": 46010 }, { "epoch": 0.4330823529411765, "grad_norm": 0.5599773957791903, "learning_rate": 3.039846358666189e-06, "loss": 0.0209367498755455, "step": 46015 }, { "epoch": 0.4331294117647059, "grad_norm": 0.4669304326961253, "learning_rate": 3.0396812133493865e-06, "loss": 0.018279603123664855, "step": 46020 }, { "epoch": 0.4331764705882353, "grad_norm": 0.6135513667823219, "learning_rate": 3.0395160949451414e-06, "loss": 0.017178289592266083, "step": 46025 }, { "epoch": 0.43322352941176473, "grad_norm": 0.6090427879031557, "learning_rate": 3.0393510034461436e-06, "loss": 0.01859460175037384, "step": 46030 }, { "epoch": 0.43327058823529413, "grad_norm": 0.46017450489203915, "learning_rate": 3.039185938845089e-06, "loss": 0.019767215847969054, "step": 46035 }, { "epoch": 0.43331764705882353, "grad_norm": 0.578861174584341, "learning_rate": 3.0390209011346723e-06, "loss": 0.018433623015880585, "step": 46040 }, { "epoch": 0.43336470588235293, "grad_norm": 0.5696767342063924, "learning_rate": 3.038855890307595e-06, "loss": 0.016245535016059874, "step": 46045 }, { "epoch": 0.43341176470588233, "grad_norm": 0.5561405256579569, "learning_rate": 3.0386909063565577e-06, "loss": 0.018005579710006714, "step": 46050 }, { "epoch": 0.4334588235294118, "grad_norm": 0.4806882916700941, "learning_rate": 3.0385259492742665e-06, "loss": 0.016325145959854126, "step": 46055 }, { "epoch": 0.4335058823529412, "grad_norm": 0.480004546528363, "learning_rate": 3.038361019053429e-06, "loss": 0.02187441736459732, "step": 46060 }, { "epoch": 0.4335529411764706, "grad_norm": 0.7341423783890978, "learning_rate": 3.0381961156867556e-06, "loss": 0.02378038167953491, "step": 46065 }, { "epoch": 0.4336, "grad_norm": 0.566032245985305, "learning_rate": 3.0380312391669602e-06, "loss": 0.02400709092617035, "step": 46070 }, { "epoch": 0.4336470588235294, "grad_norm": 0.4935397946514024, "learning_rate": 3.037866389486758e-06, "loss": 0.015252698957920075, "step": 46075 }, { "epoch": 0.43369411764705884, "grad_norm": 0.39458488273260045, "learning_rate": 3.0377015666388696e-06, "loss": 0.01837814152240753, "step": 46080 }, { "epoch": 0.43374117647058824, "grad_norm": 0.7865504337839947, "learning_rate": 3.037536770616015e-06, "loss": 0.02197318375110626, "step": 46085 }, { "epoch": 0.43378823529411764, "grad_norm": 0.3889012282998445, "learning_rate": 3.0373720014109206e-06, "loss": 0.017584364116191863, "step": 46090 }, { "epoch": 0.43383529411764704, "grad_norm": 0.6700190211420369, "learning_rate": 3.037207259016311e-06, "loss": 0.019106051325798033, "step": 46095 }, { "epoch": 0.4338823529411765, "grad_norm": 0.24562392590228743, "learning_rate": 3.0370425434249185e-06, "loss": 0.017061330378055573, "step": 46100 }, { "epoch": 0.4339294117647059, "grad_norm": 0.7045210789119558, "learning_rate": 3.036877854629475e-06, "loss": 0.01927753984928131, "step": 46105 }, { "epoch": 0.4339764705882353, "grad_norm": 1.205378174291074, "learning_rate": 3.0367131926227163e-06, "loss": 0.020853981375694275, "step": 46110 }, { "epoch": 0.4340235294117647, "grad_norm": 0.5337077688952127, "learning_rate": 3.036548557397379e-06, "loss": 0.017038455605506896, "step": 46115 }, { "epoch": 0.4340705882352941, "grad_norm": 0.3878649323919339, "learning_rate": 3.0363839489462066e-06, "loss": 0.019886742532253265, "step": 46120 }, { "epoch": 0.43411764705882355, "grad_norm": 0.6951597812375474, "learning_rate": 3.036219367261941e-06, "loss": 0.020742124319076537, "step": 46125 }, { "epoch": 0.43416470588235295, "grad_norm": 0.506238591991724, "learning_rate": 3.03605481233733e-06, "loss": 0.01810859441757202, "step": 46130 }, { "epoch": 0.43421176470588235, "grad_norm": 0.3776702061322498, "learning_rate": 3.0358902841651226e-06, "loss": 0.016819503903388978, "step": 46135 }, { "epoch": 0.43425882352941175, "grad_norm": 0.6524368953169946, "learning_rate": 3.0357257827380703e-06, "loss": 0.019165627658367157, "step": 46140 }, { "epoch": 0.43430588235294115, "grad_norm": 0.46935863984982606, "learning_rate": 3.035561308048928e-06, "loss": 0.01870855540037155, "step": 46145 }, { "epoch": 0.4343529411764706, "grad_norm": 0.4511798822626293, "learning_rate": 3.035396860090453e-06, "loss": 0.015834857523441315, "step": 46150 }, { "epoch": 0.4344, "grad_norm": 0.6155408982219482, "learning_rate": 3.035232438855407e-06, "loss": 0.018348929286003113, "step": 46155 }, { "epoch": 0.4344470588235294, "grad_norm": 0.4731247151214322, "learning_rate": 3.035068044336551e-06, "loss": 0.017192316055297852, "step": 46160 }, { "epoch": 0.4344941176470588, "grad_norm": 0.6324091962830203, "learning_rate": 3.0349036765266526e-06, "loss": 0.01611742228269577, "step": 46165 }, { "epoch": 0.4345411764705882, "grad_norm": 0.35118361602425824, "learning_rate": 3.034739335418479e-06, "loss": 0.019755396246910095, "step": 46170 }, { "epoch": 0.43458823529411766, "grad_norm": 0.7216728214925044, "learning_rate": 3.034575021004802e-06, "loss": 0.020758169889450073, "step": 46175 }, { "epoch": 0.43463529411764706, "grad_norm": 0.7166901927401551, "learning_rate": 3.0344107332783956e-06, "loss": 0.01749923825263977, "step": 46180 }, { "epoch": 0.43468235294117646, "grad_norm": 0.6112884701615086, "learning_rate": 3.0342464722320363e-06, "loss": 0.019403135776519774, "step": 46185 }, { "epoch": 0.43472941176470586, "grad_norm": 0.6372552275051886, "learning_rate": 3.034082237858503e-06, "loss": 0.01912866234779358, "step": 46190 }, { "epoch": 0.4347764705882353, "grad_norm": 0.49263654395054624, "learning_rate": 3.03391803015058e-06, "loss": 0.02374087870121002, "step": 46195 }, { "epoch": 0.4348235294117647, "grad_norm": 0.5366957256404323, "learning_rate": 3.033753849101049e-06, "loss": 0.02154722809791565, "step": 46200 }, { "epoch": 0.4348705882352941, "grad_norm": 0.4686555319771251, "learning_rate": 3.0335896947027012e-06, "loss": 0.016967105865478515, "step": 46205 }, { "epoch": 0.4349176470588235, "grad_norm": 0.9722468655864628, "learning_rate": 3.0334255669483244e-06, "loss": 0.01693192571401596, "step": 46210 }, { "epoch": 0.4349647058823529, "grad_norm": 0.608732158178739, "learning_rate": 3.033261465830712e-06, "loss": 0.021718531847000122, "step": 46215 }, { "epoch": 0.4350117647058824, "grad_norm": 0.4708822452492049, "learning_rate": 3.033097391342661e-06, "loss": 0.019015392661094664, "step": 46220 }, { "epoch": 0.4350588235294118, "grad_norm": 0.512732870586244, "learning_rate": 3.032933343476969e-06, "loss": 0.021800732612609862, "step": 46225 }, { "epoch": 0.4351058823529412, "grad_norm": 0.5180200130384206, "learning_rate": 3.032769322226438e-06, "loss": 0.015540966391563415, "step": 46230 }, { "epoch": 0.4351529411764706, "grad_norm": 0.5531582908686465, "learning_rate": 3.032605327583871e-06, "loss": 0.021354210376739503, "step": 46235 }, { "epoch": 0.4352, "grad_norm": 0.58999756029918, "learning_rate": 3.032441359542077e-06, "loss": 0.017404389381408692, "step": 46240 }, { "epoch": 0.43524705882352943, "grad_norm": 0.5323749038430231, "learning_rate": 3.032277418093863e-06, "loss": 0.019296690821647644, "step": 46245 }, { "epoch": 0.43529411764705883, "grad_norm": 0.7396221949397892, "learning_rate": 3.032113503232042e-06, "loss": 0.019698673486709596, "step": 46250 }, { "epoch": 0.43534117647058823, "grad_norm": 0.40361237134542055, "learning_rate": 3.0319496149494294e-06, "loss": 0.022251787781715392, "step": 46255 }, { "epoch": 0.43538823529411763, "grad_norm": 0.52767035508066, "learning_rate": 3.0317857532388423e-06, "loss": 0.02086254358291626, "step": 46260 }, { "epoch": 0.43543529411764703, "grad_norm": 0.4102066511403038, "learning_rate": 3.031621918093101e-06, "loss": 0.024186575412750246, "step": 46265 }, { "epoch": 0.4354823529411765, "grad_norm": 0.3628449733357714, "learning_rate": 3.0314581095050295e-06, "loss": 0.023110049962997436, "step": 46270 }, { "epoch": 0.4355294117647059, "grad_norm": 0.5358179091130459, "learning_rate": 3.031294327467452e-06, "loss": 0.01766481399536133, "step": 46275 }, { "epoch": 0.4355764705882353, "grad_norm": 0.5436644854405832, "learning_rate": 3.0311305719731995e-06, "loss": 0.014981681108474731, "step": 46280 }, { "epoch": 0.4356235294117647, "grad_norm": 0.464876130208648, "learning_rate": 3.0309668430150996e-06, "loss": 0.016846466064453124, "step": 46285 }, { "epoch": 0.43567058823529414, "grad_norm": 0.6165470362113016, "learning_rate": 3.030803140585989e-06, "loss": 0.01993671655654907, "step": 46290 }, { "epoch": 0.43571764705882354, "grad_norm": 0.7034058538645073, "learning_rate": 3.0306394646787036e-06, "loss": 0.02085375338792801, "step": 46295 }, { "epoch": 0.43576470588235294, "grad_norm": 0.5892318348027871, "learning_rate": 3.0304758152860825e-06, "loss": 0.020292621850967408, "step": 46300 }, { "epoch": 0.43581176470588234, "grad_norm": 0.5273811355387857, "learning_rate": 3.0303121924009682e-06, "loss": 0.01654568612575531, "step": 46305 }, { "epoch": 0.43585882352941174, "grad_norm": 0.3282969526945249, "learning_rate": 3.0301485960162047e-06, "loss": 0.0162925124168396, "step": 46310 }, { "epoch": 0.4359058823529412, "grad_norm": 0.4725216637955321, "learning_rate": 3.02998502612464e-06, "loss": 0.016379067301750184, "step": 46315 }, { "epoch": 0.4359529411764706, "grad_norm": 0.45460052519534316, "learning_rate": 3.029821482719124e-06, "loss": 0.019781824946403504, "step": 46320 }, { "epoch": 0.436, "grad_norm": 0.334386721419621, "learning_rate": 3.02965796579251e-06, "loss": 0.016480150818824767, "step": 46325 }, { "epoch": 0.4360470588235294, "grad_norm": 0.4957410831136297, "learning_rate": 3.029494475337653e-06, "loss": 0.019830083847045897, "step": 46330 }, { "epoch": 0.4360941176470588, "grad_norm": 0.6357709127263934, "learning_rate": 3.029331011347412e-06, "loss": 0.01752924621105194, "step": 46335 }, { "epoch": 0.43614117647058825, "grad_norm": 0.4398815022009189, "learning_rate": 3.029167573814647e-06, "loss": 0.01895226240158081, "step": 46340 }, { "epoch": 0.43618823529411765, "grad_norm": 0.392720991472242, "learning_rate": 3.029004162732222e-06, "loss": 0.01580394208431244, "step": 46345 }, { "epoch": 0.43623529411764705, "grad_norm": 0.4423791690718075, "learning_rate": 3.0288407780930034e-06, "loss": 0.01801467090845108, "step": 46350 }, { "epoch": 0.43628235294117645, "grad_norm": 0.786865722641425, "learning_rate": 3.028677419889861e-06, "loss": 0.017903360724449157, "step": 46355 }, { "epoch": 0.4363294117647059, "grad_norm": 0.5976579343428144, "learning_rate": 3.0285140881156642e-06, "loss": 0.01942479908466339, "step": 46360 }, { "epoch": 0.4363764705882353, "grad_norm": 0.6022413249249475, "learning_rate": 3.0283507827632903e-06, "loss": 0.020751753449440004, "step": 46365 }, { "epoch": 0.4364235294117647, "grad_norm": 0.4551640041295769, "learning_rate": 3.0281875038256146e-06, "loss": 0.016902899742126463, "step": 46370 }, { "epoch": 0.4364705882352941, "grad_norm": 0.5896421981613575, "learning_rate": 3.0280242512955176e-06, "loss": 0.015075954794883727, "step": 46375 }, { "epoch": 0.4365176470588235, "grad_norm": 1.1440789639576807, "learning_rate": 3.0278610251658815e-06, "loss": 0.019104552268981934, "step": 46380 }, { "epoch": 0.43656470588235297, "grad_norm": 0.4060248284474695, "learning_rate": 3.027697825429591e-06, "loss": 0.015644656121730806, "step": 46385 }, { "epoch": 0.43661176470588237, "grad_norm": 0.38810934008955944, "learning_rate": 3.0275346520795353e-06, "loss": 0.017288410663604738, "step": 46390 }, { "epoch": 0.43665882352941177, "grad_norm": 0.6212596738169309, "learning_rate": 3.0273715051086026e-06, "loss": 0.017261016368865966, "step": 46395 }, { "epoch": 0.43670588235294117, "grad_norm": 0.5267299249646519, "learning_rate": 3.0272083845096893e-06, "loss": 0.023658041656017304, "step": 46400 }, { "epoch": 0.43675294117647057, "grad_norm": 0.6290669672340519, "learning_rate": 3.0270452902756886e-06, "loss": 0.0197933167219162, "step": 46405 }, { "epoch": 0.4368, "grad_norm": 0.4850402673403759, "learning_rate": 3.0268822223995004e-06, "loss": 0.016975146532058717, "step": 46410 }, { "epoch": 0.4368470588235294, "grad_norm": 0.6497837225876859, "learning_rate": 3.0267191808740252e-06, "loss": 0.0201400026679039, "step": 46415 }, { "epoch": 0.4368941176470588, "grad_norm": 0.4935225019777918, "learning_rate": 3.026556165692168e-06, "loss": 0.01812577247619629, "step": 46420 }, { "epoch": 0.4369411764705882, "grad_norm": 0.4347804472026776, "learning_rate": 3.0263931768468336e-06, "loss": 0.019615474343299865, "step": 46425 }, { "epoch": 0.4369882352941176, "grad_norm": 0.6517392663662238, "learning_rate": 3.026230214330933e-06, "loss": 0.027790915966033936, "step": 46430 }, { "epoch": 0.4370352941176471, "grad_norm": 0.8050834791543394, "learning_rate": 3.026067278137378e-06, "loss": 0.025942215323448183, "step": 46435 }, { "epoch": 0.4370823529411765, "grad_norm": 0.5381679430161932, "learning_rate": 3.0259043682590823e-06, "loss": 0.02330731153488159, "step": 46440 }, { "epoch": 0.4371294117647059, "grad_norm": 0.6299716460311657, "learning_rate": 3.0257414846889633e-06, "loss": 0.020539151132106782, "step": 46445 }, { "epoch": 0.4371764705882353, "grad_norm": 0.365619180371479, "learning_rate": 3.0255786274199415e-06, "loss": 0.014011237025260925, "step": 46450 }, { "epoch": 0.43722352941176473, "grad_norm": 0.7017284123554809, "learning_rate": 3.02541579644494e-06, "loss": 0.018846657872200013, "step": 46455 }, { "epoch": 0.43727058823529413, "grad_norm": 0.7256818287757975, "learning_rate": 3.0252529917568827e-06, "loss": 0.022659546136856078, "step": 46460 }, { "epoch": 0.43731764705882353, "grad_norm": 0.44158846464915175, "learning_rate": 3.025090213348698e-06, "loss": 0.026551532745361327, "step": 46465 }, { "epoch": 0.43736470588235293, "grad_norm": 0.4372255087658093, "learning_rate": 3.0249274612133172e-06, "loss": 0.015593907237052918, "step": 46470 }, { "epoch": 0.43741176470588233, "grad_norm": 0.6564799073029829, "learning_rate": 3.0247647353436734e-06, "loss": 0.016810691356658934, "step": 46475 }, { "epoch": 0.4374588235294118, "grad_norm": 0.5935570494741864, "learning_rate": 3.0246020357327022e-06, "loss": 0.024322253465652467, "step": 46480 }, { "epoch": 0.4375058823529412, "grad_norm": 0.8034527273663581, "learning_rate": 3.024439362373342e-06, "loss": 0.020564016699790955, "step": 46485 }, { "epoch": 0.4375529411764706, "grad_norm": 0.3521361467473651, "learning_rate": 3.024276715258534e-06, "loss": 0.019082808494567872, "step": 46490 }, { "epoch": 0.4376, "grad_norm": 0.7205424152997596, "learning_rate": 3.024114094381223e-06, "loss": 0.01952294409275055, "step": 46495 }, { "epoch": 0.4376470588235294, "grad_norm": 0.5448880861059912, "learning_rate": 3.0239514997343544e-06, "loss": 0.0235928475856781, "step": 46500 }, { "epoch": 0.43769411764705884, "grad_norm": 0.5643234894637955, "learning_rate": 3.023788931310879e-06, "loss": 0.01768123358488083, "step": 46505 }, { "epoch": 0.43774117647058824, "grad_norm": 0.6171617361290797, "learning_rate": 3.023626389103747e-06, "loss": 0.019274935126304626, "step": 46510 }, { "epoch": 0.43778823529411764, "grad_norm": 0.37460733296232973, "learning_rate": 3.0234638731059135e-06, "loss": 0.01865222454071045, "step": 46515 }, { "epoch": 0.43783529411764704, "grad_norm": 0.4543474974154355, "learning_rate": 3.023301383310336e-06, "loss": 0.016774210333824157, "step": 46520 }, { "epoch": 0.43788235294117644, "grad_norm": 0.42964532594391625, "learning_rate": 3.0231389197099743e-06, "loss": 0.019574442505836488, "step": 46525 }, { "epoch": 0.4379294117647059, "grad_norm": 0.4905148374535941, "learning_rate": 3.0229764822977902e-06, "loss": 0.01674961894750595, "step": 46530 }, { "epoch": 0.4379764705882353, "grad_norm": 0.40983893310202646, "learning_rate": 3.0228140710667496e-06, "loss": 0.016684381663799285, "step": 46535 }, { "epoch": 0.4380235294117647, "grad_norm": 0.3209357764354984, "learning_rate": 3.0226516860098202e-06, "loss": 0.012726123631000518, "step": 46540 }, { "epoch": 0.4380705882352941, "grad_norm": 0.5056852776417193, "learning_rate": 3.0224893271199723e-06, "loss": 0.019171842932701112, "step": 46545 }, { "epoch": 0.43811764705882356, "grad_norm": 0.5731886332261997, "learning_rate": 3.022326994390179e-06, "loss": 0.015878826379776, "step": 46550 }, { "epoch": 0.43816470588235296, "grad_norm": 0.5756982124662292, "learning_rate": 3.0221646878134154e-06, "loss": 0.023146525025367737, "step": 46555 }, { "epoch": 0.43821176470588236, "grad_norm": 0.46335485552504113, "learning_rate": 3.0220024073826602e-06, "loss": 0.01753949373960495, "step": 46560 }, { "epoch": 0.43825882352941176, "grad_norm": 0.5406731400053346, "learning_rate": 3.0218401530908946e-06, "loss": 0.013573405146598817, "step": 46565 }, { "epoch": 0.43830588235294116, "grad_norm": 0.467001973734131, "learning_rate": 3.021677924931103e-06, "loss": 0.016501984000205992, "step": 46570 }, { "epoch": 0.4383529411764706, "grad_norm": 0.3893573504581273, "learning_rate": 3.02151572289627e-06, "loss": 0.0183506041765213, "step": 46575 }, { "epoch": 0.4384, "grad_norm": 0.5704586518260536, "learning_rate": 3.0213535469793855e-06, "loss": 0.02199641764163971, "step": 46580 }, { "epoch": 0.4384470588235294, "grad_norm": 0.9372089104914464, "learning_rate": 3.0211913971734397e-06, "loss": 0.024500489234924316, "step": 46585 }, { "epoch": 0.4384941176470588, "grad_norm": 0.579912859856434, "learning_rate": 3.0210292734714286e-06, "loss": 0.021367521584033967, "step": 46590 }, { "epoch": 0.4385411764705882, "grad_norm": 0.8124390957517161, "learning_rate": 3.020867175866348e-06, "loss": 0.01869719326496124, "step": 46595 }, { "epoch": 0.43858823529411767, "grad_norm": 0.4240703803447635, "learning_rate": 3.0207051043511977e-06, "loss": 0.019234907627105714, "step": 46600 }, { "epoch": 0.43863529411764707, "grad_norm": 0.8307599878928342, "learning_rate": 3.0205430589189793e-06, "loss": 0.028111612796783446, "step": 46605 }, { "epoch": 0.43868235294117647, "grad_norm": 0.4759185473412998, "learning_rate": 3.0203810395626974e-06, "loss": 0.024042731523513793, "step": 46610 }, { "epoch": 0.43872941176470587, "grad_norm": 0.5984315922904971, "learning_rate": 3.0202190462753604e-06, "loss": 0.018225717544555663, "step": 46615 }, { "epoch": 0.43877647058823527, "grad_norm": 0.5575400990486165, "learning_rate": 3.0200570790499774e-06, "loss": 0.013372202217578889, "step": 46620 }, { "epoch": 0.4388235294117647, "grad_norm": 0.6319724096105171, "learning_rate": 3.0198951378795604e-06, "loss": 0.023589694499969484, "step": 46625 }, { "epoch": 0.4388705882352941, "grad_norm": 0.5797221957592603, "learning_rate": 3.0197332227571247e-06, "loss": 0.021998032927513123, "step": 46630 }, { "epoch": 0.4389176470588235, "grad_norm": 0.8956100003786842, "learning_rate": 3.019571333675689e-06, "loss": 0.016946855187416076, "step": 46635 }, { "epoch": 0.4389647058823529, "grad_norm": 0.4665878711317061, "learning_rate": 3.0194094706282732e-06, "loss": 0.017422589659690856, "step": 46640 }, { "epoch": 0.4390117647058824, "grad_norm": 0.6648031566184637, "learning_rate": 3.0192476336079013e-06, "loss": 0.023498845100402833, "step": 46645 }, { "epoch": 0.4390588235294118, "grad_norm": 0.5116875679068168, "learning_rate": 3.019085822607597e-06, "loss": 0.012937264144420623, "step": 46650 }, { "epoch": 0.4391058823529412, "grad_norm": 0.4780993054065091, "learning_rate": 3.0189240376203892e-06, "loss": 0.016048938035964966, "step": 46655 }, { "epoch": 0.4391529411764706, "grad_norm": 0.3971810137667673, "learning_rate": 3.01876227863931e-06, "loss": 0.014026510715484618, "step": 46660 }, { "epoch": 0.4392, "grad_norm": 0.4402797293188203, "learning_rate": 3.0186005456573915e-06, "loss": 0.014837396144866944, "step": 46665 }, { "epoch": 0.43924705882352943, "grad_norm": 0.6320698035605496, "learning_rate": 3.0184388386676706e-06, "loss": 0.022638076543807985, "step": 46670 }, { "epoch": 0.43929411764705883, "grad_norm": 0.5696446353657557, "learning_rate": 3.0182771576631853e-06, "loss": 0.020317891240119935, "step": 46675 }, { "epoch": 0.43934117647058823, "grad_norm": 0.2786810113122501, "learning_rate": 3.0181155026369773e-06, "loss": 0.018517330288887024, "step": 46680 }, { "epoch": 0.43938823529411764, "grad_norm": 0.5124888849464191, "learning_rate": 3.017953873582091e-06, "loss": 0.016674768924713135, "step": 46685 }, { "epoch": 0.43943529411764704, "grad_norm": 0.4674186557761458, "learning_rate": 3.0177922704915724e-06, "loss": 0.017632679641246797, "step": 46690 }, { "epoch": 0.4394823529411765, "grad_norm": 0.5430551683570517, "learning_rate": 3.0176306933584702e-06, "loss": 0.01819922626018524, "step": 46695 }, { "epoch": 0.4395294117647059, "grad_norm": 0.6529364085260465, "learning_rate": 3.0174691421758377e-06, "loss": 0.02192317247390747, "step": 46700 }, { "epoch": 0.4395764705882353, "grad_norm": 0.7213082189741339, "learning_rate": 3.017307616936727e-06, "loss": 0.01643683612346649, "step": 46705 }, { "epoch": 0.4396235294117647, "grad_norm": 0.46926920319018844, "learning_rate": 3.0171461176341975e-06, "loss": 0.02106413245201111, "step": 46710 }, { "epoch": 0.4396705882352941, "grad_norm": 0.9783884836677094, "learning_rate": 3.016984644261307e-06, "loss": 0.02415185570716858, "step": 46715 }, { "epoch": 0.43971764705882355, "grad_norm": 0.6494335671613303, "learning_rate": 3.0168231968111183e-06, "loss": 0.022870945930480956, "step": 46720 }, { "epoch": 0.43976470588235295, "grad_norm": 0.6463135403577117, "learning_rate": 3.016661775276696e-06, "loss": 0.022945576906204225, "step": 46725 }, { "epoch": 0.43981176470588235, "grad_norm": 0.25995294574590816, "learning_rate": 3.016500379651108e-06, "loss": 0.015714564919471742, "step": 46730 }, { "epoch": 0.43985882352941175, "grad_norm": 0.5905014269579858, "learning_rate": 3.016339009927423e-06, "loss": 0.022704926133155823, "step": 46735 }, { "epoch": 0.4399058823529412, "grad_norm": 0.933134083004328, "learning_rate": 3.016177666098715e-06, "loss": 0.018367899954319, "step": 46740 }, { "epoch": 0.4399529411764706, "grad_norm": 0.5604869429452511, "learning_rate": 3.016016348158058e-06, "loss": 0.01670689582824707, "step": 46745 }, { "epoch": 0.44, "grad_norm": 0.49803437935567874, "learning_rate": 3.0158550560985305e-06, "loss": 0.01961713582277298, "step": 46750 }, { "epoch": 0.4400470588235294, "grad_norm": 0.6215615019502241, "learning_rate": 3.015693789913212e-06, "loss": 0.02405277043581009, "step": 46755 }, { "epoch": 0.4400941176470588, "grad_norm": 0.46608864629747726, "learning_rate": 3.015532549595186e-06, "loss": 0.017220622301101683, "step": 46760 }, { "epoch": 0.44014117647058826, "grad_norm": 0.5442941776307936, "learning_rate": 3.0153713351375385e-06, "loss": 0.016330817341804506, "step": 46765 }, { "epoch": 0.44018823529411766, "grad_norm": 0.5956038306666115, "learning_rate": 3.0152101465333564e-06, "loss": 0.02050510048866272, "step": 46770 }, { "epoch": 0.44023529411764706, "grad_norm": 0.7561918165088048, "learning_rate": 3.015048983775731e-06, "loss": 0.022567299008369446, "step": 46775 }, { "epoch": 0.44028235294117646, "grad_norm": 0.40980469768303057, "learning_rate": 3.014887846857756e-06, "loss": 0.01928100436925888, "step": 46780 }, { "epoch": 0.44032941176470586, "grad_norm": 0.3770127425933107, "learning_rate": 3.0147267357725256e-06, "loss": 0.016193398833274843, "step": 46785 }, { "epoch": 0.4403764705882353, "grad_norm": 0.5325649965238901, "learning_rate": 3.0145656505131406e-06, "loss": 0.016372594237327575, "step": 46790 }, { "epoch": 0.4404235294117647, "grad_norm": 0.5705871657702637, "learning_rate": 3.0144045910727005e-06, "loss": 0.022345203161239623, "step": 46795 }, { "epoch": 0.4404705882352941, "grad_norm": 0.6040583006998005, "learning_rate": 3.0142435574443092e-06, "loss": 0.01736484169960022, "step": 46800 }, { "epoch": 0.4405176470588235, "grad_norm": 0.4920520791258662, "learning_rate": 3.0140825496210723e-06, "loss": 0.016585272550582886, "step": 46805 }, { "epoch": 0.4405647058823529, "grad_norm": 0.5776914537525961, "learning_rate": 3.013921567596099e-06, "loss": 0.02243647277355194, "step": 46810 }, { "epoch": 0.44061176470588237, "grad_norm": 0.4905003249926997, "learning_rate": 3.013760611362502e-06, "loss": 0.018956781923770906, "step": 46815 }, { "epoch": 0.44065882352941177, "grad_norm": 0.5016063303433779, "learning_rate": 3.013599680913392e-06, "loss": 0.015107381343841552, "step": 46820 }, { "epoch": 0.44070588235294117, "grad_norm": 0.7800139434607763, "learning_rate": 3.0134387762418888e-06, "loss": 0.019707761704921722, "step": 46825 }, { "epoch": 0.44075294117647057, "grad_norm": 0.5682691824783984, "learning_rate": 3.0132778973411093e-06, "loss": 0.015639349818229675, "step": 46830 }, { "epoch": 0.4408, "grad_norm": 0.5926737228990714, "learning_rate": 3.0131170442041756e-06, "loss": 0.015128949284553527, "step": 46835 }, { "epoch": 0.4408470588235294, "grad_norm": 0.7072039216656167, "learning_rate": 3.012956216824212e-06, "loss": 0.018734964728355407, "step": 46840 }, { "epoch": 0.4408941176470588, "grad_norm": 0.7726408777762663, "learning_rate": 3.012795415194346e-06, "loss": 0.01803765296936035, "step": 46845 }, { "epoch": 0.4409411764705882, "grad_norm": 0.4830471965868001, "learning_rate": 3.012634639307706e-06, "loss": 0.017678654193878172, "step": 46850 }, { "epoch": 0.4409882352941176, "grad_norm": 0.6474888166327851, "learning_rate": 3.012473889157424e-06, "loss": 0.025581377744674682, "step": 46855 }, { "epoch": 0.4410352941176471, "grad_norm": 0.695120394530761, "learning_rate": 3.0123131647366343e-06, "loss": 0.0176015704870224, "step": 46860 }, { "epoch": 0.4410823529411765, "grad_norm": 0.6019365966586259, "learning_rate": 3.012152466038474e-06, "loss": 0.022738023102283476, "step": 46865 }, { "epoch": 0.4411294117647059, "grad_norm": 0.4368095431038198, "learning_rate": 3.0119917930560832e-06, "loss": 0.021119070053100587, "step": 46870 }, { "epoch": 0.4411764705882353, "grad_norm": 0.5054522284167249, "learning_rate": 3.0118311457826033e-06, "loss": 0.01941361129283905, "step": 46875 }, { "epoch": 0.4412235294117647, "grad_norm": 0.7484384596869108, "learning_rate": 3.01167052421118e-06, "loss": 0.018054869771003724, "step": 46880 }, { "epoch": 0.44127058823529414, "grad_norm": 0.9151151129581695, "learning_rate": 3.011509928334959e-06, "loss": 0.02191929519176483, "step": 46885 }, { "epoch": 0.44131764705882354, "grad_norm": 0.47862399917477066, "learning_rate": 3.0113493581470915e-06, "loss": 0.017390862107276917, "step": 46890 }, { "epoch": 0.44136470588235294, "grad_norm": 0.49577163176690137, "learning_rate": 3.0111888136407292e-06, "loss": 0.019487738609313965, "step": 46895 }, { "epoch": 0.44141176470588234, "grad_norm": 0.4284795786568615, "learning_rate": 3.0110282948090274e-06, "loss": 0.015880221128463747, "step": 46900 }, { "epoch": 0.4414588235294118, "grad_norm": 0.556951797439945, "learning_rate": 3.0108678016451433e-06, "loss": 0.020507782697677612, "step": 46905 }, { "epoch": 0.4415058823529412, "grad_norm": 0.35773416238804734, "learning_rate": 3.0107073341422367e-06, "loss": 0.01734783351421356, "step": 46910 }, { "epoch": 0.4415529411764706, "grad_norm": 0.5608212096868825, "learning_rate": 3.0105468922934707e-06, "loss": 0.018121051788330077, "step": 46915 }, { "epoch": 0.4416, "grad_norm": 0.519891730387389, "learning_rate": 3.0103864760920103e-06, "loss": 0.01826901137828827, "step": 46920 }, { "epoch": 0.4416470588235294, "grad_norm": 0.31297687264765733, "learning_rate": 3.010226085531023e-06, "loss": 0.019820740818977355, "step": 46925 }, { "epoch": 0.44169411764705885, "grad_norm": 0.5976982097950474, "learning_rate": 3.0100657206036786e-06, "loss": 0.016411978006362914, "step": 46930 }, { "epoch": 0.44174117647058825, "grad_norm": 0.4030371701137701, "learning_rate": 3.009905381303151e-06, "loss": 0.019703149795532227, "step": 46935 }, { "epoch": 0.44178823529411765, "grad_norm": 0.604296568146163, "learning_rate": 3.0097450676226144e-06, "loss": 0.016018347442150117, "step": 46940 }, { "epoch": 0.44183529411764705, "grad_norm": 0.5108042940212555, "learning_rate": 3.0095847795552474e-06, "loss": 0.013564322888851166, "step": 46945 }, { "epoch": 0.44188235294117645, "grad_norm": 0.5349850357477638, "learning_rate": 3.0094245170942305e-06, "loss": 0.02696489095687866, "step": 46950 }, { "epoch": 0.4419294117647059, "grad_norm": 0.3910579013504491, "learning_rate": 3.009264280232746e-06, "loss": 0.014738065004348756, "step": 46955 }, { "epoch": 0.4419764705882353, "grad_norm": 0.5022595207291606, "learning_rate": 3.0091040689639795e-06, "loss": 0.02976558208465576, "step": 46960 }, { "epoch": 0.4420235294117647, "grad_norm": 0.5516844495305311, "learning_rate": 3.00894388328112e-06, "loss": 0.019838543236255647, "step": 46965 }, { "epoch": 0.4420705882352941, "grad_norm": 0.418223547153923, "learning_rate": 3.008783723177356e-06, "loss": 0.01793481260538101, "step": 46970 }, { "epoch": 0.4421176470588235, "grad_norm": 0.7220895376506046, "learning_rate": 3.0086235886458824e-06, "loss": 0.021812546253204345, "step": 46975 }, { "epoch": 0.44216470588235296, "grad_norm": 0.5974868118650533, "learning_rate": 3.0084634796798946e-06, "loss": 0.017190787196159362, "step": 46980 }, { "epoch": 0.44221176470588236, "grad_norm": 0.4789121363231913, "learning_rate": 3.00830339627259e-06, "loss": 0.014229904115200042, "step": 46985 }, { "epoch": 0.44225882352941176, "grad_norm": 0.43277964981292205, "learning_rate": 3.0081433384171704e-06, "loss": 0.016027608513832094, "step": 46990 }, { "epoch": 0.44230588235294116, "grad_norm": 0.5866733099975161, "learning_rate": 3.0079833061068384e-06, "loss": 0.01738477945327759, "step": 46995 }, { "epoch": 0.4423529411764706, "grad_norm": 0.6282835977757475, "learning_rate": 3.0078232993347992e-06, "loss": 0.019215717911720276, "step": 47000 }, { "epoch": 0.4424, "grad_norm": 0.6463945080334484, "learning_rate": 3.0076633180942622e-06, "loss": 0.02007670998573303, "step": 47005 }, { "epoch": 0.4424470588235294, "grad_norm": 0.6135562649796391, "learning_rate": 3.007503362378438e-06, "loss": 0.020509856939315795, "step": 47010 }, { "epoch": 0.4424941176470588, "grad_norm": 0.6863767130532131, "learning_rate": 3.0073434321805396e-06, "loss": 0.020351925492286684, "step": 47015 }, { "epoch": 0.4425411764705882, "grad_norm": 0.6348503624432019, "learning_rate": 3.0071835274937824e-06, "loss": 0.017653998732566834, "step": 47020 }, { "epoch": 0.44258823529411767, "grad_norm": 0.7323229147055534, "learning_rate": 3.007023648311385e-06, "loss": 0.020717087388038635, "step": 47025 }, { "epoch": 0.44263529411764707, "grad_norm": 0.2936830392317325, "learning_rate": 3.00686379462657e-06, "loss": 0.017173659801483155, "step": 47030 }, { "epoch": 0.44268235294117647, "grad_norm": 0.4116216393881146, "learning_rate": 3.0067039664325587e-06, "loss": 0.015269637107849121, "step": 47035 }, { "epoch": 0.44272941176470587, "grad_norm": 0.5229443762508178, "learning_rate": 3.0065441637225777e-06, "loss": 0.02339009791612625, "step": 47040 }, { "epoch": 0.44277647058823527, "grad_norm": 0.49559076467075375, "learning_rate": 3.006384386489856e-06, "loss": 0.01508568525314331, "step": 47045 }, { "epoch": 0.4428235294117647, "grad_norm": 0.5644113977451384, "learning_rate": 3.0062246347276253e-06, "loss": 0.017274850606918336, "step": 47050 }, { "epoch": 0.4428705882352941, "grad_norm": 0.5508516882180085, "learning_rate": 3.0060649084291167e-06, "loss": 0.021932160854339598, "step": 47055 }, { "epoch": 0.4429176470588235, "grad_norm": 0.6636896375625562, "learning_rate": 3.0059052075875684e-06, "loss": 0.023274558782577514, "step": 47060 }, { "epoch": 0.4429647058823529, "grad_norm": 0.36175854485990083, "learning_rate": 3.005745532196218e-06, "loss": 0.01672725975513458, "step": 47065 }, { "epoch": 0.4430117647058823, "grad_norm": 0.3596217784613284, "learning_rate": 3.0055858822483068e-06, "loss": 0.021666276454925536, "step": 47070 }, { "epoch": 0.4430588235294118, "grad_norm": 0.44731426960213616, "learning_rate": 3.0054262577370787e-06, "loss": 0.015974685549736023, "step": 47075 }, { "epoch": 0.4431058823529412, "grad_norm": 0.6400411203651156, "learning_rate": 3.0052666586557795e-06, "loss": 0.020744794607162477, "step": 47080 }, { "epoch": 0.4431529411764706, "grad_norm": 0.6779604809176786, "learning_rate": 3.0051070849976575e-06, "loss": 0.018657350540161134, "step": 47085 }, { "epoch": 0.4432, "grad_norm": 0.5034276251855491, "learning_rate": 3.0049475367559645e-06, "loss": 0.016967129707336426, "step": 47090 }, { "epoch": 0.44324705882352944, "grad_norm": 0.5207218319368058, "learning_rate": 3.0047880139239537e-06, "loss": 0.015736132860183716, "step": 47095 }, { "epoch": 0.44329411764705884, "grad_norm": 0.6226657337410978, "learning_rate": 3.0046285164948812e-06, "loss": 0.020094600319862366, "step": 47100 }, { "epoch": 0.44334117647058824, "grad_norm": 0.7337305596121755, "learning_rate": 3.004469044462006e-06, "loss": 0.01936337649822235, "step": 47105 }, { "epoch": 0.44338823529411764, "grad_norm": 0.4342894843992012, "learning_rate": 3.0043095978185876e-06, "loss": 0.018602527678012848, "step": 47110 }, { "epoch": 0.44343529411764704, "grad_norm": 0.7274168386713147, "learning_rate": 3.004150176557893e-06, "loss": 0.017339813709259033, "step": 47115 }, { "epoch": 0.4434823529411765, "grad_norm": 0.4984243544589889, "learning_rate": 3.003990780673185e-06, "loss": 0.021474918723106383, "step": 47120 }, { "epoch": 0.4435294117647059, "grad_norm": 0.5941631488987018, "learning_rate": 3.0038314101577342e-06, "loss": 0.02062073051929474, "step": 47125 }, { "epoch": 0.4435764705882353, "grad_norm": 0.5925738178153862, "learning_rate": 3.0036720650048106e-06, "loss": 0.02065320312976837, "step": 47130 }, { "epoch": 0.4436235294117647, "grad_norm": 0.41241296009173817, "learning_rate": 3.003512745207689e-06, "loss": 0.01517806649208069, "step": 47135 }, { "epoch": 0.4436705882352941, "grad_norm": 0.5248451118423717, "learning_rate": 3.0033534507596444e-06, "loss": 0.016605693101882934, "step": 47140 }, { "epoch": 0.44371764705882355, "grad_norm": 0.7640959815831407, "learning_rate": 3.0031941816539567e-06, "loss": 0.02111588716506958, "step": 47145 }, { "epoch": 0.44376470588235295, "grad_norm": 0.8226951241942917, "learning_rate": 3.003034937883906e-06, "loss": 0.017674222588539124, "step": 47150 }, { "epoch": 0.44381176470588235, "grad_norm": 0.6843948585673947, "learning_rate": 3.002875719442776e-06, "loss": 0.014971524477005005, "step": 47155 }, { "epoch": 0.44385882352941175, "grad_norm": 0.4688840788354789, "learning_rate": 3.0027165263238534e-06, "loss": 0.01781664937734604, "step": 47160 }, { "epoch": 0.44390588235294115, "grad_norm": 0.6852392420113449, "learning_rate": 3.002557358520426e-06, "loss": 0.018891599774360657, "step": 47165 }, { "epoch": 0.4439529411764706, "grad_norm": 0.5222325831469911, "learning_rate": 3.002398216025786e-06, "loss": 0.015135794878005981, "step": 47170 }, { "epoch": 0.444, "grad_norm": 0.5255178578396266, "learning_rate": 3.0022390988332254e-06, "loss": 0.01608496904373169, "step": 47175 }, { "epoch": 0.4440470588235294, "grad_norm": 0.49083399196316563, "learning_rate": 3.0020800069360423e-06, "loss": 0.021558085083961488, "step": 47180 }, { "epoch": 0.4440941176470588, "grad_norm": 0.40403182363710244, "learning_rate": 3.0019209403275334e-06, "loss": 0.01791670620441437, "step": 47185 }, { "epoch": 0.44414117647058826, "grad_norm": 0.4681561808571909, "learning_rate": 3.001761899001001e-06, "loss": 0.0177891343832016, "step": 47190 }, { "epoch": 0.44418823529411766, "grad_norm": 0.4720431264111082, "learning_rate": 3.001602882949748e-06, "loss": 0.019269637763500214, "step": 47195 }, { "epoch": 0.44423529411764706, "grad_norm": 0.4212265654276464, "learning_rate": 3.001443892167081e-06, "loss": 0.015026426315307618, "step": 47200 }, { "epoch": 0.44428235294117646, "grad_norm": 0.437077595097249, "learning_rate": 3.0012849266463074e-06, "loss": 0.019031625986099244, "step": 47205 }, { "epoch": 0.44432941176470586, "grad_norm": 0.42950179878899936, "learning_rate": 3.0011259863807396e-06, "loss": 0.017275851964950562, "step": 47210 }, { "epoch": 0.4443764705882353, "grad_norm": 0.546399692210806, "learning_rate": 3.0009670713636895e-06, "loss": 0.01753840744495392, "step": 47215 }, { "epoch": 0.4444235294117647, "grad_norm": 0.4104701638162854, "learning_rate": 3.0008081815884745e-06, "loss": 0.01638706624507904, "step": 47220 }, { "epoch": 0.4444705882352941, "grad_norm": 0.3298758113567028, "learning_rate": 3.000649317048412e-06, "loss": 0.023002050817012787, "step": 47225 }, { "epoch": 0.4445176470588235, "grad_norm": 0.7616649110948885, "learning_rate": 3.0004904777368238e-06, "loss": 0.02470722496509552, "step": 47230 }, { "epoch": 0.4445647058823529, "grad_norm": 0.4006694636756377, "learning_rate": 3.000331663647032e-06, "loss": 0.01646813452243805, "step": 47235 }, { "epoch": 0.4446117647058824, "grad_norm": 0.3980902837147353, "learning_rate": 3.0001728747723633e-06, "loss": 0.016114935278892517, "step": 47240 }, { "epoch": 0.4446588235294118, "grad_norm": 0.5486018736700478, "learning_rate": 3.0000141111061465e-06, "loss": 0.017103339731693267, "step": 47245 }, { "epoch": 0.4447058823529412, "grad_norm": 0.45763388336507427, "learning_rate": 2.999855372641712e-06, "loss": 0.017758694291114808, "step": 47250 }, { "epoch": 0.4447529411764706, "grad_norm": 0.5073587499090809, "learning_rate": 2.9996966593723914e-06, "loss": 0.019719162583351137, "step": 47255 }, { "epoch": 0.4448, "grad_norm": 0.6448900140574566, "learning_rate": 2.999537971291523e-06, "loss": 0.01816413998603821, "step": 47260 }, { "epoch": 0.44484705882352943, "grad_norm": 0.5737533491759569, "learning_rate": 2.9993793083924426e-06, "loss": 0.021703942120075224, "step": 47265 }, { "epoch": 0.44489411764705883, "grad_norm": 0.46122576955105693, "learning_rate": 2.999220670668493e-06, "loss": 0.015232060849666596, "step": 47270 }, { "epoch": 0.44494117647058823, "grad_norm": 0.5188080413191098, "learning_rate": 2.9990620581130162e-06, "loss": 0.016030336916446685, "step": 47275 }, { "epoch": 0.44498823529411763, "grad_norm": 0.5359472615863924, "learning_rate": 2.9989034707193583e-06, "loss": 0.018858769536018373, "step": 47280 }, { "epoch": 0.4450352941176471, "grad_norm": 0.6794761925061839, "learning_rate": 2.9987449084808666e-06, "loss": 0.021271079778671265, "step": 47285 }, { "epoch": 0.4450823529411765, "grad_norm": 0.5876916996476598, "learning_rate": 2.998586371390892e-06, "loss": 0.016626596450805664, "step": 47290 }, { "epoch": 0.4451294117647059, "grad_norm": 0.5457647072263248, "learning_rate": 2.998427859442788e-06, "loss": 0.017617839574813842, "step": 47295 }, { "epoch": 0.4451764705882353, "grad_norm": 0.5113724750565415, "learning_rate": 2.9982693726299088e-06, "loss": 0.018662461638450624, "step": 47300 }, { "epoch": 0.4452235294117647, "grad_norm": 0.4302936152344842, "learning_rate": 2.9981109109456137e-06, "loss": 0.014823079109191895, "step": 47305 }, { "epoch": 0.44527058823529414, "grad_norm": 0.8648470114907093, "learning_rate": 2.9979524743832618e-06, "loss": 0.02091261148452759, "step": 47310 }, { "epoch": 0.44531764705882354, "grad_norm": 0.3559141415834475, "learning_rate": 2.997794062936217e-06, "loss": 0.017796233296394348, "step": 47315 }, { "epoch": 0.44536470588235294, "grad_norm": 0.7526927826170319, "learning_rate": 2.997635676597843e-06, "loss": 0.01991443932056427, "step": 47320 }, { "epoch": 0.44541176470588234, "grad_norm": 0.5983812640130624, "learning_rate": 2.997477315361509e-06, "loss": 0.02386108785867691, "step": 47325 }, { "epoch": 0.44545882352941174, "grad_norm": 0.5600195462229843, "learning_rate": 2.9973189792205847e-06, "loss": 0.016689443588256837, "step": 47330 }, { "epoch": 0.4455058823529412, "grad_norm": 0.4776728401538244, "learning_rate": 2.997160668168443e-06, "loss": 0.017878493666648863, "step": 47335 }, { "epoch": 0.4455529411764706, "grad_norm": 0.45109266615277166, "learning_rate": 2.997002382198458e-06, "loss": 0.0189371258020401, "step": 47340 }, { "epoch": 0.4456, "grad_norm": 0.471667071405267, "learning_rate": 2.9968441213040083e-06, "loss": 0.015945327281951905, "step": 47345 }, { "epoch": 0.4456470588235294, "grad_norm": 0.5333807363151015, "learning_rate": 2.9966858854784732e-06, "loss": 0.016668964922428132, "step": 47350 }, { "epoch": 0.4456941176470588, "grad_norm": 0.4437770619386143, "learning_rate": 2.996527674715235e-06, "loss": 0.018804807960987092, "step": 47355 }, { "epoch": 0.44574117647058825, "grad_norm": 0.5117645504817571, "learning_rate": 2.9963694890076788e-06, "loss": 0.014416776597499847, "step": 47360 }, { "epoch": 0.44578823529411765, "grad_norm": 0.729574019711309, "learning_rate": 2.996211328349192e-06, "loss": 0.017856432497501372, "step": 47365 }, { "epoch": 0.44583529411764705, "grad_norm": 0.5135973080666204, "learning_rate": 2.996053192733165e-06, "loss": 0.017900218069553376, "step": 47370 }, { "epoch": 0.44588235294117645, "grad_norm": 0.4641519012725298, "learning_rate": 2.9958950821529883e-06, "loss": 0.01971530020236969, "step": 47375 }, { "epoch": 0.4459294117647059, "grad_norm": 0.5731414053344948, "learning_rate": 2.9957369966020582e-06, "loss": 0.016613098978996276, "step": 47380 }, { "epoch": 0.4459764705882353, "grad_norm": 0.5018114213235473, "learning_rate": 2.99557893607377e-06, "loss": 0.014786356687545776, "step": 47385 }, { "epoch": 0.4460235294117647, "grad_norm": 0.5026668777331933, "learning_rate": 2.9954209005615254e-06, "loss": 0.02171505391597748, "step": 47390 }, { "epoch": 0.4460705882352941, "grad_norm": 0.59052528020854, "learning_rate": 2.9952628900587246e-06, "loss": 0.020281988382339477, "step": 47395 }, { "epoch": 0.4461176470588235, "grad_norm": 0.6227016441612339, "learning_rate": 2.9951049045587727e-06, "loss": 0.02177133858203888, "step": 47400 }, { "epoch": 0.44616470588235296, "grad_norm": 0.41674741209262356, "learning_rate": 2.9949469440550753e-06, "loss": 0.01719471514225006, "step": 47405 }, { "epoch": 0.44621176470588236, "grad_norm": 0.5907988172646537, "learning_rate": 2.994789008541044e-06, "loss": 0.016626256704330444, "step": 47410 }, { "epoch": 0.44625882352941176, "grad_norm": 0.4913841789863448, "learning_rate": 2.9946310980100886e-06, "loss": 0.017997664213180543, "step": 47415 }, { "epoch": 0.44630588235294116, "grad_norm": 0.7686730230624886, "learning_rate": 2.994473212455624e-06, "loss": 0.016662317514419555, "step": 47420 }, { "epoch": 0.44635294117647056, "grad_norm": 0.5144682291650127, "learning_rate": 2.994315351871066e-06, "loss": 0.017695248126983643, "step": 47425 }, { "epoch": 0.4464, "grad_norm": 0.4976693074569012, "learning_rate": 2.9941575162498338e-06, "loss": 0.017150643467903137, "step": 47430 }, { "epoch": 0.4464470588235294, "grad_norm": 0.5339797198560099, "learning_rate": 2.993999705585349e-06, "loss": 0.019919028878211974, "step": 47435 }, { "epoch": 0.4464941176470588, "grad_norm": 0.6015435852732208, "learning_rate": 2.9938419198710363e-06, "loss": 0.016512319445610046, "step": 47440 }, { "epoch": 0.4465411764705882, "grad_norm": 0.6726482935886443, "learning_rate": 2.9936841591003205e-06, "loss": 0.013837823271751403, "step": 47445 }, { "epoch": 0.4465882352941177, "grad_norm": 0.5034587071194552, "learning_rate": 2.9935264232666308e-06, "loss": 0.01755283325910568, "step": 47450 }, { "epoch": 0.4466352941176471, "grad_norm": 0.6803914876707255, "learning_rate": 2.9933687123633987e-06, "loss": 0.01805071234703064, "step": 47455 }, { "epoch": 0.4466823529411765, "grad_norm": 0.6629786085611258, "learning_rate": 2.993211026384057e-06, "loss": 0.017882910370826722, "step": 47460 }, { "epoch": 0.4467294117647059, "grad_norm": 0.6856420274272755, "learning_rate": 2.993053365322042e-06, "loss": 0.018157121539115906, "step": 47465 }, { "epoch": 0.4467764705882353, "grad_norm": 0.4715266965846946, "learning_rate": 2.9928957291707917e-06, "loss": 0.015767404437065126, "step": 47470 }, { "epoch": 0.44682352941176473, "grad_norm": 0.366406027352664, "learning_rate": 2.992738117923748e-06, "loss": 0.016939327120780945, "step": 47475 }, { "epoch": 0.44687058823529413, "grad_norm": 1.0313851956709896, "learning_rate": 2.992580531574353e-06, "loss": 0.020377324521541597, "step": 47480 }, { "epoch": 0.44691764705882353, "grad_norm": 0.37712573220266143, "learning_rate": 2.9924229701160524e-06, "loss": 0.01729244887828827, "step": 47485 }, { "epoch": 0.44696470588235293, "grad_norm": 0.48991515368443106, "learning_rate": 2.992265433542294e-06, "loss": 0.02098665535449982, "step": 47490 }, { "epoch": 0.44701176470588233, "grad_norm": 0.6738477596750254, "learning_rate": 2.9921079218465293e-06, "loss": 0.020651434361934663, "step": 47495 }, { "epoch": 0.4470588235294118, "grad_norm": 0.5533241614025142, "learning_rate": 2.9919504350222107e-06, "loss": 0.019977986812591553, "step": 47500 }, { "epoch": 0.4471058823529412, "grad_norm": 0.542432666687729, "learning_rate": 2.991792973062793e-06, "loss": 0.018925243616104127, "step": 47505 }, { "epoch": 0.4471529411764706, "grad_norm": 0.7676940447220871, "learning_rate": 2.991635535961733e-06, "loss": 0.024283915758132935, "step": 47510 }, { "epoch": 0.4472, "grad_norm": 0.5146673355550605, "learning_rate": 2.9914781237124936e-06, "loss": 0.01879759132862091, "step": 47515 }, { "epoch": 0.4472470588235294, "grad_norm": 0.6772121825447973, "learning_rate": 2.9913207363085346e-06, "loss": 0.01832144260406494, "step": 47520 }, { "epoch": 0.44729411764705884, "grad_norm": 0.4194186037170863, "learning_rate": 2.991163373743322e-06, "loss": 0.0181702122092247, "step": 47525 }, { "epoch": 0.44734117647058824, "grad_norm": 0.3579204589795283, "learning_rate": 2.9910060360103238e-06, "loss": 0.015853026509284975, "step": 47530 }, { "epoch": 0.44738823529411764, "grad_norm": 0.8239210068296255, "learning_rate": 2.990848723103008e-06, "loss": 0.02345140129327774, "step": 47535 }, { "epoch": 0.44743529411764704, "grad_norm": 0.6181522993215861, "learning_rate": 2.9906914350148485e-06, "loss": 0.017992639541625978, "step": 47540 }, { "epoch": 0.4474823529411765, "grad_norm": 0.5658109472849691, "learning_rate": 2.9905341717393183e-06, "loss": 0.014803573489189148, "step": 47545 }, { "epoch": 0.4475294117647059, "grad_norm": 0.5151775151124927, "learning_rate": 2.990376933269895e-06, "loss": 0.019118700921535493, "step": 47550 }, { "epoch": 0.4475764705882353, "grad_norm": 0.6400997962945045, "learning_rate": 2.9902197196000583e-06, "loss": 0.020482854545116426, "step": 47555 }, { "epoch": 0.4476235294117647, "grad_norm": 0.44831868225725885, "learning_rate": 2.990062530723289e-06, "loss": 0.01915455162525177, "step": 47560 }, { "epoch": 0.4476705882352941, "grad_norm": 0.5705692988160825, "learning_rate": 2.9899053666330717e-06, "loss": 0.01988404393196106, "step": 47565 }, { "epoch": 0.44771764705882355, "grad_norm": 0.5798469111279984, "learning_rate": 2.9897482273228933e-06, "loss": 0.016487264633178712, "step": 47570 }, { "epoch": 0.44776470588235295, "grad_norm": 0.6293139243598501, "learning_rate": 2.9895911127862428e-06, "loss": 0.015269403159618378, "step": 47575 }, { "epoch": 0.44781176470588235, "grad_norm": 0.4642032859949368, "learning_rate": 2.9894340230166103e-06, "loss": 0.02423706352710724, "step": 47580 }, { "epoch": 0.44785882352941175, "grad_norm": 0.5529199196531474, "learning_rate": 2.9892769580074903e-06, "loss": 0.020653939247131346, "step": 47585 }, { "epoch": 0.44790588235294115, "grad_norm": 0.4220623544807114, "learning_rate": 2.9891199177523787e-06, "loss": 0.016611790657043456, "step": 47590 }, { "epoch": 0.4479529411764706, "grad_norm": 0.48479076219828826, "learning_rate": 2.9889629022447743e-06, "loss": 0.016681718826293945, "step": 47595 }, { "epoch": 0.448, "grad_norm": 0.4444165966208294, "learning_rate": 2.988805911478178e-06, "loss": 0.01816345453262329, "step": 47600 }, { "epoch": 0.4480470588235294, "grad_norm": 0.4315881935833627, "learning_rate": 2.9886489454460925e-06, "loss": 0.017378327250480653, "step": 47605 }, { "epoch": 0.4480941176470588, "grad_norm": 0.5108874410076796, "learning_rate": 2.988492004142024e-06, "loss": 0.018943320214748382, "step": 47610 }, { "epoch": 0.4481411764705882, "grad_norm": 0.3876349763808941, "learning_rate": 2.9883350875594807e-06, "loss": 0.01755979061126709, "step": 47615 }, { "epoch": 0.44818823529411767, "grad_norm": 0.38623612454122297, "learning_rate": 2.988178195691972e-06, "loss": 0.017078956961631774, "step": 47620 }, { "epoch": 0.44823529411764707, "grad_norm": 0.5292372421255261, "learning_rate": 2.9880213285330123e-06, "loss": 0.01815263330936432, "step": 47625 }, { "epoch": 0.44828235294117647, "grad_norm": 0.44247560280039305, "learning_rate": 2.9878644860761147e-06, "loss": 0.014076414704322814, "step": 47630 }, { "epoch": 0.44832941176470587, "grad_norm": 0.7103182234466461, "learning_rate": 2.9877076683147986e-06, "loss": 0.021436913311481474, "step": 47635 }, { "epoch": 0.4483764705882353, "grad_norm": 0.4358544225740819, "learning_rate": 2.9875508752425832e-06, "loss": 0.016767486929893494, "step": 47640 }, { "epoch": 0.4484235294117647, "grad_norm": 0.45611824273644525, "learning_rate": 2.987394106852991e-06, "loss": 0.02086697518825531, "step": 47645 }, { "epoch": 0.4484705882352941, "grad_norm": 0.4162012363096434, "learning_rate": 2.9872373631395462e-06, "loss": 0.018091395497322083, "step": 47650 }, { "epoch": 0.4485176470588235, "grad_norm": 0.6185168867520271, "learning_rate": 2.9870806440957772e-06, "loss": 0.023288848996162414, "step": 47655 }, { "epoch": 0.4485647058823529, "grad_norm": 0.8671421901243636, "learning_rate": 2.986923949715212e-06, "loss": 0.021703901886940002, "step": 47660 }, { "epoch": 0.4486117647058824, "grad_norm": 0.448461141389067, "learning_rate": 2.9867672799913825e-06, "loss": 0.014590352773666382, "step": 47665 }, { "epoch": 0.4486588235294118, "grad_norm": 0.491494735539079, "learning_rate": 2.9866106349178247e-06, "loss": 0.026870256662368773, "step": 47670 }, { "epoch": 0.4487058823529412, "grad_norm": 0.6578014831833805, "learning_rate": 2.986454014488073e-06, "loss": 0.016115634143352507, "step": 47675 }, { "epoch": 0.4487529411764706, "grad_norm": 0.3854236274647887, "learning_rate": 2.986297418695668e-06, "loss": 0.016944944858551025, "step": 47680 }, { "epoch": 0.4488, "grad_norm": 0.44779041518729695, "learning_rate": 2.9861408475341497e-06, "loss": 0.015544053912162781, "step": 47685 }, { "epoch": 0.44884705882352943, "grad_norm": 0.431959042547098, "learning_rate": 2.9859843009970633e-06, "loss": 0.02231714129447937, "step": 47690 }, { "epoch": 0.44889411764705883, "grad_norm": 0.5086540736215254, "learning_rate": 2.9858277790779532e-06, "loss": 0.016399282217025756, "step": 47695 }, { "epoch": 0.44894117647058823, "grad_norm": 0.8010030779256975, "learning_rate": 2.9856712817703692e-06, "loss": 0.02019668519496918, "step": 47700 }, { "epoch": 0.44898823529411763, "grad_norm": 0.5842721784674438, "learning_rate": 2.9855148090678616e-06, "loss": 0.016732637584209443, "step": 47705 }, { "epoch": 0.44903529411764703, "grad_norm": 0.3551623991978046, "learning_rate": 2.9853583609639836e-06, "loss": 0.01795867383480072, "step": 47710 }, { "epoch": 0.4490823529411765, "grad_norm": 0.5870704007593528, "learning_rate": 2.98520193745229e-06, "loss": 0.01582581102848053, "step": 47715 }, { "epoch": 0.4491294117647059, "grad_norm": 0.7523762831884142, "learning_rate": 2.98504553852634e-06, "loss": 0.020284485816955567, "step": 47720 }, { "epoch": 0.4491764705882353, "grad_norm": 0.5728173962734567, "learning_rate": 2.9848891641796935e-06, "loss": 0.019326835870742798, "step": 47725 }, { "epoch": 0.4492235294117647, "grad_norm": 0.6479863328540603, "learning_rate": 2.984732814405913e-06, "loss": 0.02168242782354355, "step": 47730 }, { "epoch": 0.44927058823529414, "grad_norm": 0.45282206900739264, "learning_rate": 2.984576489198563e-06, "loss": 0.018293070793151855, "step": 47735 }, { "epoch": 0.44931764705882354, "grad_norm": 0.5948453072996888, "learning_rate": 2.9844201885512115e-06, "loss": 0.01875249445438385, "step": 47740 }, { "epoch": 0.44936470588235294, "grad_norm": 0.6722154407630007, "learning_rate": 2.9842639124574274e-06, "loss": 0.021710146963596345, "step": 47745 }, { "epoch": 0.44941176470588234, "grad_norm": 0.5487266054617829, "learning_rate": 2.984107660910784e-06, "loss": 0.02028906047344208, "step": 47750 }, { "epoch": 0.44945882352941174, "grad_norm": 0.4824543962619236, "learning_rate": 2.983951433904854e-06, "loss": 0.017819583415985107, "step": 47755 }, { "epoch": 0.4495058823529412, "grad_norm": 0.9825479756417301, "learning_rate": 2.9837952314332157e-06, "loss": 0.01689170449972153, "step": 47760 }, { "epoch": 0.4495529411764706, "grad_norm": 0.5660533003982433, "learning_rate": 2.983639053489449e-06, "loss": 0.022514894604682922, "step": 47765 }, { "epoch": 0.4496, "grad_norm": 0.5125641747951274, "learning_rate": 2.983482900067132e-06, "loss": 0.014392396807670594, "step": 47770 }, { "epoch": 0.4496470588235294, "grad_norm": 0.5969313807399502, "learning_rate": 2.983326771159852e-06, "loss": 0.025650086998939513, "step": 47775 }, { "epoch": 0.4496941176470588, "grad_norm": 0.3088009518243508, "learning_rate": 2.9831706667611944e-06, "loss": 0.01826140582561493, "step": 47780 }, { "epoch": 0.44974117647058826, "grad_norm": 0.34676879234649316, "learning_rate": 2.983014586864746e-06, "loss": 0.013125170767307282, "step": 47785 }, { "epoch": 0.44978823529411766, "grad_norm": 0.4824631106464629, "learning_rate": 2.982858531464099e-06, "loss": 0.019676178693771362, "step": 47790 }, { "epoch": 0.44983529411764706, "grad_norm": 0.5967732781958477, "learning_rate": 2.9827025005528466e-06, "loss": 0.01711927354335785, "step": 47795 }, { "epoch": 0.44988235294117646, "grad_norm": 0.5901051597634375, "learning_rate": 2.9825464941245843e-06, "loss": 0.01947924941778183, "step": 47800 }, { "epoch": 0.44992941176470586, "grad_norm": 0.45723446807317863, "learning_rate": 2.982390512172909e-06, "loss": 0.017396873235702513, "step": 47805 }, { "epoch": 0.4499764705882353, "grad_norm": 0.43807304875606956, "learning_rate": 2.9822345546914226e-06, "loss": 0.016520272195339202, "step": 47810 }, { "epoch": 0.4500235294117647, "grad_norm": 0.5222784797384612, "learning_rate": 2.9820786216737275e-06, "loss": 0.020163971185684203, "step": 47815 }, { "epoch": 0.4500705882352941, "grad_norm": 0.3897519090431378, "learning_rate": 2.9819227131134278e-06, "loss": 0.016627442836761475, "step": 47820 }, { "epoch": 0.4501176470588235, "grad_norm": 0.6362061524061375, "learning_rate": 2.981766829004131e-06, "loss": 0.020911672711372377, "step": 47825 }, { "epoch": 0.45016470588235297, "grad_norm": 0.4983191069319398, "learning_rate": 2.981610969339446e-06, "loss": 0.02211824357509613, "step": 47830 }, { "epoch": 0.45021176470588237, "grad_norm": 0.75211275658938, "learning_rate": 2.9814551341129873e-06, "loss": 0.02255590707063675, "step": 47835 }, { "epoch": 0.45025882352941177, "grad_norm": 0.4112297022412315, "learning_rate": 2.981299323318366e-06, "loss": 0.011914916336536407, "step": 47840 }, { "epoch": 0.45030588235294117, "grad_norm": 0.5416702511108692, "learning_rate": 2.9811435369492005e-06, "loss": 0.018162983655929565, "step": 47845 }, { "epoch": 0.45035294117647057, "grad_norm": 0.33950417621234197, "learning_rate": 2.98098777499911e-06, "loss": 0.017088279128074646, "step": 47850 }, { "epoch": 0.4504, "grad_norm": 0.5410171870050593, "learning_rate": 2.9808320374617147e-06, "loss": 0.022875893115997314, "step": 47855 }, { "epoch": 0.4504470588235294, "grad_norm": 0.6065887916095024, "learning_rate": 2.980676324330639e-06, "loss": 0.019206684827804566, "step": 47860 }, { "epoch": 0.4504941176470588, "grad_norm": 0.45044939683055013, "learning_rate": 2.980520635599509e-06, "loss": 0.026095163822174073, "step": 47865 }, { "epoch": 0.4505411764705882, "grad_norm": 0.4440082067045768, "learning_rate": 2.980364971261952e-06, "loss": 0.016548529267311096, "step": 47870 }, { "epoch": 0.4505882352941176, "grad_norm": 0.5510645306763228, "learning_rate": 2.9802093313115992e-06, "loss": 0.016543203592300416, "step": 47875 }, { "epoch": 0.4506352941176471, "grad_norm": 0.4201016323256125, "learning_rate": 2.9800537157420843e-06, "loss": 0.016884973645210265, "step": 47880 }, { "epoch": 0.4506823529411765, "grad_norm": 0.6309306967391324, "learning_rate": 2.9798981245470414e-06, "loss": 0.015553969144821166, "step": 47885 }, { "epoch": 0.4507294117647059, "grad_norm": 0.5385815147372979, "learning_rate": 2.979742557720109e-06, "loss": 0.018115234375, "step": 47890 }, { "epoch": 0.4507764705882353, "grad_norm": 0.6283991417355294, "learning_rate": 2.9795870152549267e-06, "loss": 0.01660716235637665, "step": 47895 }, { "epoch": 0.4508235294117647, "grad_norm": 0.4289410927090408, "learning_rate": 2.979431497145136e-06, "loss": 0.019501760601997375, "step": 47900 }, { "epoch": 0.45087058823529413, "grad_norm": 0.6541123435432991, "learning_rate": 2.9792760033843826e-06, "loss": 0.02336309552192688, "step": 47905 }, { "epoch": 0.45091764705882353, "grad_norm": 0.5541920832955645, "learning_rate": 2.979120533966313e-06, "loss": 0.021782398223876953, "step": 47910 }, { "epoch": 0.45096470588235293, "grad_norm": 1.598244312937818, "learning_rate": 2.9789650888845757e-06, "loss": 0.019750910997390746, "step": 47915 }, { "epoch": 0.45101176470588233, "grad_norm": 0.4349677963148348, "learning_rate": 2.9788096681328233e-06, "loss": 0.018831139802932738, "step": 47920 }, { "epoch": 0.4510588235294118, "grad_norm": 0.4490984766427603, "learning_rate": 2.9786542717047096e-06, "loss": 0.016975840926170348, "step": 47925 }, { "epoch": 0.4511058823529412, "grad_norm": 0.5610361037377514, "learning_rate": 2.9784988995938902e-06, "loss": 0.019662293791770934, "step": 47930 }, { "epoch": 0.4511529411764706, "grad_norm": 0.5288714370828365, "learning_rate": 2.9783435517940236e-06, "loss": 0.01785026341676712, "step": 47935 }, { "epoch": 0.4512, "grad_norm": 0.4124329458043564, "learning_rate": 2.9781882282987707e-06, "loss": 0.01829254925251007, "step": 47940 }, { "epoch": 0.4512470588235294, "grad_norm": 0.5864435604785886, "learning_rate": 2.978032929101795e-06, "loss": 0.014821627736091613, "step": 47945 }, { "epoch": 0.45129411764705885, "grad_norm": 0.41147991435662307, "learning_rate": 2.977877654196762e-06, "loss": 0.012715208530426025, "step": 47950 }, { "epoch": 0.45134117647058825, "grad_norm": 0.5777239612230909, "learning_rate": 2.977722403577339e-06, "loss": 0.02011953592300415, "step": 47955 }, { "epoch": 0.45138823529411765, "grad_norm": 0.472950782711193, "learning_rate": 2.9775671772371954e-06, "loss": 0.01742965877056122, "step": 47960 }, { "epoch": 0.45143529411764705, "grad_norm": 0.29222766213307266, "learning_rate": 2.9774119751700046e-06, "loss": 0.016907483339309692, "step": 47965 }, { "epoch": 0.45148235294117645, "grad_norm": 0.47129348419445954, "learning_rate": 2.977256797369441e-06, "loss": 0.015496726334095, "step": 47970 }, { "epoch": 0.4515294117647059, "grad_norm": 0.5570075264959257, "learning_rate": 2.9771016438291817e-06, "loss": 0.01776207685470581, "step": 47975 }, { "epoch": 0.4515764705882353, "grad_norm": 0.8003207132713521, "learning_rate": 2.976946514542906e-06, "loss": 0.014558432996273041, "step": 47980 }, { "epoch": 0.4516235294117647, "grad_norm": 1.2586620432641864, "learning_rate": 2.9767914095042947e-06, "loss": 0.015511643886566163, "step": 47985 }, { "epoch": 0.4516705882352941, "grad_norm": 0.5548701142778764, "learning_rate": 2.9766363287070333e-06, "loss": 0.017675362527370453, "step": 47990 }, { "epoch": 0.45171764705882356, "grad_norm": 0.5444731475559935, "learning_rate": 2.9764812721448063e-06, "loss": 0.0175994411110878, "step": 47995 }, { "epoch": 0.45176470588235296, "grad_norm": 0.48110743221114105, "learning_rate": 2.976326239811303e-06, "loss": 0.017213866114616394, "step": 48000 }, { "epoch": 0.45181176470588236, "grad_norm": 0.5571627704561755, "learning_rate": 2.976171231700214e-06, "loss": 0.01982744038105011, "step": 48005 }, { "epoch": 0.45185882352941176, "grad_norm": 0.37399110206410263, "learning_rate": 2.9760162478052324e-06, "loss": 0.016682824492454527, "step": 48010 }, { "epoch": 0.45190588235294116, "grad_norm": 0.6061742093587725, "learning_rate": 2.9758612881200537e-06, "loss": 0.011991523206233978, "step": 48015 }, { "epoch": 0.4519529411764706, "grad_norm": 0.5703583555846669, "learning_rate": 2.975706352638376e-06, "loss": 0.019893923401832582, "step": 48020 }, { "epoch": 0.452, "grad_norm": 0.6343306844263972, "learning_rate": 2.9755514413538987e-06, "loss": 0.018205906450748443, "step": 48025 }, { "epoch": 0.4520470588235294, "grad_norm": 0.4373731553149519, "learning_rate": 2.975396554260324e-06, "loss": 0.020373176038265228, "step": 48030 }, { "epoch": 0.4520941176470588, "grad_norm": 0.35815714454815006, "learning_rate": 2.975241691351357e-06, "loss": 0.01848031282424927, "step": 48035 }, { "epoch": 0.4521411764705882, "grad_norm": 0.4971225044218395, "learning_rate": 2.975086852620704e-06, "loss": 0.023489055037498475, "step": 48040 }, { "epoch": 0.45218823529411767, "grad_norm": 0.36629074643915954, "learning_rate": 2.974932038062075e-06, "loss": 0.01506679505109787, "step": 48045 }, { "epoch": 0.45223529411764707, "grad_norm": 0.6821223373041417, "learning_rate": 2.9747772476691806e-06, "loss": 0.023678651452064513, "step": 48050 }, { "epoch": 0.45228235294117647, "grad_norm": 0.41036448557419897, "learning_rate": 2.974622481435735e-06, "loss": 0.01609800159931183, "step": 48055 }, { "epoch": 0.45232941176470587, "grad_norm": 1.0134484134825243, "learning_rate": 2.974467739355454e-06, "loss": 0.020567408204078673, "step": 48060 }, { "epoch": 0.45237647058823527, "grad_norm": 0.5151963064355041, "learning_rate": 2.974313021422056e-06, "loss": 0.02102244794368744, "step": 48065 }, { "epoch": 0.4524235294117647, "grad_norm": 0.6493079651561712, "learning_rate": 2.974158327629262e-06, "loss": 0.01636010706424713, "step": 48070 }, { "epoch": 0.4524705882352941, "grad_norm": 0.5979167698414628, "learning_rate": 2.9740036579707945e-06, "loss": 0.01679532080888748, "step": 48075 }, { "epoch": 0.4525176470588235, "grad_norm": 0.37365951255372365, "learning_rate": 2.9738490124403785e-06, "loss": 0.017731407284736635, "step": 48080 }, { "epoch": 0.4525647058823529, "grad_norm": 0.4845966941684637, "learning_rate": 2.973694391031742e-06, "loss": 0.018914231657981874, "step": 48085 }, { "epoch": 0.4526117647058824, "grad_norm": 0.487274847806611, "learning_rate": 2.9735397937386134e-06, "loss": 0.014433787763118744, "step": 48090 }, { "epoch": 0.4526588235294118, "grad_norm": 0.41204300662116006, "learning_rate": 2.9733852205547278e-06, "loss": 0.01466335654258728, "step": 48095 }, { "epoch": 0.4527058823529412, "grad_norm": 0.5196036761097279, "learning_rate": 2.9732306714738156e-06, "loss": 0.01923827826976776, "step": 48100 }, { "epoch": 0.4527529411764706, "grad_norm": 0.5826213603660081, "learning_rate": 2.9730761464896164e-06, "loss": 0.015504150092601776, "step": 48105 }, { "epoch": 0.4528, "grad_norm": 0.49784923477983384, "learning_rate": 2.9729216455958678e-06, "loss": 0.021576398611068727, "step": 48110 }, { "epoch": 0.45284705882352944, "grad_norm": 0.5119849423912936, "learning_rate": 2.972767168786311e-06, "loss": 0.014647234976291657, "step": 48115 }, { "epoch": 0.45289411764705884, "grad_norm": 0.578057440309718, "learning_rate": 2.9726127160546888e-06, "loss": 0.03420516848564148, "step": 48120 }, { "epoch": 0.45294117647058824, "grad_norm": 0.4073020973334218, "learning_rate": 2.972458287394749e-06, "loss": 0.01967441737651825, "step": 48125 }, { "epoch": 0.45298823529411764, "grad_norm": 0.44886685816789623, "learning_rate": 2.972303882800237e-06, "loss": 0.01703401356935501, "step": 48130 }, { "epoch": 0.45303529411764704, "grad_norm": 0.447302995282153, "learning_rate": 2.972149502264905e-06, "loss": 0.016220180690288542, "step": 48135 }, { "epoch": 0.4530823529411765, "grad_norm": 0.5964935879108801, "learning_rate": 2.971995145782504e-06, "loss": 0.02351861447095871, "step": 48140 }, { "epoch": 0.4531294117647059, "grad_norm": 0.796235827718152, "learning_rate": 2.9718408133467907e-06, "loss": 0.015706735849380492, "step": 48145 }, { "epoch": 0.4531764705882353, "grad_norm": 0.5916714564258354, "learning_rate": 2.9716865049515204e-06, "loss": 0.019134128093719484, "step": 48150 }, { "epoch": 0.4532235294117647, "grad_norm": 0.33368117817516146, "learning_rate": 2.971532220590453e-06, "loss": 0.016339246928691865, "step": 48155 }, { "epoch": 0.4532705882352941, "grad_norm": 0.839125982547553, "learning_rate": 2.97137796025735e-06, "loss": 0.013871723413467407, "step": 48160 }, { "epoch": 0.45331764705882355, "grad_norm": 0.47169302177047384, "learning_rate": 2.9712237239459755e-06, "loss": 0.02325960099697113, "step": 48165 }, { "epoch": 0.45336470588235295, "grad_norm": 0.5609353621023433, "learning_rate": 2.971069511650096e-06, "loss": 0.020222926139831544, "step": 48170 }, { "epoch": 0.45341176470588235, "grad_norm": 0.4931399959714548, "learning_rate": 2.9709153233634784e-06, "loss": 0.01994670033454895, "step": 48175 }, { "epoch": 0.45345882352941175, "grad_norm": 0.558849484740476, "learning_rate": 2.9707611590798958e-06, "loss": 0.021771934628486634, "step": 48180 }, { "epoch": 0.4535058823529412, "grad_norm": 0.4630944523726324, "learning_rate": 2.9706070187931187e-06, "loss": 0.020276221632957458, "step": 48185 }, { "epoch": 0.4535529411764706, "grad_norm": 0.6789431541705787, "learning_rate": 2.9704529024969235e-06, "loss": 0.021451081335544585, "step": 48190 }, { "epoch": 0.4536, "grad_norm": 0.5790462829776657, "learning_rate": 2.970298810185087e-06, "loss": 0.01487543135881424, "step": 48195 }, { "epoch": 0.4536470588235294, "grad_norm": 0.5149793739542006, "learning_rate": 2.9701447418513902e-06, "loss": 0.017444032430648803, "step": 48200 }, { "epoch": 0.4536941176470588, "grad_norm": 0.6919435033048132, "learning_rate": 2.969990697489614e-06, "loss": 0.02449858784675598, "step": 48205 }, { "epoch": 0.45374117647058826, "grad_norm": 0.4996588706290997, "learning_rate": 2.969836677093542e-06, "loss": 0.018719878792762757, "step": 48210 }, { "epoch": 0.45378823529411766, "grad_norm": 0.34044825311720006, "learning_rate": 2.969682680656962e-06, "loss": 0.021466919779777528, "step": 48215 }, { "epoch": 0.45383529411764706, "grad_norm": 0.43827274892020185, "learning_rate": 2.969528708173662e-06, "loss": 0.01775389313697815, "step": 48220 }, { "epoch": 0.45388235294117646, "grad_norm": 0.43191716447852685, "learning_rate": 2.9693747596374333e-06, "loss": 0.019569383561611177, "step": 48225 }, { "epoch": 0.45392941176470586, "grad_norm": 0.5455133677231208, "learning_rate": 2.9692208350420693e-06, "loss": 0.019581788778305055, "step": 48230 }, { "epoch": 0.4539764705882353, "grad_norm": 0.5346166327068111, "learning_rate": 2.969066934381365e-06, "loss": 0.018430210649967194, "step": 48235 }, { "epoch": 0.4540235294117647, "grad_norm": 0.6393505732837255, "learning_rate": 2.968913057649118e-06, "loss": 0.017488978803157806, "step": 48240 }, { "epoch": 0.4540705882352941, "grad_norm": 0.7249306822919267, "learning_rate": 2.968759204839129e-06, "loss": 0.022709178924560546, "step": 48245 }, { "epoch": 0.4541176470588235, "grad_norm": 0.3290734263000635, "learning_rate": 2.9686053759451994e-06, "loss": 0.016290609538555146, "step": 48250 }, { "epoch": 0.4541647058823529, "grad_norm": 0.49470492986962417, "learning_rate": 2.9684515709611345e-06, "loss": 0.018760675191879274, "step": 48255 }, { "epoch": 0.45421176470588237, "grad_norm": 0.5095778140213426, "learning_rate": 2.96829778988074e-06, "loss": 0.015769632160663606, "step": 48260 }, { "epoch": 0.45425882352941177, "grad_norm": 0.44889257966516505, "learning_rate": 2.968144032697827e-06, "loss": 0.015751193463802337, "step": 48265 }, { "epoch": 0.45430588235294117, "grad_norm": 0.4852307750734581, "learning_rate": 2.967990299406204e-06, "loss": 0.014247429370880128, "step": 48270 }, { "epoch": 0.45435294117647057, "grad_norm": 0.6636030390107329, "learning_rate": 2.967836589999686e-06, "loss": 0.024738892912864685, "step": 48275 }, { "epoch": 0.4544, "grad_norm": 0.7215867487376076, "learning_rate": 2.9676829044720877e-06, "loss": 0.02047661244869232, "step": 48280 }, { "epoch": 0.4544470588235294, "grad_norm": 0.4943339776948087, "learning_rate": 2.967529242817229e-06, "loss": 0.017773526906967162, "step": 48285 }, { "epoch": 0.4544941176470588, "grad_norm": 0.6425521260256123, "learning_rate": 2.9673756050289284e-06, "loss": 0.02144869267940521, "step": 48290 }, { "epoch": 0.4545411764705882, "grad_norm": 0.6151257115224844, "learning_rate": 2.9672219911010087e-06, "loss": 0.019925248622894288, "step": 48295 }, { "epoch": 0.4545882352941176, "grad_norm": 0.6036287339928349, "learning_rate": 2.9670684010272948e-06, "loss": 0.016828857362270355, "step": 48300 }, { "epoch": 0.4546352941176471, "grad_norm": 0.5977658952285464, "learning_rate": 2.966914834801613e-06, "loss": 0.025871264934539794, "step": 48305 }, { "epoch": 0.4546823529411765, "grad_norm": 0.5696849314384506, "learning_rate": 2.9667612924177934e-06, "loss": 0.017343616485595702, "step": 48310 }, { "epoch": 0.4547294117647059, "grad_norm": 0.35979014429373696, "learning_rate": 2.9666077738696668e-06, "loss": 0.02293391525745392, "step": 48315 }, { "epoch": 0.4547764705882353, "grad_norm": 0.5801424142573351, "learning_rate": 2.9664542791510664e-06, "loss": 0.015693199634552003, "step": 48320 }, { "epoch": 0.4548235294117647, "grad_norm": 0.7460314344020382, "learning_rate": 2.966300808255829e-06, "loss": 0.02321188449859619, "step": 48325 }, { "epoch": 0.45487058823529414, "grad_norm": 0.6598954526623528, "learning_rate": 2.966147361177792e-06, "loss": 0.018995091319084167, "step": 48330 }, { "epoch": 0.45491764705882354, "grad_norm": 0.5571844821596951, "learning_rate": 2.965993937910796e-06, "loss": 0.01902315616607666, "step": 48335 }, { "epoch": 0.45496470588235294, "grad_norm": 1.3677148673194952, "learning_rate": 2.9658405384486834e-06, "loss": 0.014877790212631225, "step": 48340 }, { "epoch": 0.45501176470588234, "grad_norm": 0.6298354291103667, "learning_rate": 2.965687162785299e-06, "loss": 0.01871388256549835, "step": 48345 }, { "epoch": 0.45505882352941174, "grad_norm": 0.5150147689807288, "learning_rate": 2.9655338109144892e-06, "loss": 0.017987149953842162, "step": 48350 }, { "epoch": 0.4551058823529412, "grad_norm": 0.40310597045617447, "learning_rate": 2.9653804828301047e-06, "loss": 0.01776789128780365, "step": 48355 }, { "epoch": 0.4551529411764706, "grad_norm": 0.26131705729035154, "learning_rate": 2.965227178525996e-06, "loss": 0.01994897425174713, "step": 48360 }, { "epoch": 0.4552, "grad_norm": 0.5459258548189844, "learning_rate": 2.9650738979960166e-06, "loss": 0.01600418984889984, "step": 48365 }, { "epoch": 0.4552470588235294, "grad_norm": 0.6227503044893141, "learning_rate": 2.9649206412340225e-06, "loss": 0.015190936625003815, "step": 48370 }, { "epoch": 0.45529411764705885, "grad_norm": 0.9235740872625366, "learning_rate": 2.964767408233871e-06, "loss": 0.01730729788541794, "step": 48375 }, { "epoch": 0.45534117647058825, "grad_norm": 0.4909419288582165, "learning_rate": 2.964614198989425e-06, "loss": 0.016387830674648284, "step": 48380 }, { "epoch": 0.45538823529411765, "grad_norm": 0.5485722111170911, "learning_rate": 2.9644610134945445e-06, "loss": 0.02023587077856064, "step": 48385 }, { "epoch": 0.45543529411764705, "grad_norm": 0.7037528866105607, "learning_rate": 2.9643078517430956e-06, "loss": 0.018956714868545534, "step": 48390 }, { "epoch": 0.45548235294117645, "grad_norm": 0.6991441262302918, "learning_rate": 2.9641547137289446e-06, "loss": 0.020588961243629456, "step": 48395 }, { "epoch": 0.4555294117647059, "grad_norm": 0.4805985970922043, "learning_rate": 2.964001599445961e-06, "loss": 0.015353316068649292, "step": 48400 }, { "epoch": 0.4555764705882353, "grad_norm": 0.5637939037782089, "learning_rate": 2.9638485088880163e-06, "loss": 0.020789319276809694, "step": 48405 }, { "epoch": 0.4556235294117647, "grad_norm": 0.4936100626648712, "learning_rate": 2.963695442048984e-06, "loss": 0.01438586413860321, "step": 48410 }, { "epoch": 0.4556705882352941, "grad_norm": 0.6315651285390459, "learning_rate": 2.9635423989227403e-06, "loss": 0.019206908345222474, "step": 48415 }, { "epoch": 0.4557176470588235, "grad_norm": 0.4050160804708131, "learning_rate": 2.9633893795031625e-06, "loss": 0.014944952726364136, "step": 48420 }, { "epoch": 0.45576470588235296, "grad_norm": 0.44142914347551887, "learning_rate": 2.963236383784132e-06, "loss": 0.016015174984931945, "step": 48425 }, { "epoch": 0.45581176470588236, "grad_norm": 0.5311865864429853, "learning_rate": 2.9630834117595303e-06, "loss": 0.01875246912240982, "step": 48430 }, { "epoch": 0.45585882352941176, "grad_norm": 0.5470555829334862, "learning_rate": 2.9629304634232426e-06, "loss": 0.021270528435707092, "step": 48435 }, { "epoch": 0.45590588235294116, "grad_norm": 0.5290583254195165, "learning_rate": 2.9627775387691558e-06, "loss": 0.01944440007209778, "step": 48440 }, { "epoch": 0.45595294117647056, "grad_norm": 0.3576503775740148, "learning_rate": 2.9626246377911594e-06, "loss": 0.025342467427253722, "step": 48445 }, { "epoch": 0.456, "grad_norm": 0.6326828061868476, "learning_rate": 2.962471760483144e-06, "loss": 0.020736238360404967, "step": 48450 }, { "epoch": 0.4560470588235294, "grad_norm": 0.5614951108462497, "learning_rate": 2.9623189068390037e-06, "loss": 0.021747463941574098, "step": 48455 }, { "epoch": 0.4560941176470588, "grad_norm": 0.580043569107998, "learning_rate": 2.9621660768526335e-06, "loss": 0.01895674467086792, "step": 48460 }, { "epoch": 0.4561411764705882, "grad_norm": 0.8510850348984077, "learning_rate": 2.9620132705179317e-06, "loss": 0.021516837179660797, "step": 48465 }, { "epoch": 0.45618823529411767, "grad_norm": 0.5795096659315855, "learning_rate": 2.9618604878287988e-06, "loss": 0.02133476734161377, "step": 48470 }, { "epoch": 0.45623529411764707, "grad_norm": 0.5506597604416508, "learning_rate": 2.961707728779137e-06, "loss": 0.028191068768501283, "step": 48475 }, { "epoch": 0.4562823529411765, "grad_norm": 0.5077419653784654, "learning_rate": 2.96155499336285e-06, "loss": 0.019307085871696474, "step": 48480 }, { "epoch": 0.4563294117647059, "grad_norm": 0.9979552544714013, "learning_rate": 2.961402281573847e-06, "loss": 0.019710955023765565, "step": 48485 }, { "epoch": 0.4563764705882353, "grad_norm": 0.46465931294501805, "learning_rate": 2.961249593406034e-06, "loss": 0.018441206216812132, "step": 48490 }, { "epoch": 0.45642352941176473, "grad_norm": 0.6023197361440997, "learning_rate": 2.9610969288533236e-06, "loss": 0.01928023099899292, "step": 48495 }, { "epoch": 0.45647058823529413, "grad_norm": 0.7102767236452343, "learning_rate": 2.9609442879096297e-06, "loss": 0.022094698250293733, "step": 48500 }, { "epoch": 0.45651764705882353, "grad_norm": 0.6877310033268699, "learning_rate": 2.9607916705688666e-06, "loss": 0.021106314659118653, "step": 48505 }, { "epoch": 0.45656470588235293, "grad_norm": 0.295114819287115, "learning_rate": 2.960639076824953e-06, "loss": 0.023101162910461426, "step": 48510 }, { "epoch": 0.45661176470588233, "grad_norm": 0.5866201430373289, "learning_rate": 2.960486506671808e-06, "loss": 0.018716251850128172, "step": 48515 }, { "epoch": 0.4566588235294118, "grad_norm": 0.6849910355666959, "learning_rate": 2.9603339601033543e-06, "loss": 0.0210041344165802, "step": 48520 }, { "epoch": 0.4567058823529412, "grad_norm": 0.5011911840609092, "learning_rate": 2.9601814371135155e-06, "loss": 0.02291940152645111, "step": 48525 }, { "epoch": 0.4567529411764706, "grad_norm": 0.5395837202784034, "learning_rate": 2.96002893769622e-06, "loss": 0.021055194735527038, "step": 48530 }, { "epoch": 0.4568, "grad_norm": 0.6792460458905352, "learning_rate": 2.9598764618453947e-06, "loss": 0.018001043796539308, "step": 48535 }, { "epoch": 0.45684705882352944, "grad_norm": 0.6412941652666785, "learning_rate": 2.9597240095549704e-06, "loss": 0.021526840329170228, "step": 48540 }, { "epoch": 0.45689411764705884, "grad_norm": 0.47949661430830737, "learning_rate": 2.959571580818881e-06, "loss": 0.019557851552963256, "step": 48545 }, { "epoch": 0.45694117647058824, "grad_norm": 0.7676593988557187, "learning_rate": 2.9594191756310618e-06, "loss": 0.015994033217430113, "step": 48550 }, { "epoch": 0.45698823529411764, "grad_norm": 0.5159034382872509, "learning_rate": 2.9592667939854503e-06, "loss": 0.022116081416606904, "step": 48555 }, { "epoch": 0.45703529411764704, "grad_norm": 0.5835149306255641, "learning_rate": 2.9591144358759855e-06, "loss": 0.02248816341161728, "step": 48560 }, { "epoch": 0.4570823529411765, "grad_norm": 0.37579229453415597, "learning_rate": 2.958962101296609e-06, "loss": 0.01533021628856659, "step": 48565 }, { "epoch": 0.4571294117647059, "grad_norm": 0.37795858617225175, "learning_rate": 2.9588097902412664e-06, "loss": 0.018318626284599304, "step": 48570 }, { "epoch": 0.4571764705882353, "grad_norm": 0.4478927409065902, "learning_rate": 2.958657502703902e-06, "loss": 0.030525654554367065, "step": 48575 }, { "epoch": 0.4572235294117647, "grad_norm": 0.4845118305892065, "learning_rate": 2.958505238678465e-06, "loss": 0.016808746755123137, "step": 48580 }, { "epoch": 0.4572705882352941, "grad_norm": 0.5447100906720163, "learning_rate": 2.9583529981589067e-06, "loss": 0.01837375611066818, "step": 48585 }, { "epoch": 0.45731764705882355, "grad_norm": 0.5967526091761575, "learning_rate": 2.958200781139179e-06, "loss": 0.01588616669178009, "step": 48590 }, { "epoch": 0.45736470588235295, "grad_norm": 0.5111515130004596, "learning_rate": 2.958048587613236e-06, "loss": 0.01753759980201721, "step": 48595 }, { "epoch": 0.45741176470588235, "grad_norm": 0.43348298623833, "learning_rate": 2.9578964175750364e-06, "loss": 0.015692776441574095, "step": 48600 }, { "epoch": 0.45745882352941175, "grad_norm": 0.4797821007610328, "learning_rate": 2.957744271018539e-06, "loss": 0.01595316231250763, "step": 48605 }, { "epoch": 0.45750588235294115, "grad_norm": 0.38640905676687054, "learning_rate": 2.9575921479377034e-06, "loss": 0.019284191727638244, "step": 48610 }, { "epoch": 0.4575529411764706, "grad_norm": 0.6645218216978181, "learning_rate": 2.9574400483264963e-06, "loss": 0.022588464617729186, "step": 48615 }, { "epoch": 0.4576, "grad_norm": 0.6459213756462048, "learning_rate": 2.957287972178881e-06, "loss": 0.021036557853221893, "step": 48620 }, { "epoch": 0.4576470588235294, "grad_norm": 0.6284755796945797, "learning_rate": 2.9571359194888274e-06, "loss": 0.020032112300395966, "step": 48625 }, { "epoch": 0.4576941176470588, "grad_norm": 0.3778519230402318, "learning_rate": 2.9569838902503033e-06, "loss": 0.018439510464668275, "step": 48630 }, { "epoch": 0.45774117647058826, "grad_norm": 0.4576576995590177, "learning_rate": 2.9568318844572837e-06, "loss": 0.018121038377285004, "step": 48635 }, { "epoch": 0.45778823529411766, "grad_norm": 0.4426031580765838, "learning_rate": 2.9566799021037405e-06, "loss": 0.015377384424209595, "step": 48640 }, { "epoch": 0.45783529411764706, "grad_norm": 0.512180670257431, "learning_rate": 2.9565279431836526e-06, "loss": 0.01692555546760559, "step": 48645 }, { "epoch": 0.45788235294117646, "grad_norm": 0.7165302840362465, "learning_rate": 2.9563760076909964e-06, "loss": 0.017676839232444765, "step": 48650 }, { "epoch": 0.45792941176470586, "grad_norm": 0.5075848424635808, "learning_rate": 2.9562240956197545e-06, "loss": 0.017790576815605162, "step": 48655 }, { "epoch": 0.4579764705882353, "grad_norm": 0.4058619471813486, "learning_rate": 2.95607220696391e-06, "loss": 0.019024291634559633, "step": 48660 }, { "epoch": 0.4580235294117647, "grad_norm": 0.5581103231669489, "learning_rate": 2.9559203417174474e-06, "loss": 0.02036674916744232, "step": 48665 }, { "epoch": 0.4580705882352941, "grad_norm": 0.4012415684455606, "learning_rate": 2.955768499874355e-06, "loss": 0.021058614552021026, "step": 48670 }, { "epoch": 0.4581176470588235, "grad_norm": 0.43534469585174607, "learning_rate": 2.955616681428622e-06, "loss": 0.02228032946586609, "step": 48675 }, { "epoch": 0.4581647058823529, "grad_norm": 0.4152781021170506, "learning_rate": 2.9554648863742397e-06, "loss": 0.019069044291973113, "step": 48680 }, { "epoch": 0.4582117647058824, "grad_norm": 0.39071777160904303, "learning_rate": 2.9553131147052026e-06, "loss": 0.01896452158689499, "step": 48685 }, { "epoch": 0.4582588235294118, "grad_norm": 0.5142488054514133, "learning_rate": 2.9551613664155073e-06, "loss": 0.018238724768161775, "step": 48690 }, { "epoch": 0.4583058823529412, "grad_norm": 0.4802210102914288, "learning_rate": 2.955009641499151e-06, "loss": 0.019760134816169738, "step": 48695 }, { "epoch": 0.4583529411764706, "grad_norm": 1.1064406942600864, "learning_rate": 2.954857939950135e-06, "loss": 0.020910274982452393, "step": 48700 }, { "epoch": 0.4584, "grad_norm": 0.5188586405517732, "learning_rate": 2.95470626176246e-06, "loss": 0.016896122694015504, "step": 48705 }, { "epoch": 0.45844705882352943, "grad_norm": 0.493332097592801, "learning_rate": 2.9545546069301336e-06, "loss": 0.014450795948505402, "step": 48710 }, { "epoch": 0.45849411764705883, "grad_norm": 0.6553628072927203, "learning_rate": 2.9544029754471605e-06, "loss": 0.01643882542848587, "step": 48715 }, { "epoch": 0.45854117647058823, "grad_norm": 0.46818627300575716, "learning_rate": 2.9542513673075506e-06, "loss": 0.015204326808452606, "step": 48720 }, { "epoch": 0.45858823529411763, "grad_norm": 0.49993313317453797, "learning_rate": 2.9540997825053145e-06, "loss": 0.023181343078613283, "step": 48725 }, { "epoch": 0.4586352941176471, "grad_norm": 0.4691975112719832, "learning_rate": 2.9539482210344667e-06, "loss": 0.019962599873542784, "step": 48730 }, { "epoch": 0.4586823529411765, "grad_norm": 0.8541014415757228, "learning_rate": 2.953796682889022e-06, "loss": 0.021660515666007997, "step": 48735 }, { "epoch": 0.4587294117647059, "grad_norm": 0.7642055841925496, "learning_rate": 2.9536451680629967e-06, "loss": 0.021358418464660644, "step": 48740 }, { "epoch": 0.4587764705882353, "grad_norm": 0.5955574505343959, "learning_rate": 2.9534936765504123e-06, "loss": 0.01670677065849304, "step": 48745 }, { "epoch": 0.4588235294117647, "grad_norm": 0.6392166496720618, "learning_rate": 2.953342208345291e-06, "loss": 0.019759993255138397, "step": 48750 }, { "epoch": 0.45887058823529414, "grad_norm": 0.5493691247078203, "learning_rate": 2.9531907634416557e-06, "loss": 0.017806172370910645, "step": 48755 }, { "epoch": 0.45891764705882354, "grad_norm": 0.6164898700549283, "learning_rate": 2.9530393418335323e-06, "loss": 0.017258983850479127, "step": 48760 }, { "epoch": 0.45896470588235294, "grad_norm": 0.39740608363009566, "learning_rate": 2.9528879435149504e-06, "loss": 0.0207108736038208, "step": 48765 }, { "epoch": 0.45901176470588234, "grad_norm": 0.4699450313958163, "learning_rate": 2.9527365684799397e-06, "loss": 0.016757789254188537, "step": 48770 }, { "epoch": 0.45905882352941174, "grad_norm": 0.4011149442459342, "learning_rate": 2.952585216722533e-06, "loss": 0.017065295577049257, "step": 48775 }, { "epoch": 0.4591058823529412, "grad_norm": 0.6210528212586587, "learning_rate": 2.952433888236766e-06, "loss": 0.023823854327201844, "step": 48780 }, { "epoch": 0.4591529411764706, "grad_norm": 0.3361075878646636, "learning_rate": 2.952282583016674e-06, "loss": 0.02002509534358978, "step": 48785 }, { "epoch": 0.4592, "grad_norm": 0.7696973223205478, "learning_rate": 2.9521313010562964e-06, "loss": 0.020011743903160094, "step": 48790 }, { "epoch": 0.4592470588235294, "grad_norm": 0.6754774756263123, "learning_rate": 2.951980042349675e-06, "loss": 0.021186131238937377, "step": 48795 }, { "epoch": 0.4592941176470588, "grad_norm": 0.6111921155219063, "learning_rate": 2.951828806890854e-06, "loss": 0.017046666145324706, "step": 48800 }, { "epoch": 0.45934117647058825, "grad_norm": 0.6514504013140459, "learning_rate": 2.951677594673877e-06, "loss": 0.02018304467201233, "step": 48805 }, { "epoch": 0.45938823529411765, "grad_norm": 0.7708780064718121, "learning_rate": 2.951526405692793e-06, "loss": 0.019432665407657625, "step": 48810 }, { "epoch": 0.45943529411764705, "grad_norm": 0.713963243677372, "learning_rate": 2.9513752399416502e-06, "loss": 0.02501806616783142, "step": 48815 }, { "epoch": 0.45948235294117645, "grad_norm": 0.5502549934344949, "learning_rate": 2.9512240974145018e-06, "loss": 0.021979884803295137, "step": 48820 }, { "epoch": 0.4595294117647059, "grad_norm": 0.5155773205420613, "learning_rate": 2.9510729781054014e-06, "loss": 0.01901586353778839, "step": 48825 }, { "epoch": 0.4595764705882353, "grad_norm": 0.49052686126677597, "learning_rate": 2.9509218820084055e-06, "loss": 0.018975254893302918, "step": 48830 }, { "epoch": 0.4596235294117647, "grad_norm": 0.4501104255720517, "learning_rate": 2.950770809117572e-06, "loss": 0.01730058789253235, "step": 48835 }, { "epoch": 0.4596705882352941, "grad_norm": 0.5507584306513325, "learning_rate": 2.9506197594269613e-06, "loss": 0.016651690006256104, "step": 48840 }, { "epoch": 0.4597176470588235, "grad_norm": 0.4620670254519424, "learning_rate": 2.950468732930636e-06, "loss": 0.014828836917877198, "step": 48845 }, { "epoch": 0.45976470588235296, "grad_norm": 1.1083487836221904, "learning_rate": 2.9503177296226605e-06, "loss": 0.02181880474090576, "step": 48850 }, { "epoch": 0.45981176470588236, "grad_norm": 0.5319889275906237, "learning_rate": 2.950166749497102e-06, "loss": 0.016590949892997742, "step": 48855 }, { "epoch": 0.45985882352941176, "grad_norm": 0.6204181215002856, "learning_rate": 2.950015792548029e-06, "loss": 0.01494123637676239, "step": 48860 }, { "epoch": 0.45990588235294116, "grad_norm": 0.4984062399576716, "learning_rate": 2.9498648587695126e-06, "loss": 0.018327364325523378, "step": 48865 }, { "epoch": 0.45995294117647056, "grad_norm": 0.45443890928991937, "learning_rate": 2.9497139481556263e-06, "loss": 0.022232401371002197, "step": 48870 }, { "epoch": 0.46, "grad_norm": 0.5762951979413163, "learning_rate": 2.9495630607004445e-06, "loss": 0.025685572624206544, "step": 48875 }, { "epoch": 0.4600470588235294, "grad_norm": 0.33669886630121026, "learning_rate": 2.949412196398046e-06, "loss": 0.015309493243694305, "step": 48880 }, { "epoch": 0.4600941176470588, "grad_norm": 0.46581243843229064, "learning_rate": 2.9492613552425093e-06, "loss": 0.017285919189453124, "step": 48885 }, { "epoch": 0.4601411764705882, "grad_norm": 0.35187615126576516, "learning_rate": 2.949110537227916e-06, "loss": 0.018313467502593994, "step": 48890 }, { "epoch": 0.4601882352941176, "grad_norm": 0.40426696448560456, "learning_rate": 2.9489597423483503e-06, "loss": 0.02187931090593338, "step": 48895 }, { "epoch": 0.4602352941176471, "grad_norm": 0.5006607842409211, "learning_rate": 2.9488089705978974e-06, "loss": 0.015590709447860718, "step": 48900 }, { "epoch": 0.4602823529411765, "grad_norm": 0.583635314915505, "learning_rate": 2.9486582219706466e-06, "loss": 0.01767575740814209, "step": 48905 }, { "epoch": 0.4603294117647059, "grad_norm": 0.6845318765603862, "learning_rate": 2.9485074964606863e-06, "loss": 0.020084089040756224, "step": 48910 }, { "epoch": 0.4603764705882353, "grad_norm": 0.5210585898957335, "learning_rate": 2.94835679406211e-06, "loss": 0.02307666540145874, "step": 48915 }, { "epoch": 0.46042352941176473, "grad_norm": 0.5213842883313002, "learning_rate": 2.948206114769012e-06, "loss": 0.01954820454120636, "step": 48920 }, { "epoch": 0.46047058823529413, "grad_norm": 0.4397390707636191, "learning_rate": 2.948055458575488e-06, "loss": 0.013985280692577363, "step": 48925 }, { "epoch": 0.46051764705882353, "grad_norm": 0.6968583650372595, "learning_rate": 2.947904825475636e-06, "loss": 0.02085593342781067, "step": 48930 }, { "epoch": 0.46056470588235293, "grad_norm": 0.5883045988323026, "learning_rate": 2.9477542154635587e-06, "loss": 0.02168208509683609, "step": 48935 }, { "epoch": 0.46061176470588233, "grad_norm": 0.5202008125230716, "learning_rate": 2.947603628533357e-06, "loss": 0.015430045127868653, "step": 48940 }, { "epoch": 0.4606588235294118, "grad_norm": 0.4708856686491524, "learning_rate": 2.9474530646791368e-06, "loss": 0.02123645842075348, "step": 48945 }, { "epoch": 0.4607058823529412, "grad_norm": 0.45654480245946955, "learning_rate": 2.9473025238950047e-06, "loss": 0.013325119018554687, "step": 48950 }, { "epoch": 0.4607529411764706, "grad_norm": 0.640231863704859, "learning_rate": 2.9471520061750695e-06, "loss": 0.01979425400495529, "step": 48955 }, { "epoch": 0.4608, "grad_norm": 0.8331615722882907, "learning_rate": 2.947001511513443e-06, "loss": 0.0190990686416626, "step": 48960 }, { "epoch": 0.4608470588235294, "grad_norm": 0.5234414561949567, "learning_rate": 2.9468510399042387e-06, "loss": 0.013946539163589478, "step": 48965 }, { "epoch": 0.46089411764705884, "grad_norm": 0.3239756428904435, "learning_rate": 2.946700591341571e-06, "loss": 0.019975697994232176, "step": 48970 }, { "epoch": 0.46094117647058824, "grad_norm": 0.524096823005412, "learning_rate": 2.9465501658195584e-06, "loss": 0.016865769028663637, "step": 48975 }, { "epoch": 0.46098823529411764, "grad_norm": 0.4053173791654897, "learning_rate": 2.9463997633323198e-06, "loss": 0.020462548732757567, "step": 48980 }, { "epoch": 0.46103529411764704, "grad_norm": 0.5499108302046904, "learning_rate": 2.946249383873977e-06, "loss": 0.02569498121738434, "step": 48985 }, { "epoch": 0.46108235294117644, "grad_norm": 0.8986739667181516, "learning_rate": 2.9460990274386547e-06, "loss": 0.013182958960533142, "step": 48990 }, { "epoch": 0.4611294117647059, "grad_norm": 0.6636197926360421, "learning_rate": 2.9459486940204773e-06, "loss": 0.0179168239235878, "step": 48995 }, { "epoch": 0.4611764705882353, "grad_norm": 0.5971349442077695, "learning_rate": 2.945798383613574e-06, "loss": 0.018017162382602692, "step": 49000 }, { "epoch": 0.4612235294117647, "grad_norm": 0.4440702932889133, "learning_rate": 2.945648096212075e-06, "loss": 0.018466010689735413, "step": 49005 }, { "epoch": 0.4612705882352941, "grad_norm": 0.7184544443771848, "learning_rate": 2.9454978318101115e-06, "loss": 0.01954086571931839, "step": 49010 }, { "epoch": 0.46131764705882355, "grad_norm": 0.6229466324214651, "learning_rate": 2.945347590401818e-06, "loss": 0.017730581760406493, "step": 49015 }, { "epoch": 0.46136470588235295, "grad_norm": 1.395246535003392, "learning_rate": 2.9451973719813322e-06, "loss": 0.018185794353485107, "step": 49020 }, { "epoch": 0.46141176470588235, "grad_norm": 0.5015157782544921, "learning_rate": 2.9450471765427908e-06, "loss": 0.022666031122207643, "step": 49025 }, { "epoch": 0.46145882352941175, "grad_norm": 0.4228007256865156, "learning_rate": 2.9448970040803352e-06, "loss": 0.019823217391967775, "step": 49030 }, { "epoch": 0.46150588235294115, "grad_norm": 0.3242425955892193, "learning_rate": 2.944746854588109e-06, "loss": 0.018292896449565887, "step": 49035 }, { "epoch": 0.4615529411764706, "grad_norm": 0.4919929437808084, "learning_rate": 2.9445967280602555e-06, "loss": 0.018690893054008485, "step": 49040 }, { "epoch": 0.4616, "grad_norm": 0.5010450200193318, "learning_rate": 2.9444466244909215e-06, "loss": 0.02081523835659027, "step": 49045 }, { "epoch": 0.4616470588235294, "grad_norm": 0.5515880091133387, "learning_rate": 2.944296543874257e-06, "loss": 0.019831711053848268, "step": 49050 }, { "epoch": 0.4616941176470588, "grad_norm": 0.4851275354285051, "learning_rate": 2.944146486204413e-06, "loss": 0.017821061611175536, "step": 49055 }, { "epoch": 0.4617411764705882, "grad_norm": 0.4963733157742374, "learning_rate": 2.9439964514755413e-06, "loss": 0.01871362030506134, "step": 49060 }, { "epoch": 0.46178823529411767, "grad_norm": 0.4544177075452495, "learning_rate": 2.9438464396817985e-06, "loss": 0.018380984663963318, "step": 49065 }, { "epoch": 0.46183529411764707, "grad_norm": 0.7612160741910446, "learning_rate": 2.9436964508173405e-06, "loss": 0.023859408497810364, "step": 49070 }, { "epoch": 0.46188235294117647, "grad_norm": 0.705476627419819, "learning_rate": 2.9435464848763283e-06, "loss": 0.03270023167133331, "step": 49075 }, { "epoch": 0.46192941176470587, "grad_norm": 0.4659890108115456, "learning_rate": 2.943396541852922e-06, "loss": 0.02088947147130966, "step": 49080 }, { "epoch": 0.4619764705882353, "grad_norm": 0.4199825683159019, "learning_rate": 2.9432466217412865e-06, "loss": 0.01544485092163086, "step": 49085 }, { "epoch": 0.4620235294117647, "grad_norm": 0.5158234411563113, "learning_rate": 2.9430967245355855e-06, "loss": 0.024082447588443755, "step": 49090 }, { "epoch": 0.4620705882352941, "grad_norm": 0.4955481472361008, "learning_rate": 2.9429468502299886e-06, "loss": 0.017973220348358153, "step": 49095 }, { "epoch": 0.4621176470588235, "grad_norm": 0.5175286741955143, "learning_rate": 2.942796998818664e-06, "loss": 0.02027503550052643, "step": 49100 }, { "epoch": 0.4621647058823529, "grad_norm": 0.5068438841076426, "learning_rate": 2.942647170295785e-06, "loss": 0.016393613815307618, "step": 49105 }, { "epoch": 0.4622117647058824, "grad_norm": 0.4877926078988353, "learning_rate": 2.942497364655524e-06, "loss": 0.018745297193527223, "step": 49110 }, { "epoch": 0.4622588235294118, "grad_norm": 0.4574533128257326, "learning_rate": 2.942347581892059e-06, "loss": 0.01392717957496643, "step": 49115 }, { "epoch": 0.4623058823529412, "grad_norm": 0.45942141804497044, "learning_rate": 2.942197821999566e-06, "loss": 0.016602000594139098, "step": 49120 }, { "epoch": 0.4623529411764706, "grad_norm": 0.5417620775496373, "learning_rate": 2.942048084972226e-06, "loss": 0.018736404180526734, "step": 49125 }, { "epoch": 0.4624, "grad_norm": 0.43489721583904356, "learning_rate": 2.941898370804221e-06, "loss": 0.017084848880767823, "step": 49130 }, { "epoch": 0.46244705882352943, "grad_norm": 0.2424227319571002, "learning_rate": 2.941748679489736e-06, "loss": 0.01731787919998169, "step": 49135 }, { "epoch": 0.46249411764705883, "grad_norm": 0.6477376088846154, "learning_rate": 2.9415990110229565e-06, "loss": 0.0204420804977417, "step": 49140 }, { "epoch": 0.46254117647058823, "grad_norm": 0.6488258693159049, "learning_rate": 2.9414493653980713e-06, "loss": 0.01819174289703369, "step": 49145 }, { "epoch": 0.46258823529411763, "grad_norm": 0.5767103909621779, "learning_rate": 2.9412997426092715e-06, "loss": 0.014715611934661865, "step": 49150 }, { "epoch": 0.46263529411764703, "grad_norm": 0.3215430826996934, "learning_rate": 2.9411501426507484e-06, "loss": 0.016203837096691133, "step": 49155 }, { "epoch": 0.4626823529411765, "grad_norm": 0.49191768342568815, "learning_rate": 2.941000565516698e-06, "loss": 0.013555347919464111, "step": 49160 }, { "epoch": 0.4627294117647059, "grad_norm": 0.5374240841036959, "learning_rate": 2.9408510112013155e-06, "loss": 0.0209355428814888, "step": 49165 }, { "epoch": 0.4627764705882353, "grad_norm": 0.41595618025170367, "learning_rate": 2.9407014796988014e-06, "loss": 0.020375680923461915, "step": 49170 }, { "epoch": 0.4628235294117647, "grad_norm": 0.6051377481143897, "learning_rate": 2.9405519710033553e-06, "loss": 0.022073978185653688, "step": 49175 }, { "epoch": 0.46287058823529414, "grad_norm": 0.47525570106886283, "learning_rate": 2.9404024851091807e-06, "loss": 0.016947075724601746, "step": 49180 }, { "epoch": 0.46291764705882354, "grad_norm": 0.7852464311710158, "learning_rate": 2.9402530220104817e-06, "loss": 0.01975988447666168, "step": 49185 }, { "epoch": 0.46296470588235294, "grad_norm": 0.679456788490381, "learning_rate": 2.9401035817014667e-06, "loss": 0.01852273941040039, "step": 49190 }, { "epoch": 0.46301176470588234, "grad_norm": 0.6520010834119369, "learning_rate": 2.9399541641763433e-06, "loss": 0.0181901752948761, "step": 49195 }, { "epoch": 0.46305882352941174, "grad_norm": 0.38849999757419923, "learning_rate": 2.9398047694293238e-06, "loss": 0.017105868458747862, "step": 49200 }, { "epoch": 0.4631058823529412, "grad_norm": 0.7977987472368496, "learning_rate": 2.939655397454621e-06, "loss": 0.022086499631404875, "step": 49205 }, { "epoch": 0.4631529411764706, "grad_norm": 0.434537958682162, "learning_rate": 2.93950604824645e-06, "loss": 0.016034872829914094, "step": 49210 }, { "epoch": 0.4632, "grad_norm": 0.4305706530205568, "learning_rate": 2.9393567217990284e-06, "loss": 0.01972614973783493, "step": 49215 }, { "epoch": 0.4632470588235294, "grad_norm": 0.34170021669664363, "learning_rate": 2.939207418106575e-06, "loss": 0.017315953969955444, "step": 49220 }, { "epoch": 0.4632941176470588, "grad_norm": 0.9115740249546448, "learning_rate": 2.9390581371633114e-06, "loss": 0.01781800091266632, "step": 49225 }, { "epoch": 0.46334117647058826, "grad_norm": 0.463245026724174, "learning_rate": 2.9389088789634616e-06, "loss": 0.016712456941604614, "step": 49230 }, { "epoch": 0.46338823529411766, "grad_norm": 0.46183566558860156, "learning_rate": 2.9387596435012516e-06, "loss": 0.020303040742874146, "step": 49235 }, { "epoch": 0.46343529411764706, "grad_norm": 0.6505042728611645, "learning_rate": 2.938610430770907e-06, "loss": 0.024274078011512757, "step": 49240 }, { "epoch": 0.46348235294117646, "grad_norm": 0.5688254166115004, "learning_rate": 2.93846124076666e-06, "loss": 0.01663280725479126, "step": 49245 }, { "epoch": 0.46352941176470586, "grad_norm": 0.37051816189768344, "learning_rate": 2.93831207348274e-06, "loss": 0.018968662619590758, "step": 49250 }, { "epoch": 0.4635764705882353, "grad_norm": 0.4257670502781986, "learning_rate": 2.938162928913383e-06, "loss": 0.018136271834373476, "step": 49255 }, { "epoch": 0.4636235294117647, "grad_norm": 0.5718574432881269, "learning_rate": 2.9380138070528225e-06, "loss": 0.018775510787963866, "step": 49260 }, { "epoch": 0.4636705882352941, "grad_norm": 0.47045851313023657, "learning_rate": 2.9378647078952976e-06, "loss": 0.017785090208053588, "step": 49265 }, { "epoch": 0.4637176470588235, "grad_norm": 0.4872707868276873, "learning_rate": 2.9377156314350475e-06, "loss": 0.021792113780975342, "step": 49270 }, { "epoch": 0.46376470588235297, "grad_norm": 0.580845304791274, "learning_rate": 2.9375665776663153e-06, "loss": 0.01935269683599472, "step": 49275 }, { "epoch": 0.46381176470588237, "grad_norm": 0.7448979205412402, "learning_rate": 2.937417546583344e-06, "loss": 0.019148197770118714, "step": 49280 }, { "epoch": 0.46385882352941177, "grad_norm": 0.5383467658450277, "learning_rate": 2.9372685381803793e-06, "loss": 0.019607040286064147, "step": 49285 }, { "epoch": 0.46390588235294117, "grad_norm": 0.36097956340837495, "learning_rate": 2.9371195524516706e-06, "loss": 0.017332501709461212, "step": 49290 }, { "epoch": 0.46395294117647057, "grad_norm": 0.5193002719966108, "learning_rate": 2.9369705893914672e-06, "loss": 0.02191545069217682, "step": 49295 }, { "epoch": 0.464, "grad_norm": 0.32715314752276875, "learning_rate": 2.936821648994021e-06, "loss": 0.01677544414997101, "step": 49300 }, { "epoch": 0.4640470588235294, "grad_norm": 0.6531228493575402, "learning_rate": 2.936672731253587e-06, "loss": 0.021874403953552245, "step": 49305 }, { "epoch": 0.4640941176470588, "grad_norm": 0.43638891262131857, "learning_rate": 2.93652383616442e-06, "loss": 0.015842241048812867, "step": 49310 }, { "epoch": 0.4641411764705882, "grad_norm": 0.5901252696873511, "learning_rate": 2.9363749637207796e-06, "loss": 0.018017053604125977, "step": 49315 }, { "epoch": 0.4641882352941176, "grad_norm": 0.6017029858919365, "learning_rate": 2.9362261139169256e-06, "loss": 0.017414119839668275, "step": 49320 }, { "epoch": 0.4642352941176471, "grad_norm": 0.6865801323913123, "learning_rate": 2.9360772867471205e-06, "loss": 0.017566724121570586, "step": 49325 }, { "epoch": 0.4642823529411765, "grad_norm": 0.6504813371427612, "learning_rate": 2.935928482205628e-06, "loss": 0.0176341712474823, "step": 49330 }, { "epoch": 0.4643294117647059, "grad_norm": 0.6194170444892793, "learning_rate": 2.935779700286716e-06, "loss": 0.016182079911231995, "step": 49335 }, { "epoch": 0.4643764705882353, "grad_norm": 0.41699097675876073, "learning_rate": 2.9356309409846516e-06, "loss": 0.014749568700790406, "step": 49340 }, { "epoch": 0.4644235294117647, "grad_norm": 0.5276665572907312, "learning_rate": 2.935482204293706e-06, "loss": 0.017161676287651063, "step": 49345 }, { "epoch": 0.46447058823529414, "grad_norm": 0.3825398545714966, "learning_rate": 2.935333490208151e-06, "loss": 0.018363901972770692, "step": 49350 }, { "epoch": 0.46451764705882354, "grad_norm": 0.5049661797497614, "learning_rate": 2.9351847987222616e-06, "loss": 0.01673901081085205, "step": 49355 }, { "epoch": 0.46456470588235294, "grad_norm": 0.449844904802672, "learning_rate": 2.935036129830314e-06, "loss": 0.01866903007030487, "step": 49360 }, { "epoch": 0.46461176470588234, "grad_norm": 0.5676836909965601, "learning_rate": 2.9348874835265878e-06, "loss": 0.017364843189716338, "step": 49365 }, { "epoch": 0.4646588235294118, "grad_norm": 0.6739765121872475, "learning_rate": 2.9347388598053624e-06, "loss": 0.021036137640476228, "step": 49370 }, { "epoch": 0.4647058823529412, "grad_norm": 0.7396408465593567, "learning_rate": 2.934590258660921e-06, "loss": 0.020383326709270476, "step": 49375 }, { "epoch": 0.4647529411764706, "grad_norm": 0.529431974889856, "learning_rate": 2.934441680087548e-06, "loss": 0.016919538378715515, "step": 49380 }, { "epoch": 0.4648, "grad_norm": 0.44603592572080314, "learning_rate": 2.934293124079531e-06, "loss": 0.01969994604587555, "step": 49385 }, { "epoch": 0.4648470588235294, "grad_norm": 0.7115077032699547, "learning_rate": 2.934144590631157e-06, "loss": 0.02301558554172516, "step": 49390 }, { "epoch": 0.46489411764705885, "grad_norm": 0.4709049931657765, "learning_rate": 2.933996079736719e-06, "loss": 0.01794876605272293, "step": 49395 }, { "epoch": 0.46494117647058825, "grad_norm": 0.5690862288553682, "learning_rate": 2.9338475913905077e-06, "loss": 0.020985320210456848, "step": 49400 }, { "epoch": 0.46498823529411765, "grad_norm": 0.4735446183877282, "learning_rate": 2.93369912558682e-06, "loss": 0.015185984969139098, "step": 49405 }, { "epoch": 0.46503529411764705, "grad_norm": 0.5101551377700303, "learning_rate": 2.9335506823199504e-06, "loss": 0.020506507158279418, "step": 49410 }, { "epoch": 0.46508235294117645, "grad_norm": 0.2813546246158876, "learning_rate": 2.933402261584199e-06, "loss": 0.017276710271835326, "step": 49415 }, { "epoch": 0.4651294117647059, "grad_norm": 0.4726026433809124, "learning_rate": 2.9332538633738668e-06, "loss": 0.018866288661956786, "step": 49420 }, { "epoch": 0.4651764705882353, "grad_norm": 0.46650813239520283, "learning_rate": 2.933105487683256e-06, "loss": 0.01632537543773651, "step": 49425 }, { "epoch": 0.4652235294117647, "grad_norm": 0.5793001658991468, "learning_rate": 2.932957134506672e-06, "loss": 0.020242154598236084, "step": 49430 }, { "epoch": 0.4652705882352941, "grad_norm": 0.6577542915994722, "learning_rate": 2.9328088038384215e-06, "loss": 0.018619942665100097, "step": 49435 }, { "epoch": 0.4653176470588235, "grad_norm": 0.528386142004992, "learning_rate": 2.9326604956728138e-06, "loss": 0.016054390370845793, "step": 49440 }, { "epoch": 0.46536470588235296, "grad_norm": 0.5864045374672217, "learning_rate": 2.9325122100041593e-06, "loss": 0.01834368109703064, "step": 49445 }, { "epoch": 0.46541176470588236, "grad_norm": 0.5333047625524333, "learning_rate": 2.932363946826771e-06, "loss": 0.02038256824016571, "step": 49450 }, { "epoch": 0.46545882352941176, "grad_norm": 0.6430873298154655, "learning_rate": 2.932215706134964e-06, "loss": 0.023288647830486297, "step": 49455 }, { "epoch": 0.46550588235294116, "grad_norm": 0.5391283118299259, "learning_rate": 2.932067487923056e-06, "loss": 0.016599768400192262, "step": 49460 }, { "epoch": 0.4655529411764706, "grad_norm": 0.5059907544472037, "learning_rate": 2.9319192921853646e-06, "loss": 0.018457946181297303, "step": 49465 }, { "epoch": 0.4656, "grad_norm": 0.7752654057307415, "learning_rate": 2.931771118916211e-06, "loss": 0.02291947603225708, "step": 49470 }, { "epoch": 0.4656470588235294, "grad_norm": 0.6582624097796758, "learning_rate": 2.9316229681099194e-06, "loss": 0.018046024441719054, "step": 49475 }, { "epoch": 0.4656941176470588, "grad_norm": 0.4928849894440569, "learning_rate": 2.931474839760814e-06, "loss": 0.016219648718833923, "step": 49480 }, { "epoch": 0.4657411764705882, "grad_norm": 0.5243593845387771, "learning_rate": 2.931326733863222e-06, "loss": 0.01438501924276352, "step": 49485 }, { "epoch": 0.46578823529411767, "grad_norm": 1.2428967188365727, "learning_rate": 2.931178650411472e-06, "loss": 0.025228604674339294, "step": 49490 }, { "epoch": 0.46583529411764707, "grad_norm": 0.600311470564356, "learning_rate": 2.931030589399895e-06, "loss": 0.02133094072341919, "step": 49495 }, { "epoch": 0.46588235294117647, "grad_norm": 0.37637204614054637, "learning_rate": 2.9308825508228258e-06, "loss": 0.01625431329011917, "step": 49500 }, { "epoch": 0.46592941176470587, "grad_norm": 0.48253341858800775, "learning_rate": 2.930734534674597e-06, "loss": 0.014607596397399902, "step": 49505 }, { "epoch": 0.46597647058823527, "grad_norm": 0.5825918795358674, "learning_rate": 2.9305865409495466e-06, "loss": 0.017838437855243684, "step": 49510 }, { "epoch": 0.4660235294117647, "grad_norm": 0.6053808287734354, "learning_rate": 2.930438569642014e-06, "loss": 0.02318723499774933, "step": 49515 }, { "epoch": 0.4660705882352941, "grad_norm": 0.4548927713009794, "learning_rate": 2.9302906207463394e-06, "loss": 0.01374937742948532, "step": 49520 }, { "epoch": 0.4661176470588235, "grad_norm": 0.6215958323898271, "learning_rate": 2.9301426942568667e-06, "loss": 0.018504735827445985, "step": 49525 }, { "epoch": 0.4661647058823529, "grad_norm": 0.6407314746584333, "learning_rate": 2.929994790167941e-06, "loss": 0.016623571515083313, "step": 49530 }, { "epoch": 0.4662117647058823, "grad_norm": 0.49148903485874745, "learning_rate": 2.9298469084739084e-06, "loss": 0.018907569348812103, "step": 49535 }, { "epoch": 0.4662588235294118, "grad_norm": 0.38257679731379707, "learning_rate": 2.9296990491691185e-06, "loss": 0.014117318391799926, "step": 49540 }, { "epoch": 0.4663058823529412, "grad_norm": 0.6541950203599716, "learning_rate": 2.9295512122479227e-06, "loss": 0.02579348683357239, "step": 49545 }, { "epoch": 0.4663529411764706, "grad_norm": 0.6199676227532054, "learning_rate": 2.9294033977046736e-06, "loss": 0.014190615713596344, "step": 49550 }, { "epoch": 0.4664, "grad_norm": 0.6950793488420782, "learning_rate": 2.9292556055337255e-06, "loss": 0.020385251939296724, "step": 49555 }, { "epoch": 0.46644705882352944, "grad_norm": 0.4421229260019973, "learning_rate": 2.9291078357294374e-06, "loss": 0.01882123202085495, "step": 49560 }, { "epoch": 0.46649411764705884, "grad_norm": 0.4886734001931936, "learning_rate": 2.9289600882861657e-06, "loss": 0.019084826111793518, "step": 49565 }, { "epoch": 0.46654117647058824, "grad_norm": 0.40343568420383186, "learning_rate": 2.9288123631982733e-06, "loss": 0.013107357919216156, "step": 49570 }, { "epoch": 0.46658823529411764, "grad_norm": 0.8413905048937586, "learning_rate": 2.9286646604601233e-06, "loss": 0.02100343406200409, "step": 49575 }, { "epoch": 0.46663529411764704, "grad_norm": 0.5980064954515047, "learning_rate": 2.928516980066079e-06, "loss": 0.01642358601093292, "step": 49580 }, { "epoch": 0.4666823529411765, "grad_norm": 0.6187375310477504, "learning_rate": 2.9283693220105092e-06, "loss": 0.016535937786102295, "step": 49585 }, { "epoch": 0.4667294117647059, "grad_norm": 0.475318471448734, "learning_rate": 2.9282216862877814e-06, "loss": 0.01974779963493347, "step": 49590 }, { "epoch": 0.4667764705882353, "grad_norm": 0.4933722168062447, "learning_rate": 2.928074072892268e-06, "loss": 0.027929258346557618, "step": 49595 }, { "epoch": 0.4668235294117647, "grad_norm": 0.4536609068600945, "learning_rate": 2.9279264818183407e-06, "loss": 0.016141256690025328, "step": 49600 }, { "epoch": 0.4668705882352941, "grad_norm": 0.44911684668866664, "learning_rate": 2.9277789130603746e-06, "loss": 0.017470116913318633, "step": 49605 }, { "epoch": 0.46691764705882355, "grad_norm": 0.5855366471011937, "learning_rate": 2.9276313666127464e-06, "loss": 0.023466646671295166, "step": 49610 }, { "epoch": 0.46696470588235295, "grad_norm": 0.49783257905106887, "learning_rate": 2.9274838424698364e-06, "loss": 0.01761094927787781, "step": 49615 }, { "epoch": 0.46701176470588235, "grad_norm": 0.4756724549548516, "learning_rate": 2.9273363406260243e-06, "loss": 0.022049924731254576, "step": 49620 }, { "epoch": 0.46705882352941175, "grad_norm": 0.4571809302821744, "learning_rate": 2.9271888610756932e-06, "loss": 0.014586219191551208, "step": 49625 }, { "epoch": 0.4671058823529412, "grad_norm": 0.8278664129725041, "learning_rate": 2.927041403813227e-06, "loss": 0.022027257084846496, "step": 49630 }, { "epoch": 0.4671529411764706, "grad_norm": 0.3668295171783547, "learning_rate": 2.9268939688330137e-06, "loss": 0.01727452278137207, "step": 49635 }, { "epoch": 0.4672, "grad_norm": 0.6577128829798946, "learning_rate": 2.926746556129442e-06, "loss": 0.018486224114894867, "step": 49640 }, { "epoch": 0.4672470588235294, "grad_norm": 0.3605712208232452, "learning_rate": 2.926599165696902e-06, "loss": 0.013794051110744476, "step": 49645 }, { "epoch": 0.4672941176470588, "grad_norm": 0.6756841253947734, "learning_rate": 2.9264517975297876e-06, "loss": 0.018371762335300447, "step": 49650 }, { "epoch": 0.46734117647058826, "grad_norm": 0.5789936461608853, "learning_rate": 2.926304451622492e-06, "loss": 0.01837468147277832, "step": 49655 }, { "epoch": 0.46738823529411766, "grad_norm": 0.580581718704694, "learning_rate": 2.9261571279694127e-06, "loss": 0.023688119649887086, "step": 49660 }, { "epoch": 0.46743529411764706, "grad_norm": 0.4802721323290267, "learning_rate": 2.9260098265649484e-06, "loss": 0.01789110004901886, "step": 49665 }, { "epoch": 0.46748235294117646, "grad_norm": 0.5698898324223666, "learning_rate": 2.9258625474035002e-06, "loss": 0.023653388023376465, "step": 49670 }, { "epoch": 0.46752941176470586, "grad_norm": 0.5675631768993102, "learning_rate": 2.9257152904794693e-06, "loss": 0.018960517644882203, "step": 49675 }, { "epoch": 0.4675764705882353, "grad_norm": 0.7995882925392932, "learning_rate": 2.925568055787261e-06, "loss": 0.019398523867130278, "step": 49680 }, { "epoch": 0.4676235294117647, "grad_norm": 0.5348140576818873, "learning_rate": 2.925420843321283e-06, "loss": 0.013214388489723205, "step": 49685 }, { "epoch": 0.4676705882352941, "grad_norm": 0.8337673741115346, "learning_rate": 2.925273653075942e-06, "loss": 0.023306258022785187, "step": 49690 }, { "epoch": 0.4677176470588235, "grad_norm": 0.3803467289838294, "learning_rate": 2.9251264850456496e-06, "loss": 0.017029422521591186, "step": 49695 }, { "epoch": 0.4677647058823529, "grad_norm": 0.6588750808653786, "learning_rate": 2.924979339224818e-06, "loss": 0.01617983877658844, "step": 49700 }, { "epoch": 0.46781176470588237, "grad_norm": 0.4684524890673321, "learning_rate": 2.9248322156078613e-06, "loss": 0.018401485681533814, "step": 49705 }, { "epoch": 0.46785882352941177, "grad_norm": 0.6049564500376493, "learning_rate": 2.9246851141891964e-06, "loss": 0.01867537796497345, "step": 49710 }, { "epoch": 0.46790588235294117, "grad_norm": 0.5936901427287662, "learning_rate": 2.9245380349632414e-06, "loss": 0.018100404739379884, "step": 49715 }, { "epoch": 0.46795294117647057, "grad_norm": 0.6724057532710893, "learning_rate": 2.9243909779244163e-06, "loss": 0.022020848095417024, "step": 49720 }, { "epoch": 0.468, "grad_norm": 0.4513590667151765, "learning_rate": 2.9242439430671445e-06, "loss": 0.012606817483901977, "step": 49725 }, { "epoch": 0.4680470588235294, "grad_norm": 0.5929212469002226, "learning_rate": 2.924096930385849e-06, "loss": 0.018560510873794556, "step": 49730 }, { "epoch": 0.4680941176470588, "grad_norm": 0.46497945105461364, "learning_rate": 2.9239499398749567e-06, "loss": 0.02034746706485748, "step": 49735 }, { "epoch": 0.4681411764705882, "grad_norm": 0.4576807461147305, "learning_rate": 2.923802971528895e-06, "loss": 0.014753937721252441, "step": 49740 }, { "epoch": 0.4681882352941176, "grad_norm": 0.6383902395855298, "learning_rate": 2.9236560253420955e-06, "loss": 0.02052272856235504, "step": 49745 }, { "epoch": 0.4682352941176471, "grad_norm": 0.5613366917549757, "learning_rate": 2.923509101308989e-06, "loss": 0.016509231925010682, "step": 49750 }, { "epoch": 0.4682823529411765, "grad_norm": 0.6178547231358302, "learning_rate": 2.9233621994240103e-06, "loss": 0.017563238739967346, "step": 49755 }, { "epoch": 0.4683294117647059, "grad_norm": 0.47322883084872397, "learning_rate": 2.9232153196815947e-06, "loss": 0.021511748433113098, "step": 49760 }, { "epoch": 0.4683764705882353, "grad_norm": 0.5541309901409059, "learning_rate": 2.9230684620761806e-06, "loss": 0.01701813340187073, "step": 49765 }, { "epoch": 0.4684235294117647, "grad_norm": 0.5364254169877442, "learning_rate": 2.922921626602208e-06, "loss": 0.018803611397743225, "step": 49770 }, { "epoch": 0.46847058823529414, "grad_norm": 0.5673502963245441, "learning_rate": 2.922774813254119e-06, "loss": 0.01907455027103424, "step": 49775 }, { "epoch": 0.46851764705882354, "grad_norm": 0.5012270435504566, "learning_rate": 2.9226280220263563e-06, "loss": 0.019304680824279784, "step": 49780 }, { "epoch": 0.46856470588235294, "grad_norm": 0.5410143580954304, "learning_rate": 2.922481252913367e-06, "loss": 0.016496965289115907, "step": 49785 }, { "epoch": 0.46861176470588234, "grad_norm": 0.3823501106767705, "learning_rate": 2.9223345059095983e-06, "loss": 0.015887543559074402, "step": 49790 }, { "epoch": 0.46865882352941174, "grad_norm": 0.5299994343250799, "learning_rate": 2.9221877810095e-06, "loss": 0.014512142539024353, "step": 49795 }, { "epoch": 0.4687058823529412, "grad_norm": 0.6187513955147215, "learning_rate": 2.922041078207523e-06, "loss": 0.019264230132102968, "step": 49800 }, { "epoch": 0.4687529411764706, "grad_norm": 0.4017469384040676, "learning_rate": 2.921894397498122e-06, "loss": 0.017119356989860536, "step": 49805 }, { "epoch": 0.4688, "grad_norm": 0.4419391604011463, "learning_rate": 2.921747738875752e-06, "loss": 0.019342687726020814, "step": 49810 }, { "epoch": 0.4688470588235294, "grad_norm": 0.5258806004311258, "learning_rate": 2.921601102334871e-06, "loss": 0.01790478080511093, "step": 49815 }, { "epoch": 0.46889411764705885, "grad_norm": 0.6527995936344358, "learning_rate": 2.9214544878699376e-06, "loss": 0.01753348410129547, "step": 49820 }, { "epoch": 0.46894117647058825, "grad_norm": 0.9494983172326867, "learning_rate": 2.9213078954754136e-06, "loss": 0.02274460643529892, "step": 49825 }, { "epoch": 0.46898823529411765, "grad_norm": 0.47499067172128234, "learning_rate": 2.921161325145762e-06, "loss": 0.022866247594356535, "step": 49830 }, { "epoch": 0.46903529411764705, "grad_norm": 0.6647064773287121, "learning_rate": 2.9210147768754487e-06, "loss": 0.027905791997909546, "step": 49835 }, { "epoch": 0.46908235294117645, "grad_norm": 0.4584685451247122, "learning_rate": 2.920868250658941e-06, "loss": 0.016565322875976562, "step": 49840 }, { "epoch": 0.4691294117647059, "grad_norm": 0.4664775051901706, "learning_rate": 2.9207217464907067e-06, "loss": 0.018371817469596863, "step": 49845 }, { "epoch": 0.4691764705882353, "grad_norm": 0.5311643391799361, "learning_rate": 2.920575264365218e-06, "loss": 0.014970070123672486, "step": 49850 }, { "epoch": 0.4692235294117647, "grad_norm": 0.4386736129975326, "learning_rate": 2.920428804276948e-06, "loss": 0.016517797112464906, "step": 49855 }, { "epoch": 0.4692705882352941, "grad_norm": 0.4335935216002794, "learning_rate": 2.9202823662203717e-06, "loss": 0.017598655819892884, "step": 49860 }, { "epoch": 0.4693176470588235, "grad_norm": 0.41977420276492994, "learning_rate": 2.920135950189965e-06, "loss": 0.016181951761245726, "step": 49865 }, { "epoch": 0.46936470588235296, "grad_norm": 0.7108223620976489, "learning_rate": 2.919989556180208e-06, "loss": 0.019143424928188324, "step": 49870 }, { "epoch": 0.46941176470588236, "grad_norm": 0.5139629854099528, "learning_rate": 2.919843184185581e-06, "loss": 0.013003763556480408, "step": 49875 }, { "epoch": 0.46945882352941176, "grad_norm": 0.5717123650551852, "learning_rate": 2.919696834200567e-06, "loss": 0.019635289907455444, "step": 49880 }, { "epoch": 0.46950588235294116, "grad_norm": 0.4038289852665006, "learning_rate": 2.9195505062196494e-06, "loss": 0.014927969872951507, "step": 49885 }, { "epoch": 0.46955294117647056, "grad_norm": 0.4037290719828494, "learning_rate": 2.919404200237317e-06, "loss": 0.022574372589588165, "step": 49890 }, { "epoch": 0.4696, "grad_norm": 0.6329071571286472, "learning_rate": 2.919257916248056e-06, "loss": 0.018738996982574464, "step": 49895 }, { "epoch": 0.4696470588235294, "grad_norm": 0.5857869664438349, "learning_rate": 2.919111654246359e-06, "loss": 0.014558400213718414, "step": 49900 }, { "epoch": 0.4696941176470588, "grad_norm": 0.4700603517586451, "learning_rate": 2.9189654142267173e-06, "loss": 0.018277710676193236, "step": 49905 }, { "epoch": 0.4697411764705882, "grad_norm": 0.4518887232439264, "learning_rate": 2.918819196183624e-06, "loss": 0.015100979804992675, "step": 49910 }, { "epoch": 0.4697882352941177, "grad_norm": 0.4317639843460732, "learning_rate": 2.9186730001115784e-06, "loss": 0.01877800077199936, "step": 49915 }, { "epoch": 0.4698352941176471, "grad_norm": 0.664463720336413, "learning_rate": 2.918526826005076e-06, "loss": 0.01716054379940033, "step": 49920 }, { "epoch": 0.4698823529411765, "grad_norm": 0.5809053501066557, "learning_rate": 2.9183806738586178e-06, "loss": 0.018716858327388765, "step": 49925 }, { "epoch": 0.4699294117647059, "grad_norm": 0.6525070178666134, "learning_rate": 2.918234543666706e-06, "loss": 0.016004832088947298, "step": 49930 }, { "epoch": 0.4699764705882353, "grad_norm": 0.3927601204772992, "learning_rate": 2.918088435423845e-06, "loss": 0.016838392615318297, "step": 49935 }, { "epoch": 0.47002352941176473, "grad_norm": 0.745351295201622, "learning_rate": 2.917942349124539e-06, "loss": 0.01781548261642456, "step": 49940 }, { "epoch": 0.47007058823529413, "grad_norm": 0.6327754330413473, "learning_rate": 2.9177962847632986e-06, "loss": 0.013816836476325988, "step": 49945 }, { "epoch": 0.47011764705882353, "grad_norm": 0.5345647413436874, "learning_rate": 2.9176502423346307e-06, "loss": 0.01936405897140503, "step": 49950 }, { "epoch": 0.47016470588235293, "grad_norm": 0.43011103884019863, "learning_rate": 2.9175042218330484e-06, "loss": 0.01887066662311554, "step": 49955 }, { "epoch": 0.47021176470588233, "grad_norm": 0.6880266347063194, "learning_rate": 2.9173582232530655e-06, "loss": 0.017553865909576416, "step": 49960 }, { "epoch": 0.4702588235294118, "grad_norm": 0.4931770242112109, "learning_rate": 2.9172122465891973e-06, "loss": 0.019717295467853547, "step": 49965 }, { "epoch": 0.4703058823529412, "grad_norm": 0.5457532221265781, "learning_rate": 2.9170662918359603e-06, "loss": 0.025653940439224244, "step": 49970 }, { "epoch": 0.4703529411764706, "grad_norm": 0.6569617698489265, "learning_rate": 2.9169203589878754e-06, "loss": 0.020021077990531922, "step": 49975 }, { "epoch": 0.4704, "grad_norm": 0.6147261621207234, "learning_rate": 2.9167744480394622e-06, "loss": 0.025109392404556275, "step": 49980 }, { "epoch": 0.4704470588235294, "grad_norm": 0.5172119097280009, "learning_rate": 2.9166285589852456e-06, "loss": 0.01723812222480774, "step": 49985 }, { "epoch": 0.47049411764705884, "grad_norm": 0.4409088508686949, "learning_rate": 2.9164826918197495e-06, "loss": 0.015040871500968934, "step": 49990 }, { "epoch": 0.47054117647058824, "grad_norm": 0.45711077072732775, "learning_rate": 2.9163368465375012e-06, "loss": 0.01568681001663208, "step": 49995 }, { "epoch": 0.47058823529411764, "grad_norm": 0.40304828047535934, "learning_rate": 2.91619102313303e-06, "loss": 0.019670401513576508, "step": 50000 }, { "epoch": 0.47063529411764704, "grad_norm": 0.8167771558033836, "learning_rate": 2.9160452216008667e-06, "loss": 0.018098515272140504, "step": 50005 }, { "epoch": 0.4706823529411765, "grad_norm": 0.44312312241854224, "learning_rate": 2.915899441935544e-06, "loss": 0.02062993049621582, "step": 50010 }, { "epoch": 0.4707294117647059, "grad_norm": 0.3624770792833036, "learning_rate": 2.9157536841315957e-06, "loss": 0.01811746507883072, "step": 50015 }, { "epoch": 0.4707764705882353, "grad_norm": 0.6604981491404227, "learning_rate": 2.9156079481835598e-06, "loss": 0.020115284621715544, "step": 50020 }, { "epoch": 0.4708235294117647, "grad_norm": 0.48108958762924214, "learning_rate": 2.915462234085974e-06, "loss": 0.01436736136674881, "step": 50025 }, { "epoch": 0.4708705882352941, "grad_norm": 0.4764305812666284, "learning_rate": 2.915316541833379e-06, "loss": 0.02221371829509735, "step": 50030 }, { "epoch": 0.47091764705882355, "grad_norm": 0.5461750282343008, "learning_rate": 2.9151708714203163e-06, "loss": 0.018348616361618043, "step": 50035 }, { "epoch": 0.47096470588235295, "grad_norm": 0.45239565420248196, "learning_rate": 2.915025222841332e-06, "loss": 0.017285402119159698, "step": 50040 }, { "epoch": 0.47101176470588235, "grad_norm": 0.608894958618745, "learning_rate": 2.9148795960909704e-06, "loss": 0.018771138787269593, "step": 50045 }, { "epoch": 0.47105882352941175, "grad_norm": 0.6206944812243885, "learning_rate": 2.91473399116378e-06, "loss": 0.02153090238571167, "step": 50050 }, { "epoch": 0.47110588235294115, "grad_norm": 0.6509023181013628, "learning_rate": 2.914588408054312e-06, "loss": 0.019149976968765258, "step": 50055 }, { "epoch": 0.4711529411764706, "grad_norm": 0.44152988939627874, "learning_rate": 2.9144428467571164e-06, "loss": 0.014085863530635834, "step": 50060 }, { "epoch": 0.4712, "grad_norm": 0.702029730973643, "learning_rate": 2.914297307266748e-06, "loss": 0.021606463193893432, "step": 50065 }, { "epoch": 0.4712470588235294, "grad_norm": 0.44556058692301226, "learning_rate": 2.9141517895777628e-06, "loss": 0.021717706322669984, "step": 50070 }, { "epoch": 0.4712941176470588, "grad_norm": 0.5546317336007376, "learning_rate": 2.9140062936847175e-06, "loss": 0.018271011114120484, "step": 50075 }, { "epoch": 0.4713411764705882, "grad_norm": 0.46138922177772534, "learning_rate": 2.9138608195821717e-06, "loss": 0.014912649989128113, "step": 50080 }, { "epoch": 0.47138823529411766, "grad_norm": 1.1351938787923421, "learning_rate": 2.9137153672646873e-06, "loss": 0.017894598841667175, "step": 50085 }, { "epoch": 0.47143529411764706, "grad_norm": 0.4388658153029646, "learning_rate": 2.9135699367268273e-06, "loss": 0.019045607745647432, "step": 50090 }, { "epoch": 0.47148235294117646, "grad_norm": 0.5638484464558791, "learning_rate": 2.913424527963157e-06, "loss": 0.017758908867835998, "step": 50095 }, { "epoch": 0.47152941176470586, "grad_norm": 0.4885618130703533, "learning_rate": 2.913279140968243e-06, "loss": 0.02357872426509857, "step": 50100 }, { "epoch": 0.4715764705882353, "grad_norm": 0.43968025009274747, "learning_rate": 2.9131337757366547e-06, "loss": 0.015961825847625732, "step": 50105 }, { "epoch": 0.4716235294117647, "grad_norm": 0.46986910548892985, "learning_rate": 2.912988432262963e-06, "loss": 0.019366271793842316, "step": 50110 }, { "epoch": 0.4716705882352941, "grad_norm": 0.5777744755653509, "learning_rate": 2.912843110541741e-06, "loss": 0.01936628669500351, "step": 50115 }, { "epoch": 0.4717176470588235, "grad_norm": 0.6641763126572667, "learning_rate": 2.9126978105675624e-06, "loss": 0.016520041227340698, "step": 50120 }, { "epoch": 0.4717647058823529, "grad_norm": 0.5986787742578135, "learning_rate": 2.912552532335004e-06, "loss": 0.020017576217651368, "step": 50125 }, { "epoch": 0.4718117647058824, "grad_norm": 0.5609222984485319, "learning_rate": 2.9124072758386453e-06, "loss": 0.01830998957157135, "step": 50130 }, { "epoch": 0.4718588235294118, "grad_norm": 0.4474676483729195, "learning_rate": 2.912262041073065e-06, "loss": 0.01930058002471924, "step": 50135 }, { "epoch": 0.4719058823529412, "grad_norm": 0.5356087834350826, "learning_rate": 2.912116828032847e-06, "loss": 0.022334212064743043, "step": 50140 }, { "epoch": 0.4719529411764706, "grad_norm": 0.4994014590449411, "learning_rate": 2.911971636712574e-06, "loss": 0.01716228127479553, "step": 50145 }, { "epoch": 0.472, "grad_norm": 0.5269887865217006, "learning_rate": 2.911826467106833e-06, "loss": 0.017195504903793336, "step": 50150 }, { "epoch": 0.47204705882352943, "grad_norm": 0.5546509965608546, "learning_rate": 2.9116813192102117e-06, "loss": 0.015173581242561341, "step": 50155 }, { "epoch": 0.47209411764705883, "grad_norm": 0.40116033775888754, "learning_rate": 2.9115361930172993e-06, "loss": 0.013042980432510376, "step": 50160 }, { "epoch": 0.47214117647058823, "grad_norm": 0.5469558282568466, "learning_rate": 2.9113910885226876e-06, "loss": 0.016423732042312622, "step": 50165 }, { "epoch": 0.47218823529411763, "grad_norm": 0.4131180653252582, "learning_rate": 2.9112460057209712e-06, "loss": 0.014839068055152893, "step": 50170 }, { "epoch": 0.4722352941176471, "grad_norm": 0.4787130135827261, "learning_rate": 2.9111009446067445e-06, "loss": 0.026813000440597534, "step": 50175 }, { "epoch": 0.4722823529411765, "grad_norm": 0.5421904294018269, "learning_rate": 2.910955905174605e-06, "loss": 0.017327475547790527, "step": 50180 }, { "epoch": 0.4723294117647059, "grad_norm": 0.52533306833641, "learning_rate": 2.910810887419152e-06, "loss": 0.018346732854843138, "step": 50185 }, { "epoch": 0.4723764705882353, "grad_norm": 0.6614929199156027, "learning_rate": 2.9106658913349867e-06, "loss": 0.021228787302970887, "step": 50190 }, { "epoch": 0.4724235294117647, "grad_norm": 0.6074604049555749, "learning_rate": 2.910520916916712e-06, "loss": 0.019217994809150696, "step": 50195 }, { "epoch": 0.47247058823529414, "grad_norm": 0.7375772546253617, "learning_rate": 2.9103759641589336e-06, "loss": 0.023287883400917052, "step": 50200 }, { "epoch": 0.47251764705882354, "grad_norm": 0.4032062476010554, "learning_rate": 2.9102310330562565e-06, "loss": 0.017149874567985536, "step": 50205 }, { "epoch": 0.47256470588235294, "grad_norm": 0.4496320472139608, "learning_rate": 2.910086123603291e-06, "loss": 0.015535029768943786, "step": 50210 }, { "epoch": 0.47261176470588234, "grad_norm": 0.46545728545953274, "learning_rate": 2.909941235794646e-06, "loss": 0.018785575032234193, "step": 50215 }, { "epoch": 0.47265882352941174, "grad_norm": 0.46063930634436095, "learning_rate": 2.909796369624935e-06, "loss": 0.0191192090511322, "step": 50220 }, { "epoch": 0.4727058823529412, "grad_norm": 0.41876092363985146, "learning_rate": 2.9096515250887725e-06, "loss": 0.01939181685447693, "step": 50225 }, { "epoch": 0.4727529411764706, "grad_norm": 0.41799189530608927, "learning_rate": 2.909506702180774e-06, "loss": 0.019280397891998292, "step": 50230 }, { "epoch": 0.4728, "grad_norm": 0.48910912622079883, "learning_rate": 2.9093619008955577e-06, "loss": 0.017952768504619597, "step": 50235 }, { "epoch": 0.4728470588235294, "grad_norm": 0.5034125924246681, "learning_rate": 2.9092171212277432e-06, "loss": 0.020630496740341186, "step": 50240 }, { "epoch": 0.4728941176470588, "grad_norm": 0.689002973078607, "learning_rate": 2.9090723631719524e-06, "loss": 0.016973009705543517, "step": 50245 }, { "epoch": 0.47294117647058825, "grad_norm": 0.4271808899360729, "learning_rate": 2.9089276267228095e-06, "loss": 0.022078007459640503, "step": 50250 }, { "epoch": 0.47298823529411765, "grad_norm": 0.4976924363078599, "learning_rate": 2.908782911874939e-06, "loss": 0.02898634076118469, "step": 50255 }, { "epoch": 0.47303529411764705, "grad_norm": 0.404115587979994, "learning_rate": 2.9086382186229694e-06, "loss": 0.01410975307226181, "step": 50260 }, { "epoch": 0.47308235294117645, "grad_norm": 0.6373560500445372, "learning_rate": 2.9084935469615292e-06, "loss": 0.015921801328659058, "step": 50265 }, { "epoch": 0.4731294117647059, "grad_norm": 1.9052286559179794, "learning_rate": 2.90834889688525e-06, "loss": 0.02115418016910553, "step": 50270 }, { "epoch": 0.4731764705882353, "grad_norm": 0.5657707203636282, "learning_rate": 2.9082042683887633e-06, "loss": 0.01903403103351593, "step": 50275 }, { "epoch": 0.4732235294117647, "grad_norm": 0.35975562784179455, "learning_rate": 2.908059661466706e-06, "loss": 0.01633523255586624, "step": 50280 }, { "epoch": 0.4732705882352941, "grad_norm": 0.45689795033347885, "learning_rate": 2.907915076113715e-06, "loss": 0.017148202657699584, "step": 50285 }, { "epoch": 0.4733176470588235, "grad_norm": 1.4512934881881103, "learning_rate": 2.9077705123244264e-06, "loss": 0.022641612589359282, "step": 50290 }, { "epoch": 0.47336470588235297, "grad_norm": 0.5908970969203742, "learning_rate": 2.9076259700934823e-06, "loss": 0.015000754594802856, "step": 50295 }, { "epoch": 0.47341176470588237, "grad_norm": 0.7603437429304981, "learning_rate": 2.907481449415525e-06, "loss": 0.017222392559051513, "step": 50300 }, { "epoch": 0.47345882352941177, "grad_norm": 0.6261902443238115, "learning_rate": 2.907336950285199e-06, "loss": 0.019470104575157167, "step": 50305 }, { "epoch": 0.47350588235294117, "grad_norm": 0.6690150780157826, "learning_rate": 2.907192472697149e-06, "loss": 0.019677376747131346, "step": 50310 }, { "epoch": 0.47355294117647057, "grad_norm": 0.40701866114432095, "learning_rate": 2.907048016646025e-06, "loss": 0.016119164228439332, "step": 50315 }, { "epoch": 0.4736, "grad_norm": 0.6400529756057834, "learning_rate": 2.906903582126474e-06, "loss": 0.017527014017105103, "step": 50320 }, { "epoch": 0.4736470588235294, "grad_norm": 0.4211613613056271, "learning_rate": 2.9067591691331496e-06, "loss": 0.022984494268894196, "step": 50325 }, { "epoch": 0.4736941176470588, "grad_norm": 0.6110371339907295, "learning_rate": 2.9066147776607056e-06, "loss": 0.015338091552257538, "step": 50330 }, { "epoch": 0.4737411764705882, "grad_norm": 0.46106107448138617, "learning_rate": 2.906470407703796e-06, "loss": 0.015151247382164001, "step": 50335 }, { "epoch": 0.4737882352941176, "grad_norm": 0.38225959025278083, "learning_rate": 2.906326059257079e-06, "loss": 0.017734462022781373, "step": 50340 }, { "epoch": 0.4738352941176471, "grad_norm": 1.3801696413208249, "learning_rate": 2.906181732315213e-06, "loss": 0.023148655891418457, "step": 50345 }, { "epoch": 0.4738823529411765, "grad_norm": 0.45157535381555525, "learning_rate": 2.9060374268728592e-06, "loss": 0.018750789761543273, "step": 50350 }, { "epoch": 0.4739294117647059, "grad_norm": 0.6296745543135285, "learning_rate": 2.9058931429246808e-06, "loss": 0.017362111806869508, "step": 50355 }, { "epoch": 0.4739764705882353, "grad_norm": 0.5656552462995978, "learning_rate": 2.905748880465341e-06, "loss": 0.017452114820480348, "step": 50360 }, { "epoch": 0.47402352941176473, "grad_norm": 0.5717657892865309, "learning_rate": 2.905604639489508e-06, "loss": 0.016860683262348176, "step": 50365 }, { "epoch": 0.47407058823529413, "grad_norm": 0.46563250612699375, "learning_rate": 2.9054604199918494e-06, "loss": 0.016084393858909606, "step": 50370 }, { "epoch": 0.47411764705882353, "grad_norm": 0.5696284342599495, "learning_rate": 2.905316221967035e-06, "loss": 0.01750798225402832, "step": 50375 }, { "epoch": 0.47416470588235293, "grad_norm": 1.1463649941452512, "learning_rate": 2.9051720454097377e-06, "loss": 0.016724927723407744, "step": 50380 }, { "epoch": 0.47421176470588233, "grad_norm": 0.6420068776002397, "learning_rate": 2.905027890314631e-06, "loss": 0.02338138073682785, "step": 50385 }, { "epoch": 0.4742588235294118, "grad_norm": 0.597209836137833, "learning_rate": 2.9048837566763904e-06, "loss": 0.01692189872264862, "step": 50390 }, { "epoch": 0.4743058823529412, "grad_norm": 0.650495554980778, "learning_rate": 2.9047396444896936e-06, "loss": 0.019433289766311646, "step": 50395 }, { "epoch": 0.4743529411764706, "grad_norm": 0.6204917299296949, "learning_rate": 2.9045955537492205e-06, "loss": 0.02493709623813629, "step": 50400 }, { "epoch": 0.4744, "grad_norm": 0.42428276017364447, "learning_rate": 2.9044514844496518e-06, "loss": 0.015836921334266663, "step": 50405 }, { "epoch": 0.4744470588235294, "grad_norm": 0.50163729253717, "learning_rate": 2.9043074365856705e-06, "loss": 0.01677587628364563, "step": 50410 }, { "epoch": 0.47449411764705884, "grad_norm": 0.6199908771099623, "learning_rate": 2.904163410151963e-06, "loss": 0.0243963286280632, "step": 50415 }, { "epoch": 0.47454117647058824, "grad_norm": 0.4513491180452007, "learning_rate": 2.9040194051432134e-06, "loss": 0.019706109166145326, "step": 50420 }, { "epoch": 0.47458823529411764, "grad_norm": 0.5073312165044036, "learning_rate": 2.9038754215541132e-06, "loss": 0.01500619500875473, "step": 50425 }, { "epoch": 0.47463529411764704, "grad_norm": 0.9266544001256602, "learning_rate": 2.903731459379351e-06, "loss": 0.01739400178194046, "step": 50430 }, { "epoch": 0.47468235294117644, "grad_norm": 0.39002608429140134, "learning_rate": 2.90358751861362e-06, "loss": 0.021528109908103943, "step": 50435 }, { "epoch": 0.4747294117647059, "grad_norm": 0.6188959116055937, "learning_rate": 2.9034435992516153e-06, "loss": 0.014780578017234803, "step": 50440 }, { "epoch": 0.4747764705882353, "grad_norm": 0.5341536298463084, "learning_rate": 2.903299701288031e-06, "loss": 0.019464045763015747, "step": 50445 }, { "epoch": 0.4748235294117647, "grad_norm": 0.44700140500072827, "learning_rate": 2.9031558247175655e-06, "loss": 0.015047729015350342, "step": 50450 }, { "epoch": 0.4748705882352941, "grad_norm": 0.4914843314143401, "learning_rate": 2.9030119695349196e-06, "loss": 0.01776829957962036, "step": 50455 }, { "epoch": 0.47491764705882356, "grad_norm": 0.2984513875487213, "learning_rate": 2.9028681357347938e-06, "loss": 0.018027415871620177, "step": 50460 }, { "epoch": 0.47496470588235296, "grad_norm": 0.6528763455297568, "learning_rate": 2.9027243233118924e-06, "loss": 0.019035470485687257, "step": 50465 }, { "epoch": 0.47501176470588236, "grad_norm": 0.4822535660289456, "learning_rate": 2.9025805322609197e-06, "loss": 0.016581957042217255, "step": 50470 }, { "epoch": 0.47505882352941176, "grad_norm": 0.6329320500393699, "learning_rate": 2.9024367625765826e-06, "loss": 0.022125983238220216, "step": 50475 }, { "epoch": 0.47510588235294116, "grad_norm": 0.4617954817635661, "learning_rate": 2.902293014253591e-06, "loss": 0.013893023133277893, "step": 50480 }, { "epoch": 0.4751529411764706, "grad_norm": 0.5263534667626253, "learning_rate": 2.9021492872866553e-06, "loss": 0.018391147255897522, "step": 50485 }, { "epoch": 0.4752, "grad_norm": 0.46571354228381723, "learning_rate": 2.9020055816704877e-06, "loss": 0.022625085711479188, "step": 50490 }, { "epoch": 0.4752470588235294, "grad_norm": 0.5251208108327541, "learning_rate": 2.9018618973998024e-06, "loss": 0.01754375398159027, "step": 50495 }, { "epoch": 0.4752941176470588, "grad_norm": 0.476997373606058, "learning_rate": 2.9017182344693168e-06, "loss": 0.01819658577442169, "step": 50500 }, { "epoch": 0.4753411764705882, "grad_norm": 0.7180429406079232, "learning_rate": 2.9015745928737476e-06, "loss": 0.024187399446964263, "step": 50505 }, { "epoch": 0.47538823529411767, "grad_norm": 0.6221919551150907, "learning_rate": 2.901430972607816e-06, "loss": 0.016930904984474183, "step": 50510 }, { "epoch": 0.47543529411764707, "grad_norm": 0.3648996058631724, "learning_rate": 2.901287373666243e-06, "loss": 0.016161298751831053, "step": 50515 }, { "epoch": 0.47548235294117647, "grad_norm": 0.685858924192483, "learning_rate": 2.9011437960437515e-06, "loss": 0.021600329875946046, "step": 50520 }, { "epoch": 0.47552941176470587, "grad_norm": 0.6011860016339461, "learning_rate": 2.9010002397350674e-06, "loss": 0.022346824407577515, "step": 50525 }, { "epoch": 0.47557647058823527, "grad_norm": 0.41958236184095593, "learning_rate": 2.9008567047349185e-06, "loss": 0.016671133041381837, "step": 50530 }, { "epoch": 0.4756235294117647, "grad_norm": 0.3393646031358896, "learning_rate": 2.900713191038033e-06, "loss": 0.020459407567977907, "step": 50535 }, { "epoch": 0.4756705882352941, "grad_norm": 0.7018030861385669, "learning_rate": 2.900569698639143e-06, "loss": 0.021090492606163025, "step": 50540 }, { "epoch": 0.4757176470588235, "grad_norm": 0.5009000510801855, "learning_rate": 2.900426227532979e-06, "loss": 0.013856568932533264, "step": 50545 }, { "epoch": 0.4757647058823529, "grad_norm": 0.550610007217635, "learning_rate": 2.9002827777142784e-06, "loss": 0.02303614467382431, "step": 50550 }, { "epoch": 0.4758117647058824, "grad_norm": 0.45108361085172183, "learning_rate": 2.9001393491777747e-06, "loss": 0.019954195618629454, "step": 50555 }, { "epoch": 0.4758588235294118, "grad_norm": 0.9861015165684044, "learning_rate": 2.8999959419182076e-06, "loss": 0.022183623909950257, "step": 50560 }, { "epoch": 0.4759058823529412, "grad_norm": 0.4981243930374833, "learning_rate": 2.899852555930317e-06, "loss": 0.01609579473733902, "step": 50565 }, { "epoch": 0.4759529411764706, "grad_norm": 0.5228513624117799, "learning_rate": 2.899709191208844e-06, "loss": 0.023491880297660826, "step": 50570 }, { "epoch": 0.476, "grad_norm": 0.39339624513243576, "learning_rate": 2.899565847748533e-06, "loss": 0.016861262917518615, "step": 50575 }, { "epoch": 0.47604705882352943, "grad_norm": 0.4759414524121818, "learning_rate": 2.899422525544129e-06, "loss": 0.016217008233070374, "step": 50580 }, { "epoch": 0.47609411764705883, "grad_norm": 0.673878565772855, "learning_rate": 2.8992792245903794e-06, "loss": 0.016128316521644592, "step": 50585 }, { "epoch": 0.47614117647058823, "grad_norm": 0.4285829304458213, "learning_rate": 2.8991359448820326e-06, "loss": 0.017104190587997437, "step": 50590 }, { "epoch": 0.47618823529411763, "grad_norm": 0.4228711519970968, "learning_rate": 2.898992686413841e-06, "loss": 0.018371222913265227, "step": 50595 }, { "epoch": 0.47623529411764703, "grad_norm": 0.6549940246328935, "learning_rate": 2.898849449180555e-06, "loss": 0.01911069601774216, "step": 50600 }, { "epoch": 0.4762823529411765, "grad_norm": 0.46040983912706795, "learning_rate": 2.8987062331769316e-06, "loss": 0.015118962526321411, "step": 50605 }, { "epoch": 0.4763294117647059, "grad_norm": 0.8546181007190311, "learning_rate": 2.8985630383977252e-06, "loss": 0.021929295361042024, "step": 50610 }, { "epoch": 0.4763764705882353, "grad_norm": 0.5043154728982704, "learning_rate": 2.8984198648376954e-06, "loss": 0.014767183363437653, "step": 50615 }, { "epoch": 0.4764235294117647, "grad_norm": 0.40198345335845137, "learning_rate": 2.898276712491601e-06, "loss": 0.022574733197689056, "step": 50620 }, { "epoch": 0.4764705882352941, "grad_norm": 0.5697998167248071, "learning_rate": 2.8981335813542046e-06, "loss": 0.015681903064250945, "step": 50625 }, { "epoch": 0.47651764705882355, "grad_norm": 0.7142113385214235, "learning_rate": 2.897990471420269e-06, "loss": 0.02412264347076416, "step": 50630 }, { "epoch": 0.47656470588235295, "grad_norm": 0.7688780474294027, "learning_rate": 2.89784738268456e-06, "loss": 0.025387266278266908, "step": 50635 }, { "epoch": 0.47661176470588235, "grad_norm": 0.6972576831347186, "learning_rate": 2.897704315141845e-06, "loss": 0.02107928693294525, "step": 50640 }, { "epoch": 0.47665882352941175, "grad_norm": 0.5980283908350796, "learning_rate": 2.897561268786892e-06, "loss": 0.016990524530410767, "step": 50645 }, { "epoch": 0.4767058823529412, "grad_norm": 0.5699362874226602, "learning_rate": 2.897418243614474e-06, "loss": 0.015656131505966186, "step": 50650 }, { "epoch": 0.4767529411764706, "grad_norm": 0.4720318589475238, "learning_rate": 2.8972752396193605e-06, "loss": 0.016237345337867738, "step": 50655 }, { "epoch": 0.4768, "grad_norm": 0.534897482022522, "learning_rate": 2.897132256796328e-06, "loss": 0.0170746386051178, "step": 50660 }, { "epoch": 0.4768470588235294, "grad_norm": 0.5656499507672754, "learning_rate": 2.896989295140153e-06, "loss": 0.019623236358165742, "step": 50665 }, { "epoch": 0.4768941176470588, "grad_norm": 0.42325318475544044, "learning_rate": 2.8968463546456125e-06, "loss": 0.017756274342536925, "step": 50670 }, { "epoch": 0.47694117647058826, "grad_norm": 0.5136620493195595, "learning_rate": 2.8967034353074865e-06, "loss": 0.016232064366340636, "step": 50675 }, { "epoch": 0.47698823529411766, "grad_norm": 0.43455977604353746, "learning_rate": 2.8965605371205576e-06, "loss": 0.014906935393810272, "step": 50680 }, { "epoch": 0.47703529411764706, "grad_norm": 0.45824264875055193, "learning_rate": 2.896417660079607e-06, "loss": 0.01955432742834091, "step": 50685 }, { "epoch": 0.47708235294117646, "grad_norm": 0.5836612765479653, "learning_rate": 2.896274804179422e-06, "loss": 0.0180867001414299, "step": 50690 }, { "epoch": 0.47712941176470586, "grad_norm": 0.6310857713570478, "learning_rate": 2.896131969414789e-06, "loss": 0.01882210820913315, "step": 50695 }, { "epoch": 0.4771764705882353, "grad_norm": 0.5706754179954524, "learning_rate": 2.8959891557804964e-06, "loss": 0.02173057198524475, "step": 50700 }, { "epoch": 0.4772235294117647, "grad_norm": 0.5286476646097777, "learning_rate": 2.8958463632713356e-06, "loss": 0.016006411612033845, "step": 50705 }, { "epoch": 0.4772705882352941, "grad_norm": 0.47243454230860243, "learning_rate": 2.8957035918820993e-06, "loss": 0.01882508546113968, "step": 50710 }, { "epoch": 0.4773176470588235, "grad_norm": 0.7389352466399435, "learning_rate": 2.89556084160758e-06, "loss": 0.018284255266189577, "step": 50715 }, { "epoch": 0.47736470588235297, "grad_norm": 0.5351781021124075, "learning_rate": 2.895418112442575e-06, "loss": 0.020602017641067505, "step": 50720 }, { "epoch": 0.47741176470588237, "grad_norm": 0.6726187011592395, "learning_rate": 2.8952754043818833e-06, "loss": 0.019577404856681822, "step": 50725 }, { "epoch": 0.47745882352941177, "grad_norm": 0.358128780426049, "learning_rate": 2.8951327174203016e-06, "loss": 0.011788131296634674, "step": 50730 }, { "epoch": 0.47750588235294117, "grad_norm": 0.4541330385949411, "learning_rate": 2.8949900515526338e-06, "loss": 0.018058255314826965, "step": 50735 }, { "epoch": 0.47755294117647057, "grad_norm": 0.5054362381963255, "learning_rate": 2.8948474067736816e-06, "loss": 0.02003030776977539, "step": 50740 }, { "epoch": 0.4776, "grad_norm": 0.7328814582422204, "learning_rate": 2.8947047830782503e-06, "loss": 0.019614461064338683, "step": 50745 }, { "epoch": 0.4776470588235294, "grad_norm": 0.5733706728860084, "learning_rate": 2.8945621804611474e-06, "loss": 0.020697304606437684, "step": 50750 }, { "epoch": 0.4776941176470588, "grad_norm": 0.3640657711603169, "learning_rate": 2.8944195989171816e-06, "loss": 0.01861986219882965, "step": 50755 }, { "epoch": 0.4777411764705882, "grad_norm": 0.4764676160184186, "learning_rate": 2.8942770384411613e-06, "loss": 0.0215249702334404, "step": 50760 }, { "epoch": 0.4777882352941176, "grad_norm": 0.4828355076497742, "learning_rate": 2.8941344990279015e-06, "loss": 0.015092815458774566, "step": 50765 }, { "epoch": 0.4778352941176471, "grad_norm": 0.5242958350729047, "learning_rate": 2.8939919806722133e-06, "loss": 0.020378178358078, "step": 50770 }, { "epoch": 0.4778823529411765, "grad_norm": 0.4754687336654767, "learning_rate": 2.8938494833689152e-06, "loss": 0.015705859661102294, "step": 50775 }, { "epoch": 0.4779294117647059, "grad_norm": 0.5817239933008528, "learning_rate": 2.893707007112822e-06, "loss": 0.0183866947889328, "step": 50780 }, { "epoch": 0.4779764705882353, "grad_norm": 0.32863791424873323, "learning_rate": 2.893564551898755e-06, "loss": 0.021554943919181824, "step": 50785 }, { "epoch": 0.4780235294117647, "grad_norm": 0.45965739070149847, "learning_rate": 2.893422117721534e-06, "loss": 0.0192018061876297, "step": 50790 }, { "epoch": 0.47807058823529414, "grad_norm": 0.5073573858035376, "learning_rate": 2.8932797045759836e-06, "loss": 0.017710596323013306, "step": 50795 }, { "epoch": 0.47811764705882354, "grad_norm": 0.4786588781736681, "learning_rate": 2.8931373124569265e-06, "loss": 0.014925509691238403, "step": 50800 }, { "epoch": 0.47816470588235294, "grad_norm": 0.43966752639867884, "learning_rate": 2.89299494135919e-06, "loss": 0.018106025457382203, "step": 50805 }, { "epoch": 0.47821176470588234, "grad_norm": 0.578292932246112, "learning_rate": 2.8928525912776027e-06, "loss": 0.015179094672203065, "step": 50810 }, { "epoch": 0.4782588235294118, "grad_norm": 0.5175987172376465, "learning_rate": 2.892710262206994e-06, "loss": 0.017405621707439423, "step": 50815 }, { "epoch": 0.4783058823529412, "grad_norm": 0.5432218561995258, "learning_rate": 2.8925679541421966e-06, "loss": 0.015300874412059785, "step": 50820 }, { "epoch": 0.4783529411764706, "grad_norm": 0.5086014199680795, "learning_rate": 2.892425667078042e-06, "loss": 0.01990886330604553, "step": 50825 }, { "epoch": 0.4784, "grad_norm": 0.38815256988576124, "learning_rate": 2.8922834010093674e-06, "loss": 0.018523016571998598, "step": 50830 }, { "epoch": 0.4784470588235294, "grad_norm": 0.5379097701769946, "learning_rate": 2.8921411559310096e-06, "loss": 0.019855776429176332, "step": 50835 }, { "epoch": 0.47849411764705885, "grad_norm": 0.7156616783563046, "learning_rate": 2.891998931837808e-06, "loss": 0.018978890776634217, "step": 50840 }, { "epoch": 0.47854117647058825, "grad_norm": 0.3833087255276265, "learning_rate": 2.8918567287246015e-06, "loss": 0.016266193985939027, "step": 50845 }, { "epoch": 0.47858823529411765, "grad_norm": 0.3702238218392476, "learning_rate": 2.891714546586234e-06, "loss": 0.01664859354496002, "step": 50850 }, { "epoch": 0.47863529411764705, "grad_norm": 0.5258728167947689, "learning_rate": 2.8915723854175488e-06, "loss": 0.01951155662536621, "step": 50855 }, { "epoch": 0.47868235294117645, "grad_norm": 0.5645089985304583, "learning_rate": 2.891430245213393e-06, "loss": 0.013705460727214814, "step": 50860 }, { "epoch": 0.4787294117647059, "grad_norm": 0.5075149442841819, "learning_rate": 2.8912881259686137e-06, "loss": 0.018771466612815858, "step": 50865 }, { "epoch": 0.4787764705882353, "grad_norm": 0.40321807453159364, "learning_rate": 2.89114602767806e-06, "loss": 0.019499309360980988, "step": 50870 }, { "epoch": 0.4788235294117647, "grad_norm": 0.9829962152046045, "learning_rate": 2.8910039503365843e-06, "loss": 0.016856080293655394, "step": 50875 }, { "epoch": 0.4788705882352941, "grad_norm": 0.5418344611326797, "learning_rate": 2.8908618939390396e-06, "loss": 0.018656224012374878, "step": 50880 }, { "epoch": 0.4789176470588235, "grad_norm": 0.691355806292401, "learning_rate": 2.89071985848028e-06, "loss": 0.01971304416656494, "step": 50885 }, { "epoch": 0.47896470588235296, "grad_norm": 0.3713085220649312, "learning_rate": 2.890577843955162e-06, "loss": 0.016566550731658934, "step": 50890 }, { "epoch": 0.47901176470588236, "grad_norm": 0.6236595763459155, "learning_rate": 2.8904358503585445e-06, "loss": 0.013684041798114777, "step": 50895 }, { "epoch": 0.47905882352941176, "grad_norm": 0.30150076330925435, "learning_rate": 2.890293877685288e-06, "loss": 0.01641574203968048, "step": 50900 }, { "epoch": 0.47910588235294116, "grad_norm": 0.4217543975068254, "learning_rate": 2.8901519259302542e-06, "loss": 0.015456539392471314, "step": 50905 }, { "epoch": 0.4791529411764706, "grad_norm": 0.5915919313406098, "learning_rate": 2.8900099950883055e-06, "loss": 0.02055371701717377, "step": 50910 }, { "epoch": 0.4792, "grad_norm": 0.6537445218726483, "learning_rate": 2.8898680851543093e-06, "loss": 0.01990380734205246, "step": 50915 }, { "epoch": 0.4792470588235294, "grad_norm": 0.49095715344341756, "learning_rate": 2.8897261961231317e-06, "loss": 0.01838841736316681, "step": 50920 }, { "epoch": 0.4792941176470588, "grad_norm": 0.7102730746500361, "learning_rate": 2.8895843279896418e-06, "loss": 0.018548431992530822, "step": 50925 }, { "epoch": 0.4793411764705882, "grad_norm": 0.4337127520188345, "learning_rate": 2.889442480748711e-06, "loss": 0.02040221095085144, "step": 50930 }, { "epoch": 0.47938823529411767, "grad_norm": 0.34079354835790926, "learning_rate": 2.889300654395211e-06, "loss": 0.01589806079864502, "step": 50935 }, { "epoch": 0.47943529411764707, "grad_norm": 0.5278676310533734, "learning_rate": 2.8891588489240164e-06, "loss": 0.01610579788684845, "step": 50940 }, { "epoch": 0.47948235294117647, "grad_norm": 0.6152164571049886, "learning_rate": 2.889017064330003e-06, "loss": 0.017912040650844573, "step": 50945 }, { "epoch": 0.47952941176470587, "grad_norm": 0.5656570294593569, "learning_rate": 2.8888753006080496e-06, "loss": 0.016667018830776214, "step": 50950 }, { "epoch": 0.47957647058823527, "grad_norm": 0.37719360356473486, "learning_rate": 2.8887335577530344e-06, "loss": 0.016358393430709838, "step": 50955 }, { "epoch": 0.4796235294117647, "grad_norm": 0.34598060751849813, "learning_rate": 2.8885918357598397e-06, "loss": 0.01665002405643463, "step": 50960 }, { "epoch": 0.4796705882352941, "grad_norm": 1.0966584598712712, "learning_rate": 2.8884501346233478e-06, "loss": 0.022662177681922913, "step": 50965 }, { "epoch": 0.4797176470588235, "grad_norm": 0.4439495503912539, "learning_rate": 2.888308454338444e-06, "loss": 0.021245703101158142, "step": 50970 }, { "epoch": 0.4797647058823529, "grad_norm": 0.5499218174075695, "learning_rate": 2.8881667949000146e-06, "loss": 0.018367360532283782, "step": 50975 }, { "epoch": 0.4798117647058823, "grad_norm": 0.345605694499681, "learning_rate": 2.8880251563029493e-06, "loss": 0.01832878440618515, "step": 50980 }, { "epoch": 0.4798588235294118, "grad_norm": 0.41739837571488947, "learning_rate": 2.8878835385421355e-06, "loss": 0.01566153168678284, "step": 50985 }, { "epoch": 0.4799058823529412, "grad_norm": 0.41802748855059935, "learning_rate": 2.8877419416124674e-06, "loss": 0.01333269029855728, "step": 50990 }, { "epoch": 0.4799529411764706, "grad_norm": 0.5060753052600614, "learning_rate": 2.8876003655088373e-06, "loss": 0.017436747252941132, "step": 50995 }, { "epoch": 0.48, "grad_norm": 0.41588415726036215, "learning_rate": 2.887458810226142e-06, "loss": 0.013533146679401397, "step": 51000 }, { "epoch": 0.48004705882352944, "grad_norm": 0.5971531499939038, "learning_rate": 2.8873172757592765e-06, "loss": 0.018147923052310944, "step": 51005 }, { "epoch": 0.48009411764705884, "grad_norm": 0.631997881483661, "learning_rate": 2.887175762103141e-06, "loss": 0.017352135479450227, "step": 51010 }, { "epoch": 0.48014117647058824, "grad_norm": 1.2281794759409768, "learning_rate": 2.8870342692526366e-06, "loss": 0.01750491261482239, "step": 51015 }, { "epoch": 0.48018823529411764, "grad_norm": 0.5126907717776812, "learning_rate": 2.8868927972026644e-06, "loss": 0.01763608455657959, "step": 51020 }, { "epoch": 0.48023529411764704, "grad_norm": 0.4681580734412991, "learning_rate": 2.8867513459481293e-06, "loss": 0.02256532907485962, "step": 51025 }, { "epoch": 0.4802823529411765, "grad_norm": 0.7793377504933109, "learning_rate": 2.8866099154839367e-06, "loss": 0.026289421319961547, "step": 51030 }, { "epoch": 0.4803294117647059, "grad_norm": 0.498729572640127, "learning_rate": 2.8864685058049945e-06, "loss": 0.017174825072288513, "step": 51035 }, { "epoch": 0.4803764705882353, "grad_norm": 0.7541404377644731, "learning_rate": 2.8863271169062112e-06, "loss": 0.019345426559448244, "step": 51040 }, { "epoch": 0.4804235294117647, "grad_norm": 0.5398617872883761, "learning_rate": 2.8861857487825e-06, "loss": 0.019594524800777436, "step": 51045 }, { "epoch": 0.4804705882352941, "grad_norm": 0.5279772796330984, "learning_rate": 2.8860444014287715e-06, "loss": 0.017647553980350495, "step": 51050 }, { "epoch": 0.48051764705882355, "grad_norm": 0.684239640494421, "learning_rate": 2.8859030748399413e-06, "loss": 0.01922139823436737, "step": 51055 }, { "epoch": 0.48056470588235295, "grad_norm": 0.5725042803749505, "learning_rate": 2.8857617690109255e-06, "loss": 0.019501715898513794, "step": 51060 }, { "epoch": 0.48061176470588235, "grad_norm": 0.4900536601928716, "learning_rate": 2.8856204839366425e-06, "loss": 0.018449726700782775, "step": 51065 }, { "epoch": 0.48065882352941175, "grad_norm": 0.49419473286531107, "learning_rate": 2.8854792196120116e-06, "loss": 0.01660289168357849, "step": 51070 }, { "epoch": 0.48070588235294115, "grad_norm": 0.7749958307818573, "learning_rate": 2.885337976031955e-06, "loss": 0.02251783311367035, "step": 51075 }, { "epoch": 0.4807529411764706, "grad_norm": 0.45750295481748826, "learning_rate": 2.8851967531913955e-06, "loss": 0.016299337148666382, "step": 51080 }, { "epoch": 0.4808, "grad_norm": 0.41613076503900387, "learning_rate": 2.885055551085258e-06, "loss": 0.021685567498207093, "step": 51085 }, { "epoch": 0.4808470588235294, "grad_norm": 0.28936627501077716, "learning_rate": 2.884914369708469e-06, "loss": 0.015585207939147949, "step": 51090 }, { "epoch": 0.4808941176470588, "grad_norm": 0.5552520800645566, "learning_rate": 2.8847732090559575e-06, "loss": 0.021657712757587433, "step": 51095 }, { "epoch": 0.48094117647058826, "grad_norm": 0.6107857971850631, "learning_rate": 2.8846320691226547e-06, "loss": 0.0189844086766243, "step": 51100 }, { "epoch": 0.48098823529411766, "grad_norm": 0.42116428047860804, "learning_rate": 2.884490949903491e-06, "loss": 0.017846474051475526, "step": 51105 }, { "epoch": 0.48103529411764706, "grad_norm": 0.7014793301592395, "learning_rate": 2.8843498513934004e-06, "loss": 0.01920374631881714, "step": 51110 }, { "epoch": 0.48108235294117646, "grad_norm": 0.39046552214097346, "learning_rate": 2.8842087735873185e-06, "loss": 0.01580439358949661, "step": 51115 }, { "epoch": 0.48112941176470586, "grad_norm": 0.5603318711454057, "learning_rate": 2.8840677164801832e-06, "loss": 0.013050925731658936, "step": 51120 }, { "epoch": 0.4811764705882353, "grad_norm": 0.492652909689426, "learning_rate": 2.8839266800669324e-06, "loss": 0.01575128436088562, "step": 51125 }, { "epoch": 0.4812235294117647, "grad_norm": 0.51263133421998, "learning_rate": 2.883785664342507e-06, "loss": 0.016718706488609313, "step": 51130 }, { "epoch": 0.4812705882352941, "grad_norm": 0.5065288319635031, "learning_rate": 2.88364466930185e-06, "loss": 0.018613912165164948, "step": 51135 }, { "epoch": 0.4813176470588235, "grad_norm": 0.49125110048873477, "learning_rate": 2.8835036949399047e-06, "loss": 0.016947665810585023, "step": 51140 }, { "epoch": 0.4813647058823529, "grad_norm": 0.37842879625669207, "learning_rate": 2.883362741251618e-06, "loss": 0.01399872601032257, "step": 51145 }, { "epoch": 0.4814117647058824, "grad_norm": 0.5932902058538608, "learning_rate": 2.883221808231936e-06, "loss": 0.019763949513435363, "step": 51150 }, { "epoch": 0.4814588235294118, "grad_norm": 0.45494543184482994, "learning_rate": 2.883080895875808e-06, "loss": 0.017842902243137358, "step": 51155 }, { "epoch": 0.4815058823529412, "grad_norm": 0.5471014048951587, "learning_rate": 2.8829400041781867e-06, "loss": 0.017692850530147554, "step": 51160 }, { "epoch": 0.4815529411764706, "grad_norm": 0.4650284052634461, "learning_rate": 2.882799133134024e-06, "loss": 0.017688646912574768, "step": 51165 }, { "epoch": 0.4816, "grad_norm": 0.5665747697734285, "learning_rate": 2.8826582827382744e-06, "loss": 0.016185271739959716, "step": 51170 }, { "epoch": 0.48164705882352943, "grad_norm": 0.5150383276682984, "learning_rate": 2.8825174529858935e-06, "loss": 0.019301559031009673, "step": 51175 }, { "epoch": 0.48169411764705883, "grad_norm": 0.5122949759967403, "learning_rate": 2.88237664387184e-06, "loss": 0.014776521921157837, "step": 51180 }, { "epoch": 0.48174117647058823, "grad_norm": 0.61884219722065, "learning_rate": 2.882235855391073e-06, "loss": 0.02002849280834198, "step": 51185 }, { "epoch": 0.48178823529411763, "grad_norm": 0.49168058319582775, "learning_rate": 2.882095087538554e-06, "loss": 0.016240864992141724, "step": 51190 }, { "epoch": 0.4818352941176471, "grad_norm": 0.6654901886301675, "learning_rate": 2.881954340309246e-06, "loss": 0.018522316217422487, "step": 51195 }, { "epoch": 0.4818823529411765, "grad_norm": 0.5755143926832372, "learning_rate": 2.8818136136981146e-06, "loss": 0.020030483603477478, "step": 51200 }, { "epoch": 0.4819294117647059, "grad_norm": 0.807811680267028, "learning_rate": 2.8816729077001256e-06, "loss": 0.02016511559486389, "step": 51205 }, { "epoch": 0.4819764705882353, "grad_norm": 0.5257074614541338, "learning_rate": 2.8815322223102466e-06, "loss": 0.017754602432250976, "step": 51210 }, { "epoch": 0.4820235294117647, "grad_norm": 0.5891718848508076, "learning_rate": 2.8813915575234492e-06, "loss": 0.02107924222946167, "step": 51215 }, { "epoch": 0.48207058823529414, "grad_norm": 0.5032371702103047, "learning_rate": 2.8812509133347038e-06, "loss": 0.019358643889427186, "step": 51220 }, { "epoch": 0.48211764705882354, "grad_norm": 0.45800588475224924, "learning_rate": 2.8811102897389848e-06, "loss": 0.019340163469314574, "step": 51225 }, { "epoch": 0.48216470588235294, "grad_norm": 0.5740496374327332, "learning_rate": 2.8809696867312664e-06, "loss": 0.018021902441978453, "step": 51230 }, { "epoch": 0.48221176470588234, "grad_norm": 0.39734599445697344, "learning_rate": 2.8808291043065265e-06, "loss": 0.016122810542583466, "step": 51235 }, { "epoch": 0.48225882352941174, "grad_norm": 0.5893811214399051, "learning_rate": 2.8806885424597423e-06, "loss": 0.019675323367118837, "step": 51240 }, { "epoch": 0.4823058823529412, "grad_norm": 0.712619584162423, "learning_rate": 2.8805480011858955e-06, "loss": 0.023219388723373414, "step": 51245 }, { "epoch": 0.4823529411764706, "grad_norm": 0.47045661002021544, "learning_rate": 2.880407480479967e-06, "loss": 0.016157180070877075, "step": 51250 }, { "epoch": 0.4824, "grad_norm": 0.6629234994206382, "learning_rate": 2.8802669803369402e-06, "loss": 0.019236433506011962, "step": 51255 }, { "epoch": 0.4824470588235294, "grad_norm": 0.5485897443482397, "learning_rate": 2.880126500751802e-06, "loss": 0.018845173716545104, "step": 51260 }, { "epoch": 0.48249411764705885, "grad_norm": 0.5478946569045502, "learning_rate": 2.8799860417195393e-06, "loss": 0.0200833261013031, "step": 51265 }, { "epoch": 0.48254117647058825, "grad_norm": 0.46879110759337456, "learning_rate": 2.8798456032351396e-06, "loss": 0.017887309193611145, "step": 51270 }, { "epoch": 0.48258823529411765, "grad_norm": 0.6798301411954599, "learning_rate": 2.8797051852935937e-06, "loss": 0.022398799657821655, "step": 51275 }, { "epoch": 0.48263529411764705, "grad_norm": 0.40843244209704105, "learning_rate": 2.879564787889895e-06, "loss": 0.01839127242565155, "step": 51280 }, { "epoch": 0.48268235294117645, "grad_norm": 0.6569305667477846, "learning_rate": 2.8794244110190367e-06, "loss": 0.018298125267028807, "step": 51285 }, { "epoch": 0.4827294117647059, "grad_norm": 0.721762314340468, "learning_rate": 2.8792840546760154e-06, "loss": 0.018967485427856444, "step": 51290 }, { "epoch": 0.4827764705882353, "grad_norm": 0.624176217606843, "learning_rate": 2.879143718855827e-06, "loss": 0.017379269003868103, "step": 51295 }, { "epoch": 0.4828235294117647, "grad_norm": 0.4610213406818506, "learning_rate": 2.879003403553471e-06, "loss": 0.01747315526008606, "step": 51300 }, { "epoch": 0.4828705882352941, "grad_norm": 1.0326860559301434, "learning_rate": 2.878863108763949e-06, "loss": 0.01952333003282547, "step": 51305 }, { "epoch": 0.4829176470588235, "grad_norm": 0.6459248367597297, "learning_rate": 2.8787228344822625e-06, "loss": 0.01814795136451721, "step": 51310 }, { "epoch": 0.48296470588235296, "grad_norm": 0.4893043618538082, "learning_rate": 2.8785825807034166e-06, "loss": 0.017951393127441408, "step": 51315 }, { "epoch": 0.48301176470588236, "grad_norm": 0.4940166073395812, "learning_rate": 2.8784423474224173e-06, "loss": 0.01961060017347336, "step": 51320 }, { "epoch": 0.48305882352941176, "grad_norm": 0.5233694496995688, "learning_rate": 2.878302134634271e-06, "loss": 0.014984098076820374, "step": 51325 }, { "epoch": 0.48310588235294116, "grad_norm": 0.7147521615774257, "learning_rate": 2.878161942333988e-06, "loss": 0.018852370977401733, "step": 51330 }, { "epoch": 0.48315294117647056, "grad_norm": 0.4439114902839464, "learning_rate": 2.8780217705165785e-06, "loss": 0.016719552874565124, "step": 51335 }, { "epoch": 0.4832, "grad_norm": 0.4873929190386709, "learning_rate": 2.877881619177057e-06, "loss": 0.02187182903289795, "step": 51340 }, { "epoch": 0.4832470588235294, "grad_norm": 0.410146590791917, "learning_rate": 2.877741488310436e-06, "loss": 0.01551607996225357, "step": 51345 }, { "epoch": 0.4832941176470588, "grad_norm": 0.7408087347001656, "learning_rate": 2.877601377911732e-06, "loss": 0.01918395161628723, "step": 51350 }, { "epoch": 0.4833411764705882, "grad_norm": 0.46805610336098713, "learning_rate": 2.8774612879759635e-06, "loss": 0.019013968110084534, "step": 51355 }, { "epoch": 0.4833882352941177, "grad_norm": 0.4176231268457287, "learning_rate": 2.877321218498149e-06, "loss": 0.01962828040122986, "step": 51360 }, { "epoch": 0.4834352941176471, "grad_norm": 0.5891432241028659, "learning_rate": 2.877181169473311e-06, "loss": 0.02328963577747345, "step": 51365 }, { "epoch": 0.4834823529411765, "grad_norm": 0.43870752336666646, "learning_rate": 2.8770411408964716e-06, "loss": 0.01666305959224701, "step": 51370 }, { "epoch": 0.4835294117647059, "grad_norm": 0.46983045323159883, "learning_rate": 2.8769011327626556e-06, "loss": 0.014414925873279572, "step": 51375 }, { "epoch": 0.4835764705882353, "grad_norm": 0.5679573148723553, "learning_rate": 2.876761145066889e-06, "loss": 0.01684008240699768, "step": 51380 }, { "epoch": 0.48362352941176473, "grad_norm": 0.36905409797434036, "learning_rate": 2.8766211778042008e-06, "loss": 0.01727234721183777, "step": 51385 }, { "epoch": 0.48367058823529413, "grad_norm": 0.42022842456596643, "learning_rate": 2.8764812309696187e-06, "loss": 0.01858636885881424, "step": 51390 }, { "epoch": 0.48371764705882353, "grad_norm": 0.5599489463954925, "learning_rate": 2.8763413045581763e-06, "loss": 0.0197698712348938, "step": 51395 }, { "epoch": 0.48376470588235293, "grad_norm": 0.4203402674712342, "learning_rate": 2.8762013985649044e-06, "loss": 0.01361929178237915, "step": 51400 }, { "epoch": 0.48381176470588233, "grad_norm": 0.5800292693400664, "learning_rate": 2.8760615129848406e-06, "loss": 0.0166470006108284, "step": 51405 }, { "epoch": 0.4838588235294118, "grad_norm": 0.446493563123638, "learning_rate": 2.8759216478130188e-06, "loss": 0.016165879368782044, "step": 51410 }, { "epoch": 0.4839058823529412, "grad_norm": 0.7108161937707755, "learning_rate": 2.8757818030444784e-06, "loss": 0.026676049828529357, "step": 51415 }, { "epoch": 0.4839529411764706, "grad_norm": 0.49963278677535733, "learning_rate": 2.875641978674259e-06, "loss": 0.02097972631454468, "step": 51420 }, { "epoch": 0.484, "grad_norm": 0.32896147927988084, "learning_rate": 2.8755021746974015e-06, "loss": 0.016526961326599122, "step": 51425 }, { "epoch": 0.4840470588235294, "grad_norm": 0.4060598310090868, "learning_rate": 2.87536239110895e-06, "loss": 0.019616636633872985, "step": 51430 }, { "epoch": 0.48409411764705884, "grad_norm": 0.5127790490103215, "learning_rate": 2.8752226279039485e-06, "loss": 0.01679348051548004, "step": 51435 }, { "epoch": 0.48414117647058824, "grad_norm": 0.36461918643767616, "learning_rate": 2.875082885077445e-06, "loss": 0.019227594137191772, "step": 51440 }, { "epoch": 0.48418823529411764, "grad_norm": 0.3742017616894066, "learning_rate": 2.8749431626244863e-06, "loss": 0.020693059265613555, "step": 51445 }, { "epoch": 0.48423529411764704, "grad_norm": 0.4701816775397293, "learning_rate": 2.8748034605401227e-06, "loss": 0.013965167105197906, "step": 51450 }, { "epoch": 0.4842823529411765, "grad_norm": 0.4848782300711726, "learning_rate": 2.8746637788194064e-06, "loss": 0.021450015902519225, "step": 51455 }, { "epoch": 0.4843294117647059, "grad_norm": 0.6813516845351882, "learning_rate": 2.87452411745739e-06, "loss": 0.016470307111740114, "step": 51460 }, { "epoch": 0.4843764705882353, "grad_norm": 0.34834766869142625, "learning_rate": 2.874384476449129e-06, "loss": 0.01407201886177063, "step": 51465 }, { "epoch": 0.4844235294117647, "grad_norm": 0.5535335729656322, "learning_rate": 2.87424485578968e-06, "loss": 0.018699440360069274, "step": 51470 }, { "epoch": 0.4844705882352941, "grad_norm": 0.670838595000586, "learning_rate": 2.874105255474101e-06, "loss": 0.015966255962848664, "step": 51475 }, { "epoch": 0.48451764705882355, "grad_norm": 0.5872441588055796, "learning_rate": 2.8739656754974527e-06, "loss": 0.021421095728874205, "step": 51480 }, { "epoch": 0.48456470588235295, "grad_norm": 0.43447746698948425, "learning_rate": 2.873826115854796e-06, "loss": 0.01960199177265167, "step": 51485 }, { "epoch": 0.48461176470588235, "grad_norm": 0.5929012251102312, "learning_rate": 2.873686576541195e-06, "loss": 0.015943270921707154, "step": 51490 }, { "epoch": 0.48465882352941175, "grad_norm": 0.4430604739320826, "learning_rate": 2.8735470575517137e-06, "loss": 0.018007269501686095, "step": 51495 }, { "epoch": 0.48470588235294115, "grad_norm": 0.4840966554600164, "learning_rate": 2.8734075588814203e-06, "loss": 0.017577172815799715, "step": 51500 }, { "epoch": 0.4847529411764706, "grad_norm": 0.8606029881862859, "learning_rate": 2.8732680805253823e-06, "loss": 0.023037636280059816, "step": 51505 }, { "epoch": 0.4848, "grad_norm": 0.43729854796660117, "learning_rate": 2.87312862247867e-06, "loss": 0.014779712259769439, "step": 51510 }, { "epoch": 0.4848470588235294, "grad_norm": 0.3790334685696638, "learning_rate": 2.8729891847363554e-06, "loss": 0.01708379089832306, "step": 51515 }, { "epoch": 0.4848941176470588, "grad_norm": 0.7207017732997367, "learning_rate": 2.872849767293512e-06, "loss": 0.023769064247608183, "step": 51520 }, { "epoch": 0.4849411764705882, "grad_norm": 0.42261863158765006, "learning_rate": 2.8727103701452134e-06, "loss": 0.016774049401283263, "step": 51525 }, { "epoch": 0.48498823529411766, "grad_norm": 0.6863147226010206, "learning_rate": 2.8725709932865385e-06, "loss": 0.023253709077835083, "step": 51530 }, { "epoch": 0.48503529411764706, "grad_norm": 0.4627138766437291, "learning_rate": 2.872431636712565e-06, "loss": 0.016737519204616545, "step": 51535 }, { "epoch": 0.48508235294117646, "grad_norm": 0.7046678180409249, "learning_rate": 2.872292300418373e-06, "loss": 0.020648065209388732, "step": 51540 }, { "epoch": 0.48512941176470586, "grad_norm": 0.5184637826698715, "learning_rate": 2.8721529843990443e-06, "loss": 0.01345684826374054, "step": 51545 }, { "epoch": 0.4851764705882353, "grad_norm": 0.5118225145139165, "learning_rate": 2.8720136886496614e-06, "loss": 0.014914576709270478, "step": 51550 }, { "epoch": 0.4852235294117647, "grad_norm": 1.0027727547321124, "learning_rate": 2.871874413165311e-06, "loss": 0.020824730396270752, "step": 51555 }, { "epoch": 0.4852705882352941, "grad_norm": 0.5687992405993596, "learning_rate": 2.871735157941079e-06, "loss": 0.023411941528320313, "step": 51560 }, { "epoch": 0.4853176470588235, "grad_norm": 0.5903224262151062, "learning_rate": 2.8715959229720554e-06, "loss": 0.019403749704360963, "step": 51565 }, { "epoch": 0.4853647058823529, "grad_norm": 0.7531207576721798, "learning_rate": 2.8714567082533273e-06, "loss": 0.017145726084709167, "step": 51570 }, { "epoch": 0.4854117647058824, "grad_norm": 0.5050598923967794, "learning_rate": 2.8713175137799893e-06, "loss": 0.01952897012233734, "step": 51575 }, { "epoch": 0.4854588235294118, "grad_norm": 0.48269536411317876, "learning_rate": 2.8711783395471338e-06, "loss": 0.015752097964286803, "step": 51580 }, { "epoch": 0.4855058823529412, "grad_norm": 0.5047111256088619, "learning_rate": 2.871039185549856e-06, "loss": 0.016883018612861633, "step": 51585 }, { "epoch": 0.4855529411764706, "grad_norm": 0.5059937511185943, "learning_rate": 2.8709000517832524e-06, "loss": 0.015689873695373537, "step": 51590 }, { "epoch": 0.4856, "grad_norm": 0.6837434030441467, "learning_rate": 2.870760938242422e-06, "loss": 0.016225582361221312, "step": 51595 }, { "epoch": 0.48564705882352943, "grad_norm": 0.4486593903156872, "learning_rate": 2.8706218449224656e-06, "loss": 0.017680791020393372, "step": 51600 }, { "epoch": 0.48569411764705883, "grad_norm": 0.7160755157877414, "learning_rate": 2.870482771818483e-06, "loss": 0.02863534092903137, "step": 51605 }, { "epoch": 0.48574117647058823, "grad_norm": 0.5461721328054638, "learning_rate": 2.8703437189255794e-06, "loss": 0.02218528985977173, "step": 51610 }, { "epoch": 0.48578823529411763, "grad_norm": 0.5130963843683954, "learning_rate": 2.870204686238859e-06, "loss": 0.020854844152927397, "step": 51615 }, { "epoch": 0.48583529411764703, "grad_norm": 0.505670813175497, "learning_rate": 2.870065673753429e-06, "loss": 0.02170453518629074, "step": 51620 }, { "epoch": 0.4858823529411765, "grad_norm": 0.48985274918021054, "learning_rate": 2.8699266814643976e-06, "loss": 0.022704215347766878, "step": 51625 }, { "epoch": 0.4859294117647059, "grad_norm": 0.5060541593548956, "learning_rate": 2.869787709366875e-06, "loss": 0.017628592252731324, "step": 51630 }, { "epoch": 0.4859764705882353, "grad_norm": 0.5345163334281884, "learning_rate": 2.8696487574559733e-06, "loss": 0.01819152235984802, "step": 51635 }, { "epoch": 0.4860235294117647, "grad_norm": 0.6627807056590539, "learning_rate": 2.8695098257268057e-06, "loss": 0.0179739773273468, "step": 51640 }, { "epoch": 0.48607058823529414, "grad_norm": 0.8995838706195682, "learning_rate": 2.8693709141744864e-06, "loss": 0.021075111627578736, "step": 51645 }, { "epoch": 0.48611764705882354, "grad_norm": 0.6781976376105882, "learning_rate": 2.8692320227941335e-06, "loss": 0.0170292004942894, "step": 51650 }, { "epoch": 0.48616470588235294, "grad_norm": 0.42849100971095616, "learning_rate": 2.869093151580864e-06, "loss": 0.01940772831439972, "step": 51655 }, { "epoch": 0.48621176470588234, "grad_norm": 0.4546836988157198, "learning_rate": 2.8689543005297994e-06, "loss": 0.028535857796669006, "step": 51660 }, { "epoch": 0.48625882352941174, "grad_norm": 0.732572135350577, "learning_rate": 2.86881546963606e-06, "loss": 0.019480456411838532, "step": 51665 }, { "epoch": 0.4863058823529412, "grad_norm": 0.43380110176739245, "learning_rate": 2.86867665889477e-06, "loss": 0.019708892703056334, "step": 51670 }, { "epoch": 0.4863529411764706, "grad_norm": 0.8792389978205757, "learning_rate": 2.868537868301054e-06, "loss": 0.01735784113407135, "step": 51675 }, { "epoch": 0.4864, "grad_norm": 0.40530050128945444, "learning_rate": 2.8683990978500386e-06, "loss": 0.017295873165130614, "step": 51680 }, { "epoch": 0.4864470588235294, "grad_norm": 0.4539994960739847, "learning_rate": 2.8682603475368524e-06, "loss": 0.017860442399978638, "step": 51685 }, { "epoch": 0.4864941176470588, "grad_norm": 0.8169574309442701, "learning_rate": 2.868121617356625e-06, "loss": 0.020413808524608612, "step": 51690 }, { "epoch": 0.48654117647058825, "grad_norm": 0.5372816211219882, "learning_rate": 2.867982907304489e-06, "loss": 0.0162402480840683, "step": 51695 }, { "epoch": 0.48658823529411765, "grad_norm": 0.6967437364472252, "learning_rate": 2.867844217375576e-06, "loss": 0.01682334840297699, "step": 51700 }, { "epoch": 0.48663529411764705, "grad_norm": 0.43676341916088346, "learning_rate": 2.8677055475650218e-06, "loss": 0.01764365881681442, "step": 51705 }, { "epoch": 0.48668235294117645, "grad_norm": 0.5507344632778024, "learning_rate": 2.8675668978679627e-06, "loss": 0.019928428530693054, "step": 51710 }, { "epoch": 0.48672941176470585, "grad_norm": 0.6242917502467832, "learning_rate": 2.8674282682795373e-06, "loss": 0.01650099903345108, "step": 51715 }, { "epoch": 0.4867764705882353, "grad_norm": 0.5292572651956472, "learning_rate": 2.8672896587948845e-06, "loss": 0.015107536315917968, "step": 51720 }, { "epoch": 0.4868235294117647, "grad_norm": 0.46842057914772534, "learning_rate": 2.867151069409147e-06, "loss": 0.017850467562675477, "step": 51725 }, { "epoch": 0.4868705882352941, "grad_norm": 0.41886503414020493, "learning_rate": 2.867012500117467e-06, "loss": 0.016476750373840332, "step": 51730 }, { "epoch": 0.4869176470588235, "grad_norm": 0.5259300860388927, "learning_rate": 2.866873950914989e-06, "loss": 0.01441555768251419, "step": 51735 }, { "epoch": 0.48696470588235297, "grad_norm": 0.5145038444432702, "learning_rate": 2.8667354217968603e-06, "loss": 0.01915305256843567, "step": 51740 }, { "epoch": 0.48701176470588237, "grad_norm": 0.5385354749812545, "learning_rate": 2.8665969127582283e-06, "loss": 0.019367265701293945, "step": 51745 }, { "epoch": 0.48705882352941177, "grad_norm": 0.5174496584334825, "learning_rate": 2.866458423794243e-06, "loss": 0.017381736636161806, "step": 51750 }, { "epoch": 0.48710588235294117, "grad_norm": 0.9306989555916273, "learning_rate": 2.8663199549000552e-06, "loss": 0.018797272443771364, "step": 51755 }, { "epoch": 0.48715294117647057, "grad_norm": 0.7332546166279997, "learning_rate": 2.866181506070818e-06, "loss": 0.018551184237003325, "step": 51760 }, { "epoch": 0.4872, "grad_norm": 0.4070544067781965, "learning_rate": 2.866043077301687e-06, "loss": 0.01905178427696228, "step": 51765 }, { "epoch": 0.4872470588235294, "grad_norm": 0.6064200019984445, "learning_rate": 2.865904668587817e-06, "loss": 0.02067682147026062, "step": 51770 }, { "epoch": 0.4872941176470588, "grad_norm": 0.44082061045139814, "learning_rate": 2.865766279924367e-06, "loss": 0.01805194318294525, "step": 51775 }, { "epoch": 0.4873411764705882, "grad_norm": 0.5667765477726996, "learning_rate": 2.8656279113064953e-06, "loss": 0.01818462908267975, "step": 51780 }, { "epoch": 0.4873882352941176, "grad_norm": 0.5779086114414558, "learning_rate": 2.865489562729364e-06, "loss": 0.015729860961437227, "step": 51785 }, { "epoch": 0.4874352941176471, "grad_norm": 0.6193156157670142, "learning_rate": 2.8653512341881362e-06, "loss": 0.018687233328819275, "step": 51790 }, { "epoch": 0.4874823529411765, "grad_norm": 0.7664135284509519, "learning_rate": 2.865212925677975e-06, "loss": 0.02319803535938263, "step": 51795 }, { "epoch": 0.4875294117647059, "grad_norm": 0.8646549245693674, "learning_rate": 2.8650746371940473e-06, "loss": 0.018880844116210938, "step": 51800 }, { "epoch": 0.4875764705882353, "grad_norm": 0.4921725099329632, "learning_rate": 2.86493636873152e-06, "loss": 0.01741207391023636, "step": 51805 }, { "epoch": 0.48762352941176473, "grad_norm": 0.5824599191740685, "learning_rate": 2.864798120285564e-06, "loss": 0.01567138135433197, "step": 51810 }, { "epoch": 0.48767058823529413, "grad_norm": 0.5798922754205575, "learning_rate": 2.864659891851349e-06, "loss": 0.015433962643146514, "step": 51815 }, { "epoch": 0.48771764705882353, "grad_norm": 0.4447575742847449, "learning_rate": 2.8645216834240474e-06, "loss": 0.016841335594654082, "step": 51820 }, { "epoch": 0.48776470588235293, "grad_norm": 0.5351428270345977, "learning_rate": 2.864383494998834e-06, "loss": 0.013632073998451233, "step": 51825 }, { "epoch": 0.48781176470588233, "grad_norm": 0.3875567731850286, "learning_rate": 2.864245326570885e-06, "loss": 0.0181523859500885, "step": 51830 }, { "epoch": 0.4878588235294118, "grad_norm": 0.5346450922391364, "learning_rate": 2.8641071781353764e-06, "loss": 0.015217822790145875, "step": 51835 }, { "epoch": 0.4879058823529412, "grad_norm": 0.6434962966734942, "learning_rate": 2.8639690496874882e-06, "loss": 0.017846105992794035, "step": 51840 }, { "epoch": 0.4879529411764706, "grad_norm": 0.593268784025866, "learning_rate": 2.8638309412224015e-06, "loss": 0.01910310089588165, "step": 51845 }, { "epoch": 0.488, "grad_norm": 0.6071358314653976, "learning_rate": 2.863692852735298e-06, "loss": 0.01595710515975952, "step": 51850 }, { "epoch": 0.4880470588235294, "grad_norm": 0.5553692359554195, "learning_rate": 2.8635547842213618e-06, "loss": 0.02101055085659027, "step": 51855 }, { "epoch": 0.48809411764705884, "grad_norm": 0.5596828532973196, "learning_rate": 2.863416735675779e-06, "loss": 0.01880088448524475, "step": 51860 }, { "epoch": 0.48814117647058825, "grad_norm": 0.520576591886573, "learning_rate": 2.863278707093736e-06, "loss": 0.018926745653152464, "step": 51865 }, { "epoch": 0.48818823529411765, "grad_norm": 0.43084061526520795, "learning_rate": 2.8631406984704224e-06, "loss": 0.013894644379615784, "step": 51870 }, { "epoch": 0.48823529411764705, "grad_norm": 0.5440016895697742, "learning_rate": 2.8630027098010276e-06, "loss": 0.019956716895103456, "step": 51875 }, { "epoch": 0.48828235294117645, "grad_norm": 0.6565136487595974, "learning_rate": 2.8628647410807443e-06, "loss": 0.01820014715194702, "step": 51880 }, { "epoch": 0.4883294117647059, "grad_norm": 0.3707593335043351, "learning_rate": 2.862726792304767e-06, "loss": 0.019524428248405456, "step": 51885 }, { "epoch": 0.4883764705882353, "grad_norm": 0.40520661382782636, "learning_rate": 2.8625888634682896e-06, "loss": 0.016869747638702394, "step": 51890 }, { "epoch": 0.4884235294117647, "grad_norm": 0.6548754232196596, "learning_rate": 2.8624509545665103e-06, "loss": 0.01797345280647278, "step": 51895 }, { "epoch": 0.4884705882352941, "grad_norm": 0.5432824534454255, "learning_rate": 2.8623130655946264e-06, "loss": 0.017123925685882568, "step": 51900 }, { "epoch": 0.48851764705882356, "grad_norm": 0.43374691119319364, "learning_rate": 2.8621751965478394e-06, "loss": 0.015670794248580932, "step": 51905 }, { "epoch": 0.48856470588235296, "grad_norm": 0.4828210545879187, "learning_rate": 2.86203734742135e-06, "loss": 0.015078401565551758, "step": 51910 }, { "epoch": 0.48861176470588236, "grad_norm": 0.5141629979357999, "learning_rate": 2.8618995182103626e-06, "loss": 0.016545385122299194, "step": 51915 }, { "epoch": 0.48865882352941176, "grad_norm": 0.5622446929072916, "learning_rate": 2.8617617089100813e-06, "loss": 0.0183249294757843, "step": 51920 }, { "epoch": 0.48870588235294116, "grad_norm": 0.6719501901794808, "learning_rate": 2.8616239195157138e-06, "loss": 0.017210251092910765, "step": 51925 }, { "epoch": 0.4887529411764706, "grad_norm": 0.500728171119082, "learning_rate": 2.861486150022467e-06, "loss": 0.01713862419128418, "step": 51930 }, { "epoch": 0.4888, "grad_norm": 0.41433645823731624, "learning_rate": 2.8613484004255513e-06, "loss": 0.017688505351543427, "step": 51935 }, { "epoch": 0.4888470588235294, "grad_norm": 0.3441733926598118, "learning_rate": 2.8612106707201794e-06, "loss": 0.01794370263814926, "step": 51940 }, { "epoch": 0.4888941176470588, "grad_norm": 0.5703766521063811, "learning_rate": 2.8610729609015626e-06, "loss": 0.020572525262832642, "step": 51945 }, { "epoch": 0.4889411764705882, "grad_norm": 0.6108955355711847, "learning_rate": 2.860935270964917e-06, "loss": 0.018732544779777528, "step": 51950 }, { "epoch": 0.48898823529411767, "grad_norm": 0.5459473618942331, "learning_rate": 2.860797600905458e-06, "loss": 0.014625689387321473, "step": 51955 }, { "epoch": 0.48903529411764707, "grad_norm": 0.5894804619341907, "learning_rate": 2.860659950718404e-06, "loss": 0.020770825445652008, "step": 51960 }, { "epoch": 0.48908235294117647, "grad_norm": 0.4722564942495507, "learning_rate": 2.8605223203989746e-06, "loss": 0.01587234139442444, "step": 51965 }, { "epoch": 0.48912941176470587, "grad_norm": 0.44811706388978156, "learning_rate": 2.8603847099423907e-06, "loss": 0.018152594566345215, "step": 51970 }, { "epoch": 0.48917647058823527, "grad_norm": 0.4322522337213978, "learning_rate": 2.860247119343875e-06, "loss": 0.01572923958301544, "step": 51975 }, { "epoch": 0.4892235294117647, "grad_norm": 0.6232777197154338, "learning_rate": 2.860109548598653e-06, "loss": 0.01495501697063446, "step": 51980 }, { "epoch": 0.4892705882352941, "grad_norm": 0.4035852154933493, "learning_rate": 2.8599719977019486e-06, "loss": 0.01832200139760971, "step": 51985 }, { "epoch": 0.4893176470588235, "grad_norm": 0.6090589049306321, "learning_rate": 2.859834466648991e-06, "loss": 0.019734971225261688, "step": 51990 }, { "epoch": 0.4893647058823529, "grad_norm": 0.48153828221488154, "learning_rate": 2.8596969554350094e-06, "loss": 0.02001650333404541, "step": 51995 }, { "epoch": 0.4894117647058824, "grad_norm": 0.5417836692248775, "learning_rate": 2.8595594640552338e-06, "loss": 0.02081737220287323, "step": 52000 }, { "epoch": 0.4894588235294118, "grad_norm": 0.5312671913594541, "learning_rate": 2.859421992504897e-06, "loss": 0.019332000613212587, "step": 52005 }, { "epoch": 0.4895058823529412, "grad_norm": 0.5446156885250701, "learning_rate": 2.859284540779233e-06, "loss": 0.01758417934179306, "step": 52010 }, { "epoch": 0.4895529411764706, "grad_norm": 0.5802420667141559, "learning_rate": 2.859147108873478e-06, "loss": 0.015502464771270753, "step": 52015 }, { "epoch": 0.4896, "grad_norm": 0.579396688393983, "learning_rate": 2.8590096967828686e-06, "loss": 0.02079963982105255, "step": 52020 }, { "epoch": 0.48964705882352944, "grad_norm": 0.5048802962267867, "learning_rate": 2.8588723045026435e-06, "loss": 0.016580308973789214, "step": 52025 }, { "epoch": 0.48969411764705884, "grad_norm": 0.4007666278370801, "learning_rate": 2.8587349320280426e-06, "loss": 0.01604004353284836, "step": 52030 }, { "epoch": 0.48974117647058824, "grad_norm": 3.958834771661909, "learning_rate": 2.85859757935431e-06, "loss": 0.01656409353017807, "step": 52035 }, { "epoch": 0.48978823529411764, "grad_norm": 0.6035546499712132, "learning_rate": 2.858460246476687e-06, "loss": 0.020740756392478944, "step": 52040 }, { "epoch": 0.48983529411764704, "grad_norm": 0.5674747922848397, "learning_rate": 2.85832293339042e-06, "loss": 0.01786348819732666, "step": 52045 }, { "epoch": 0.4898823529411765, "grad_norm": 0.5122218510768902, "learning_rate": 2.8581856400907558e-06, "loss": 0.015845008194446564, "step": 52050 }, { "epoch": 0.4899294117647059, "grad_norm": 0.5511215477248355, "learning_rate": 2.8580483665729423e-06, "loss": 0.016156063973903657, "step": 52055 }, { "epoch": 0.4899764705882353, "grad_norm": 0.5057569976568624, "learning_rate": 2.8579111128322305e-06, "loss": 0.019651544094085694, "step": 52060 }, { "epoch": 0.4900235294117647, "grad_norm": 0.5035179518110863, "learning_rate": 2.8577738788638714e-06, "loss": 0.018702520430088042, "step": 52065 }, { "epoch": 0.4900705882352941, "grad_norm": 0.4807692623924352, "learning_rate": 2.8576366646631184e-06, "loss": 0.01953166425228119, "step": 52070 }, { "epoch": 0.49011764705882355, "grad_norm": 0.4860813121546973, "learning_rate": 2.857499470225226e-06, "loss": 0.018921773135662078, "step": 52075 }, { "epoch": 0.49016470588235295, "grad_norm": 0.5523672721117947, "learning_rate": 2.85736229554545e-06, "loss": 0.02123126983642578, "step": 52080 }, { "epoch": 0.49021176470588235, "grad_norm": 0.793800722173999, "learning_rate": 2.85722514061905e-06, "loss": 0.017655938863754272, "step": 52085 }, { "epoch": 0.49025882352941175, "grad_norm": 0.3298132731809967, "learning_rate": 2.857088005441284e-06, "loss": 0.021463829278945922, "step": 52090 }, { "epoch": 0.4903058823529412, "grad_norm": 0.657437648696874, "learning_rate": 2.856950890007414e-06, "loss": 0.017523199319839478, "step": 52095 }, { "epoch": 0.4903529411764706, "grad_norm": 0.5431652023315945, "learning_rate": 2.856813794312703e-06, "loss": 0.015329369902610778, "step": 52100 }, { "epoch": 0.4904, "grad_norm": 0.4334710806831638, "learning_rate": 2.8566767183524154e-06, "loss": 0.014757801592350007, "step": 52105 }, { "epoch": 0.4904470588235294, "grad_norm": 0.40229803666795294, "learning_rate": 2.8565396621218167e-06, "loss": 0.017749738693237305, "step": 52110 }, { "epoch": 0.4904941176470588, "grad_norm": 0.45702502469947687, "learning_rate": 2.856402625616174e-06, "loss": 0.01806618124246597, "step": 52115 }, { "epoch": 0.49054117647058826, "grad_norm": 0.7320774999985422, "learning_rate": 2.856265608830757e-06, "loss": 0.01868780851364136, "step": 52120 }, { "epoch": 0.49058823529411766, "grad_norm": 0.41951036747819564, "learning_rate": 2.8561286117608374e-06, "loss": 0.018927007913589478, "step": 52125 }, { "epoch": 0.49063529411764706, "grad_norm": 0.46272030852497104, "learning_rate": 2.8559916344016857e-06, "loss": 0.015316680073738098, "step": 52130 }, { "epoch": 0.49068235294117646, "grad_norm": 0.629638256441286, "learning_rate": 2.8558546767485768e-06, "loss": 0.020790109038352968, "step": 52135 }, { "epoch": 0.49072941176470586, "grad_norm": 0.5050719321135516, "learning_rate": 2.855717738796786e-06, "loss": 0.020161667466163637, "step": 52140 }, { "epoch": 0.4907764705882353, "grad_norm": 0.6457900572922431, "learning_rate": 2.855580820541591e-06, "loss": 0.021149128675460815, "step": 52145 }, { "epoch": 0.4908235294117647, "grad_norm": 0.5266389323140377, "learning_rate": 2.855443921978269e-06, "loss": 0.0201271191239357, "step": 52150 }, { "epoch": 0.4908705882352941, "grad_norm": 0.6986355560464037, "learning_rate": 2.855307043102102e-06, "loss": 0.01924116611480713, "step": 52155 }, { "epoch": 0.4909176470588235, "grad_norm": 0.3924271481637059, "learning_rate": 2.85517018390837e-06, "loss": 0.018293526768684388, "step": 52160 }, { "epoch": 0.4909647058823529, "grad_norm": 0.9106679998585139, "learning_rate": 2.855033344392358e-06, "loss": 0.01771705150604248, "step": 52165 }, { "epoch": 0.49101176470588237, "grad_norm": 0.46135232398268905, "learning_rate": 2.8548965245493492e-06, "loss": 0.018337754905223845, "step": 52170 }, { "epoch": 0.49105882352941177, "grad_norm": 0.500508880723182, "learning_rate": 2.8547597243746323e-06, "loss": 0.022092336416244508, "step": 52175 }, { "epoch": 0.49110588235294117, "grad_norm": 0.5521670498986327, "learning_rate": 2.8546229438634942e-06, "loss": 0.022182533144950868, "step": 52180 }, { "epoch": 0.49115294117647057, "grad_norm": 0.7340523564374337, "learning_rate": 2.854486183011225e-06, "loss": 0.018799687922000884, "step": 52185 }, { "epoch": 0.4912, "grad_norm": 0.49444486628361867, "learning_rate": 2.854349441813116e-06, "loss": 0.015232759714126586, "step": 52190 }, { "epoch": 0.4912470588235294, "grad_norm": 0.4281987775433457, "learning_rate": 2.85421272026446e-06, "loss": 0.016587670147418975, "step": 52195 }, { "epoch": 0.4912941176470588, "grad_norm": 0.5605682899955158, "learning_rate": 2.854076018360551e-06, "loss": 0.01712198257446289, "step": 52200 }, { "epoch": 0.4913411764705882, "grad_norm": 0.6530942715873522, "learning_rate": 2.853939336096686e-06, "loss": 0.018818315863609315, "step": 52205 }, { "epoch": 0.4913882352941176, "grad_norm": 0.6191234053827128, "learning_rate": 2.853802673468162e-06, "loss": 0.01879071891307831, "step": 52210 }, { "epoch": 0.4914352941176471, "grad_norm": 0.5458277246161022, "learning_rate": 2.853666030470278e-06, "loss": 0.019654381275177, "step": 52215 }, { "epoch": 0.4914823529411765, "grad_norm": 0.36410614098919986, "learning_rate": 2.853529407098335e-06, "loss": 0.014919331669807434, "step": 52220 }, { "epoch": 0.4915294117647059, "grad_norm": 0.7521696223613602, "learning_rate": 2.853392803347636e-06, "loss": 0.01673455685377121, "step": 52225 }, { "epoch": 0.4915764705882353, "grad_norm": 0.5645753670494745, "learning_rate": 2.853256219213484e-06, "loss": 0.015218749642372131, "step": 52230 }, { "epoch": 0.4916235294117647, "grad_norm": 0.432174604294971, "learning_rate": 2.853119654691185e-06, "loss": 0.014660869538784028, "step": 52235 }, { "epoch": 0.49167058823529414, "grad_norm": 0.41806494637222535, "learning_rate": 2.8529831097760457e-06, "loss": 0.017618054151535036, "step": 52240 }, { "epoch": 0.49171764705882354, "grad_norm": 0.7113637051076819, "learning_rate": 2.8528465844633753e-06, "loss": 0.01734657883644104, "step": 52245 }, { "epoch": 0.49176470588235294, "grad_norm": 0.7321313463485941, "learning_rate": 2.852710078748483e-06, "loss": 0.024169620871543885, "step": 52250 }, { "epoch": 0.49181176470588234, "grad_norm": 0.4658399324747831, "learning_rate": 2.852573592626682e-06, "loss": 0.01717989593744278, "step": 52255 }, { "epoch": 0.49185882352941174, "grad_norm": 0.653464611211656, "learning_rate": 2.8524371260932844e-06, "loss": 0.021304261684417725, "step": 52260 }, { "epoch": 0.4919058823529412, "grad_norm": 0.46307979879962174, "learning_rate": 2.8523006791436058e-06, "loss": 0.018327239155769347, "step": 52265 }, { "epoch": 0.4919529411764706, "grad_norm": 0.37927866240079844, "learning_rate": 2.8521642517729625e-06, "loss": 0.018493333458900453, "step": 52270 }, { "epoch": 0.492, "grad_norm": 0.4952281694004891, "learning_rate": 2.8520278439766725e-06, "loss": 0.019146469235420228, "step": 52275 }, { "epoch": 0.4920470588235294, "grad_norm": 0.4448119416079545, "learning_rate": 2.851891455750056e-06, "loss": 0.020330923795700073, "step": 52280 }, { "epoch": 0.49209411764705885, "grad_norm": 0.4114900054071496, "learning_rate": 2.851755087088433e-06, "loss": 0.017937442660331725, "step": 52285 }, { "epoch": 0.49214117647058825, "grad_norm": 0.6165265107194451, "learning_rate": 2.851618737987128e-06, "loss": 0.018097783625125884, "step": 52290 }, { "epoch": 0.49218823529411765, "grad_norm": 0.6277129965236694, "learning_rate": 2.8514824084414633e-06, "loss": 0.015803444385528564, "step": 52295 }, { "epoch": 0.49223529411764705, "grad_norm": 0.3121027908118598, "learning_rate": 2.851346098446766e-06, "loss": 0.012230116128921508, "step": 52300 }, { "epoch": 0.49228235294117645, "grad_norm": 0.5988127710906223, "learning_rate": 2.8512098079983634e-06, "loss": 0.016517496109008788, "step": 52305 }, { "epoch": 0.4923294117647059, "grad_norm": 0.42975660923115044, "learning_rate": 2.8510735370915843e-06, "loss": 0.022875374555587767, "step": 52310 }, { "epoch": 0.4923764705882353, "grad_norm": 0.48701101995803536, "learning_rate": 2.85093728572176e-06, "loss": 0.021641799807548524, "step": 52315 }, { "epoch": 0.4924235294117647, "grad_norm": 0.4854338616390703, "learning_rate": 2.8508010538842213e-06, "loss": 0.02046746462583542, "step": 52320 }, { "epoch": 0.4924705882352941, "grad_norm": 0.6586992771997142, "learning_rate": 2.8506648415743027e-06, "loss": 0.014191192388534547, "step": 52325 }, { "epoch": 0.4925176470588235, "grad_norm": 0.6359073062291134, "learning_rate": 2.8505286487873402e-06, "loss": 0.018948063254356384, "step": 52330 }, { "epoch": 0.49256470588235296, "grad_norm": 0.3370218275388785, "learning_rate": 2.850392475518669e-06, "loss": 0.016507917642593385, "step": 52335 }, { "epoch": 0.49261176470588236, "grad_norm": 0.7121717483516016, "learning_rate": 2.8502563217636285e-06, "loss": 0.021793943643569947, "step": 52340 }, { "epoch": 0.49265882352941176, "grad_norm": 0.3509882376168165, "learning_rate": 2.850120187517559e-06, "loss": 0.014934499561786652, "step": 52345 }, { "epoch": 0.49270588235294116, "grad_norm": 0.5762145857145958, "learning_rate": 2.849984072775801e-06, "loss": 0.019180914759635924, "step": 52350 }, { "epoch": 0.4927529411764706, "grad_norm": 0.561674549788251, "learning_rate": 2.8498479775336986e-06, "loss": 0.019290286302566528, "step": 52355 }, { "epoch": 0.4928, "grad_norm": 0.6590278522542833, "learning_rate": 2.849711901786595e-06, "loss": 0.0173238605260849, "step": 52360 }, { "epoch": 0.4928470588235294, "grad_norm": 0.45189380359767806, "learning_rate": 2.849575845529838e-06, "loss": 0.01657578945159912, "step": 52365 }, { "epoch": 0.4928941176470588, "grad_norm": 0.36776269307087167, "learning_rate": 2.849439808758774e-06, "loss": 0.017552667856216432, "step": 52370 }, { "epoch": 0.4929411764705882, "grad_norm": 0.5401615271676441, "learning_rate": 2.8493037914687526e-06, "loss": 0.0178959459066391, "step": 52375 }, { "epoch": 0.49298823529411767, "grad_norm": 0.8080073018974145, "learning_rate": 2.849167793655125e-06, "loss": 0.021175286173820494, "step": 52380 }, { "epoch": 0.49303529411764707, "grad_norm": 0.36950299179995416, "learning_rate": 2.8490318153132434e-06, "loss": 0.014127308130264282, "step": 52385 }, { "epoch": 0.49308235294117647, "grad_norm": 0.4966591754511714, "learning_rate": 2.848895856438462e-06, "loss": 0.020573368668556212, "step": 52390 }, { "epoch": 0.49312941176470587, "grad_norm": 1.0652968265298188, "learning_rate": 2.848759917026136e-06, "loss": 0.015557193756103515, "step": 52395 }, { "epoch": 0.49317647058823527, "grad_norm": 0.4943115930677448, "learning_rate": 2.8486239970716224e-06, "loss": 0.019471383094787596, "step": 52400 }, { "epoch": 0.4932235294117647, "grad_norm": 0.534469047986919, "learning_rate": 2.84848809657028e-06, "loss": 0.01686355173587799, "step": 52405 }, { "epoch": 0.4932705882352941, "grad_norm": 0.5623284714494017, "learning_rate": 2.848352215517468e-06, "loss": 0.014737993478775024, "step": 52410 }, { "epoch": 0.4933176470588235, "grad_norm": 0.5540135768772637, "learning_rate": 2.84821635390855e-06, "loss": 0.014727181196212769, "step": 52415 }, { "epoch": 0.4933647058823529, "grad_norm": 0.44815228944718605, "learning_rate": 2.848080511738887e-06, "loss": 0.017948353290557863, "step": 52420 }, { "epoch": 0.4934117647058823, "grad_norm": 0.5962270775195859, "learning_rate": 2.847944689003846e-06, "loss": 0.01769366264343262, "step": 52425 }, { "epoch": 0.4934588235294118, "grad_norm": 0.5290326646687414, "learning_rate": 2.8478088856987916e-06, "loss": 0.018531052768230437, "step": 52430 }, { "epoch": 0.4935058823529412, "grad_norm": 0.6186354396124657, "learning_rate": 2.8476731018190923e-06, "loss": 0.014799985289573669, "step": 52435 }, { "epoch": 0.4935529411764706, "grad_norm": 0.6372274793508109, "learning_rate": 2.847537337360118e-06, "loss": 0.018361194431781767, "step": 52440 }, { "epoch": 0.4936, "grad_norm": 0.478327099511259, "learning_rate": 2.8474015923172387e-06, "loss": 0.02021794766187668, "step": 52445 }, { "epoch": 0.49364705882352944, "grad_norm": 0.38697136542945887, "learning_rate": 2.8472658666858276e-06, "loss": 0.014907756447792053, "step": 52450 }, { "epoch": 0.49369411764705884, "grad_norm": 0.6409671602704825, "learning_rate": 2.847130160461259e-06, "loss": 0.017412003874778748, "step": 52455 }, { "epoch": 0.49374117647058824, "grad_norm": 0.6043159444836071, "learning_rate": 2.846994473638908e-06, "loss": 0.017432689666748047, "step": 52460 }, { "epoch": 0.49378823529411764, "grad_norm": 0.569936318045636, "learning_rate": 2.846858806214151e-06, "loss": 0.014509186148643494, "step": 52465 }, { "epoch": 0.49383529411764704, "grad_norm": 0.5337845182276763, "learning_rate": 2.8467231581823685e-06, "loss": 0.02122882455587387, "step": 52470 }, { "epoch": 0.4938823529411765, "grad_norm": 0.4403890386604988, "learning_rate": 2.846587529538939e-06, "loss": 0.016773131489753724, "step": 52475 }, { "epoch": 0.4939294117647059, "grad_norm": 0.6373760062723073, "learning_rate": 2.846451920279246e-06, "loss": 0.01881731003522873, "step": 52480 }, { "epoch": 0.4939764705882353, "grad_norm": 0.802657817955098, "learning_rate": 2.8463163303986708e-06, "loss": 0.02250604033470154, "step": 52485 }, { "epoch": 0.4940235294117647, "grad_norm": 0.6722176704382851, "learning_rate": 2.8461807598926e-06, "loss": 0.017794452607631683, "step": 52490 }, { "epoch": 0.4940705882352941, "grad_norm": 0.45986170121592757, "learning_rate": 2.8460452087564182e-06, "loss": 0.01588878035545349, "step": 52495 }, { "epoch": 0.49411764705882355, "grad_norm": 0.7478344066957565, "learning_rate": 2.8459096769855153e-06, "loss": 0.02086870074272156, "step": 52500 }, { "epoch": 0.49416470588235295, "grad_norm": 0.7449721404756825, "learning_rate": 2.84577416457528e-06, "loss": 0.01835800111293793, "step": 52505 }, { "epoch": 0.49421176470588235, "grad_norm": 0.3971322708308312, "learning_rate": 2.845638671521103e-06, "loss": 0.015104874968528748, "step": 52510 }, { "epoch": 0.49425882352941175, "grad_norm": 0.44670877735714465, "learning_rate": 2.8455031978183767e-06, "loss": 0.015576191246509552, "step": 52515 }, { "epoch": 0.49430588235294115, "grad_norm": 0.6790006204045298, "learning_rate": 2.845367743462496e-06, "loss": 0.01774005889892578, "step": 52520 }, { "epoch": 0.4943529411764706, "grad_norm": 0.7267362000558951, "learning_rate": 2.8452323084488552e-06, "loss": 0.016472908854484557, "step": 52525 }, { "epoch": 0.4944, "grad_norm": 0.6391438250394575, "learning_rate": 2.845096892772853e-06, "loss": 0.01827214062213898, "step": 52530 }, { "epoch": 0.4944470588235294, "grad_norm": 0.5281498600068454, "learning_rate": 2.8449614964298865e-06, "loss": 0.01772419363260269, "step": 52535 }, { "epoch": 0.4944941176470588, "grad_norm": 0.5411857560520718, "learning_rate": 2.844826119415357e-06, "loss": 0.019841623306274415, "step": 52540 }, { "epoch": 0.49454117647058826, "grad_norm": 0.50964288976007, "learning_rate": 2.8446907617246654e-06, "loss": 0.01942518949508667, "step": 52545 }, { "epoch": 0.49458823529411766, "grad_norm": 0.5817109996346386, "learning_rate": 2.8445554233532157e-06, "loss": 0.022278830409049988, "step": 52550 }, { "epoch": 0.49463529411764706, "grad_norm": 0.7597594806300653, "learning_rate": 2.844420104296413e-06, "loss": 0.020897409319877623, "step": 52555 }, { "epoch": 0.49468235294117646, "grad_norm": 0.5329912810222074, "learning_rate": 2.8442848045496625e-06, "loss": 0.018817564845085143, "step": 52560 }, { "epoch": 0.49472941176470586, "grad_norm": 0.5223021108550097, "learning_rate": 2.8441495241083733e-06, "loss": 0.01823660880327225, "step": 52565 }, { "epoch": 0.4947764705882353, "grad_norm": 0.4693072808930234, "learning_rate": 2.8440142629679533e-06, "loss": 0.01768272966146469, "step": 52570 }, { "epoch": 0.4948235294117647, "grad_norm": 0.5584065292467845, "learning_rate": 2.843879021123815e-06, "loss": 0.01401025652885437, "step": 52575 }, { "epoch": 0.4948705882352941, "grad_norm": 0.5320092572870971, "learning_rate": 2.8437437985713697e-06, "loss": 0.016347622871398924, "step": 52580 }, { "epoch": 0.4949176470588235, "grad_norm": 0.6346574789225506, "learning_rate": 2.8436085953060314e-06, "loss": 0.019100421667099, "step": 52585 }, { "epoch": 0.4949647058823529, "grad_norm": 0.3729322381345809, "learning_rate": 2.8434734113232165e-06, "loss": 0.0145864337682724, "step": 52590 }, { "epoch": 0.4950117647058824, "grad_norm": 0.6673926067207824, "learning_rate": 2.8433382466183412e-06, "loss": 0.01909240782260895, "step": 52595 }, { "epoch": 0.4950588235294118, "grad_norm": 0.48499269958783287, "learning_rate": 2.843203101186825e-06, "loss": 0.01709206700325012, "step": 52600 }, { "epoch": 0.4951058823529412, "grad_norm": 0.3897247492370909, "learning_rate": 2.8430679750240866e-06, "loss": 0.018492496013641356, "step": 52605 }, { "epoch": 0.4951529411764706, "grad_norm": 0.5029960832714263, "learning_rate": 2.8429328681255486e-06, "loss": 0.02783050835132599, "step": 52610 }, { "epoch": 0.4952, "grad_norm": 0.4430112266968053, "learning_rate": 2.842797780486633e-06, "loss": 0.01569965183734894, "step": 52615 }, { "epoch": 0.49524705882352943, "grad_norm": 0.6666166290780576, "learning_rate": 2.842662712102766e-06, "loss": 0.02398149073123932, "step": 52620 }, { "epoch": 0.49529411764705883, "grad_norm": 0.7180363127882915, "learning_rate": 2.8425276629693725e-06, "loss": 0.018372106552124023, "step": 52625 }, { "epoch": 0.49534117647058823, "grad_norm": 0.34877729342613806, "learning_rate": 2.8423926330818814e-06, "loss": 0.014025324583053589, "step": 52630 }, { "epoch": 0.49538823529411763, "grad_norm": 0.4952699844392899, "learning_rate": 2.8422576224357206e-06, "loss": 0.016113144159317017, "step": 52635 }, { "epoch": 0.4954352941176471, "grad_norm": 0.47547787483702897, "learning_rate": 2.842122631026322e-06, "loss": 0.017299920320510864, "step": 52640 }, { "epoch": 0.4954823529411765, "grad_norm": 0.7348724471624726, "learning_rate": 2.841987658849116e-06, "loss": 0.021108397841453554, "step": 52645 }, { "epoch": 0.4955294117647059, "grad_norm": 0.543616477587541, "learning_rate": 2.841852705899539e-06, "loss": 0.015480975806713104, "step": 52650 }, { "epoch": 0.4955764705882353, "grad_norm": 0.5951418373485672, "learning_rate": 2.8417177721730238e-06, "loss": 0.018364088237285615, "step": 52655 }, { "epoch": 0.4956235294117647, "grad_norm": 0.5116385646191399, "learning_rate": 2.841582857665009e-06, "loss": 0.016045151650905608, "step": 52660 }, { "epoch": 0.49567058823529414, "grad_norm": 0.5483864724593411, "learning_rate": 2.841447962370931e-06, "loss": 0.019764885306358337, "step": 52665 }, { "epoch": 0.49571764705882354, "grad_norm": 0.604649443169052, "learning_rate": 2.841313086286232e-06, "loss": 0.017768362164497377, "step": 52670 }, { "epoch": 0.49576470588235294, "grad_norm": 0.43855203452603186, "learning_rate": 2.841178229406352e-06, "loss": 0.015312525629997253, "step": 52675 }, { "epoch": 0.49581176470588234, "grad_norm": 0.55776079097455, "learning_rate": 2.8410433917267335e-06, "loss": 0.013385853171348572, "step": 52680 }, { "epoch": 0.49585882352941174, "grad_norm": 0.6383434288982671, "learning_rate": 2.8409085732428217e-06, "loss": 0.017724575102329256, "step": 52685 }, { "epoch": 0.4959058823529412, "grad_norm": 1.0063556340059794, "learning_rate": 2.8407737739500614e-06, "loss": 0.018923962116241456, "step": 52690 }, { "epoch": 0.4959529411764706, "grad_norm": 0.5556242743671815, "learning_rate": 2.8406389938439017e-06, "loss": 0.023559121787548064, "step": 52695 }, { "epoch": 0.496, "grad_norm": 0.4639207384913027, "learning_rate": 2.8405042329197895e-06, "loss": 0.016892284154891968, "step": 52700 }, { "epoch": 0.4960470588235294, "grad_norm": 0.5658519250192775, "learning_rate": 2.840369491173177e-06, "loss": 0.01829630434513092, "step": 52705 }, { "epoch": 0.4960941176470588, "grad_norm": 0.5656634556613792, "learning_rate": 2.8402347685995154e-06, "loss": 0.013723349571228028, "step": 52710 }, { "epoch": 0.49614117647058825, "grad_norm": 0.5015259396675944, "learning_rate": 2.840100065194257e-06, "loss": 0.014599694311618805, "step": 52715 }, { "epoch": 0.49618823529411765, "grad_norm": 0.4640334056898389, "learning_rate": 2.839965380952859e-06, "loss": 0.015766134858131407, "step": 52720 }, { "epoch": 0.49623529411764705, "grad_norm": 0.6388558436061316, "learning_rate": 2.8398307158707766e-06, "loss": 0.01851399540901184, "step": 52725 }, { "epoch": 0.49628235294117645, "grad_norm": 0.617181805568242, "learning_rate": 2.8396960699434674e-06, "loss": 0.01636712849140167, "step": 52730 }, { "epoch": 0.4963294117647059, "grad_norm": 0.5470282754051475, "learning_rate": 2.839561443166392e-06, "loss": 0.017909575998783112, "step": 52735 }, { "epoch": 0.4963764705882353, "grad_norm": 0.47292055966392904, "learning_rate": 2.83942683553501e-06, "loss": 0.02000851631164551, "step": 52740 }, { "epoch": 0.4964235294117647, "grad_norm": 0.34861167863346515, "learning_rate": 2.8392922470447848e-06, "loss": 0.014376547932624818, "step": 52745 }, { "epoch": 0.4964705882352941, "grad_norm": 0.4763479581404615, "learning_rate": 2.83915767769118e-06, "loss": 0.016980947554111482, "step": 52750 }, { "epoch": 0.4965176470588235, "grad_norm": 0.7001676687637305, "learning_rate": 2.8390231274696615e-06, "loss": 0.015716753900051117, "step": 52755 }, { "epoch": 0.49656470588235296, "grad_norm": 0.8421033883066733, "learning_rate": 2.838888596375696e-06, "loss": 0.0208291232585907, "step": 52760 }, { "epoch": 0.49661176470588236, "grad_norm": 0.6232017645964366, "learning_rate": 2.8387540844047523e-06, "loss": 0.02124873697757721, "step": 52765 }, { "epoch": 0.49665882352941176, "grad_norm": 0.5599406422089532, "learning_rate": 2.8386195915522996e-06, "loss": 0.021904316544532777, "step": 52770 }, { "epoch": 0.49670588235294116, "grad_norm": 0.710187258478676, "learning_rate": 2.83848511781381e-06, "loss": 0.017208385467529296, "step": 52775 }, { "epoch": 0.49675294117647056, "grad_norm": 0.8291435376965853, "learning_rate": 2.838350663184757e-06, "loss": 0.015062466263771057, "step": 52780 }, { "epoch": 0.4968, "grad_norm": 0.36791867871307704, "learning_rate": 2.838216227660614e-06, "loss": 0.014249080419540405, "step": 52785 }, { "epoch": 0.4968470588235294, "grad_norm": 0.5284545940370453, "learning_rate": 2.8380818112368585e-06, "loss": 0.015723595023155214, "step": 52790 }, { "epoch": 0.4968941176470588, "grad_norm": 0.4176558835727774, "learning_rate": 2.837947413908966e-06, "loss": 0.016546669602394103, "step": 52795 }, { "epoch": 0.4969411764705882, "grad_norm": 0.7449505690220187, "learning_rate": 2.8378130356724177e-06, "loss": 0.018953323364257812, "step": 52800 }, { "epoch": 0.4969882352941176, "grad_norm": 0.636700660613245, "learning_rate": 2.8376786765226923e-06, "loss": 0.023522442579269408, "step": 52805 }, { "epoch": 0.4970352941176471, "grad_norm": 0.3398086074475631, "learning_rate": 2.8375443364552724e-06, "loss": 0.015285670757293701, "step": 52810 }, { "epoch": 0.4970823529411765, "grad_norm": 0.40061270926469555, "learning_rate": 2.837410015465642e-06, "loss": 0.012828290462493896, "step": 52815 }, { "epoch": 0.4971294117647059, "grad_norm": 0.3804300629374921, "learning_rate": 2.837275713549286e-06, "loss": 0.018128491938114166, "step": 52820 }, { "epoch": 0.4971764705882353, "grad_norm": 0.6772103804031915, "learning_rate": 2.8371414307016894e-06, "loss": 0.024044889211654662, "step": 52825 }, { "epoch": 0.49722352941176473, "grad_norm": 0.5653733116291816, "learning_rate": 2.8370071669183426e-06, "loss": 0.016571709513664247, "step": 52830 }, { "epoch": 0.49727058823529413, "grad_norm": 0.442090114892302, "learning_rate": 2.8368729221947334e-06, "loss": 0.015553018450737, "step": 52835 }, { "epoch": 0.49731764705882353, "grad_norm": 0.5993483590806854, "learning_rate": 2.8367386965263527e-06, "loss": 0.020848248898983002, "step": 52840 }, { "epoch": 0.49736470588235293, "grad_norm": 0.4742406374712405, "learning_rate": 2.836604489908694e-06, "loss": 0.017993226647377014, "step": 52845 }, { "epoch": 0.49741176470588233, "grad_norm": 0.4996239532697153, "learning_rate": 2.8364703023372507e-06, "loss": 0.014800076186656953, "step": 52850 }, { "epoch": 0.4974588235294118, "grad_norm": 0.8692104145798231, "learning_rate": 2.8363361338075184e-06, "loss": 0.016231992840766908, "step": 52855 }, { "epoch": 0.4975058823529412, "grad_norm": 0.4913135064781547, "learning_rate": 2.8362019843149942e-06, "loss": 0.01955161690711975, "step": 52860 }, { "epoch": 0.4975529411764706, "grad_norm": 0.41526008858100383, "learning_rate": 2.8360678538551763e-06, "loss": 0.0184002548456192, "step": 52865 }, { "epoch": 0.4976, "grad_norm": 0.5515614764543937, "learning_rate": 2.835933742423564e-06, "loss": 0.015359410643577575, "step": 52870 }, { "epoch": 0.4976470588235294, "grad_norm": 0.613767743830035, "learning_rate": 2.8357996500156598e-06, "loss": 0.022299017012119293, "step": 52875 }, { "epoch": 0.49769411764705884, "grad_norm": 0.49771571617174, "learning_rate": 2.835665576626966e-06, "loss": 0.01890549808740616, "step": 52880 }, { "epoch": 0.49774117647058824, "grad_norm": 0.4925517881104386, "learning_rate": 2.835531522252987e-06, "loss": 0.018462203443050385, "step": 52885 }, { "epoch": 0.49778823529411764, "grad_norm": 0.5118271450263148, "learning_rate": 2.835397486889229e-06, "loss": 0.018379056453704835, "step": 52890 }, { "epoch": 0.49783529411764704, "grad_norm": 0.5116629691810307, "learning_rate": 2.8352634705311994e-06, "loss": 0.01779065728187561, "step": 52895 }, { "epoch": 0.4978823529411765, "grad_norm": 0.4857168339223752, "learning_rate": 2.835129473174407e-06, "loss": 0.018471620976924896, "step": 52900 }, { "epoch": 0.4979294117647059, "grad_norm": 0.42669994856430804, "learning_rate": 2.834995494814361e-06, "loss": 0.016037000715732573, "step": 52905 }, { "epoch": 0.4979764705882353, "grad_norm": 0.6031719263527895, "learning_rate": 2.8348615354465753e-06, "loss": 0.02166740596294403, "step": 52910 }, { "epoch": 0.4980235294117647, "grad_norm": 0.5330515895630781, "learning_rate": 2.834727595066561e-06, "loss": 0.01588687002658844, "step": 52915 }, { "epoch": 0.4980705882352941, "grad_norm": 0.5469535135461352, "learning_rate": 2.8345936736698347e-06, "loss": 0.017363280057907104, "step": 52920 }, { "epoch": 0.49811764705882355, "grad_norm": 0.7324510977068072, "learning_rate": 2.834459771251912e-06, "loss": 0.023265767097473144, "step": 52925 }, { "epoch": 0.49816470588235295, "grad_norm": 0.5374762737312113, "learning_rate": 2.83432588780831e-06, "loss": 0.019429126381874086, "step": 52930 }, { "epoch": 0.49821176470588235, "grad_norm": 0.5995558659670236, "learning_rate": 2.8341920233345487e-06, "loss": 0.018118518590927123, "step": 52935 }, { "epoch": 0.49825882352941175, "grad_norm": 0.562690658985673, "learning_rate": 2.8340581778261493e-06, "loss": 0.022807517647743226, "step": 52940 }, { "epoch": 0.49830588235294115, "grad_norm": 0.6017955460472179, "learning_rate": 2.8339243512786328e-06, "loss": 0.015980488061904906, "step": 52945 }, { "epoch": 0.4983529411764706, "grad_norm": 0.26595558873437275, "learning_rate": 2.833790543687524e-06, "loss": 0.01903391778469086, "step": 52950 }, { "epoch": 0.4984, "grad_norm": 0.3893883660527899, "learning_rate": 2.8336567550483474e-06, "loss": 0.019797012209892273, "step": 52955 }, { "epoch": 0.4984470588235294, "grad_norm": 0.4172235878853972, "learning_rate": 2.83352298535663e-06, "loss": 0.015999211370944975, "step": 52960 }, { "epoch": 0.4984941176470588, "grad_norm": 0.6443028525233694, "learning_rate": 2.833389234607899e-06, "loss": 0.017050859332084656, "step": 52965 }, { "epoch": 0.4985411764705882, "grad_norm": 0.6930080109212015, "learning_rate": 2.833255502797685e-06, "loss": 0.02229509949684143, "step": 52970 }, { "epoch": 0.49858823529411767, "grad_norm": 0.44265728560421275, "learning_rate": 2.8331217899215187e-06, "loss": 0.015560765564441682, "step": 52975 }, { "epoch": 0.49863529411764707, "grad_norm": 0.5557025811572023, "learning_rate": 2.8329880959749327e-06, "loss": 0.02144493758678436, "step": 52980 }, { "epoch": 0.49868235294117647, "grad_norm": 0.5102105376454216, "learning_rate": 2.832854420953461e-06, "loss": 0.015679720044136047, "step": 52985 }, { "epoch": 0.49872941176470587, "grad_norm": 0.4965819073061259, "learning_rate": 2.832720764852639e-06, "loss": 0.017219249904155732, "step": 52990 }, { "epoch": 0.4987764705882353, "grad_norm": 0.5123070631857012, "learning_rate": 2.832587127668004e-06, "loss": 0.016865074634552002, "step": 52995 }, { "epoch": 0.4988235294117647, "grad_norm": 0.4448100016825464, "learning_rate": 2.832453509395094e-06, "loss": 0.018334949016571046, "step": 53000 }, { "epoch": 0.4988705882352941, "grad_norm": 0.2935146643591006, "learning_rate": 2.832319910029449e-06, "loss": 0.014613303542137145, "step": 53005 }, { "epoch": 0.4989176470588235, "grad_norm": 0.5365111835640636, "learning_rate": 2.8321863295666113e-06, "loss": 0.016673623025417326, "step": 53010 }, { "epoch": 0.4989647058823529, "grad_norm": 0.5099061561802435, "learning_rate": 2.8320527680021224e-06, "loss": 0.019447845220565797, "step": 53015 }, { "epoch": 0.4990117647058824, "grad_norm": 0.5516124750875568, "learning_rate": 2.8319192253315275e-06, "loss": 0.016998353600502013, "step": 53020 }, { "epoch": 0.4990588235294118, "grad_norm": 0.44960153221414445, "learning_rate": 2.8317857015503718e-06, "loss": 0.023571233451366424, "step": 53025 }, { "epoch": 0.4991058823529412, "grad_norm": 0.461910709904134, "learning_rate": 2.8316521966542027e-06, "loss": 0.02133915275335312, "step": 53030 }, { "epoch": 0.4991529411764706, "grad_norm": 0.28956418180488663, "learning_rate": 2.83151871063857e-06, "loss": 0.01382337063550949, "step": 53035 }, { "epoch": 0.4992, "grad_norm": 0.54175802036668, "learning_rate": 2.8313852434990223e-06, "loss": 0.015131333470344543, "step": 53040 }, { "epoch": 0.49924705882352943, "grad_norm": 0.4454922328972315, "learning_rate": 2.8312517952311126e-06, "loss": 0.015983666479587554, "step": 53045 }, { "epoch": 0.49929411764705883, "grad_norm": 0.5093365727667738, "learning_rate": 2.8311183658303933e-06, "loss": 0.016561296582221986, "step": 53050 }, { "epoch": 0.49934117647058823, "grad_norm": 0.5920060304030798, "learning_rate": 2.830984955292419e-06, "loss": 0.018435287475585937, "step": 53055 }, { "epoch": 0.49938823529411763, "grad_norm": 0.4420028712388606, "learning_rate": 2.8308515636127466e-06, "loss": 0.018075600266456604, "step": 53060 }, { "epoch": 0.49943529411764703, "grad_norm": 0.5189483695594105, "learning_rate": 2.830718190786933e-06, "loss": 0.02078935652971268, "step": 53065 }, { "epoch": 0.4994823529411765, "grad_norm": 0.4139370700695827, "learning_rate": 2.8305848368105365e-06, "loss": 0.016539856791496277, "step": 53070 }, { "epoch": 0.4995294117647059, "grad_norm": 0.5202626015119808, "learning_rate": 2.8304515016791184e-06, "loss": 0.017032718658447264, "step": 53075 }, { "epoch": 0.4995764705882353, "grad_norm": 0.5903732109616768, "learning_rate": 2.830318185388241e-06, "loss": 0.01817013919353485, "step": 53080 }, { "epoch": 0.4996235294117647, "grad_norm": 0.6837284068901642, "learning_rate": 2.8301848879334675e-06, "loss": 0.018246006965637208, "step": 53085 }, { "epoch": 0.49967058823529414, "grad_norm": 0.44088054538912647, "learning_rate": 2.830051609310362e-06, "loss": 0.01704854816198349, "step": 53090 }, { "epoch": 0.49971764705882354, "grad_norm": 0.5662008424761187, "learning_rate": 2.829918349514492e-06, "loss": 0.013808530569076539, "step": 53095 }, { "epoch": 0.49976470588235294, "grad_norm": 0.8365190957243877, "learning_rate": 2.829785108541424e-06, "loss": 0.023257426917552948, "step": 53100 }, { "epoch": 0.49981176470588234, "grad_norm": 0.7001035422874744, "learning_rate": 2.8296518863867285e-06, "loss": 0.02031940519809723, "step": 53105 }, { "epoch": 0.49985882352941174, "grad_norm": 0.6998125461444807, "learning_rate": 2.829518683045975e-06, "loss": 0.01649990975856781, "step": 53110 }, { "epoch": 0.4999058823529412, "grad_norm": 0.41503541251543835, "learning_rate": 2.829385498514736e-06, "loss": 0.017869243025779726, "step": 53115 }, { "epoch": 0.4999529411764706, "grad_norm": 0.44353171899643085, "learning_rate": 2.8292523327885865e-06, "loss": 0.018162713944911958, "step": 53120 }, { "epoch": 0.5, "grad_norm": 0.6392767335488928, "learning_rate": 2.8291191858631e-06, "loss": 0.023774005472660065, "step": 53125 }, { "epoch": 0.5000470588235294, "grad_norm": 0.35658731487277207, "learning_rate": 2.8289860577338533e-06, "loss": 0.013489609956741333, "step": 53130 }, { "epoch": 0.5000941176470588, "grad_norm": 0.3911004601130001, "learning_rate": 2.8288529483964255e-06, "loss": 0.015909889340400697, "step": 53135 }, { "epoch": 0.5001411764705882, "grad_norm": 0.45404538095602204, "learning_rate": 2.8287198578463943e-06, "loss": 0.015365338325500489, "step": 53140 }, { "epoch": 0.5001882352941176, "grad_norm": 0.6997297368240566, "learning_rate": 2.828586786079342e-06, "loss": 0.016411317884922026, "step": 53145 }, { "epoch": 0.5002352941176471, "grad_norm": 0.46562693725466936, "learning_rate": 2.82845373309085e-06, "loss": 0.015238408744335175, "step": 53150 }, { "epoch": 0.5002823529411765, "grad_norm": 0.48469850284755694, "learning_rate": 2.828320698876503e-06, "loss": 0.014727565646171569, "step": 53155 }, { "epoch": 0.5003294117647059, "grad_norm": 0.45175092051174687, "learning_rate": 2.8281876834318854e-06, "loss": 0.016397589445114137, "step": 53160 }, { "epoch": 0.5003764705882353, "grad_norm": 0.6161409268502143, "learning_rate": 2.8280546867525848e-06, "loss": 0.0223580926656723, "step": 53165 }, { "epoch": 0.5004235294117647, "grad_norm": 0.555474927092957, "learning_rate": 2.8279217088341894e-06, "loss": 0.018831640481948853, "step": 53170 }, { "epoch": 0.5004705882352941, "grad_norm": 0.5018523253105749, "learning_rate": 2.827788749672288e-06, "loss": 0.0158893883228302, "step": 53175 }, { "epoch": 0.5005176470588235, "grad_norm": 0.46279597054845006, "learning_rate": 2.827655809262472e-06, "loss": 0.014911223948001862, "step": 53180 }, { "epoch": 0.5005647058823529, "grad_norm": 0.5041392116195695, "learning_rate": 2.827522887600334e-06, "loss": 0.019884517788887023, "step": 53185 }, { "epoch": 0.5006117647058823, "grad_norm": 0.43035373943458693, "learning_rate": 2.8273899846814684e-06, "loss": 0.02407699227333069, "step": 53190 }, { "epoch": 0.5006588235294117, "grad_norm": 0.4344215589114263, "learning_rate": 2.82725710050147e-06, "loss": 0.01573539078235626, "step": 53195 }, { "epoch": 0.5007058823529412, "grad_norm": 0.44262690052519, "learning_rate": 2.827124235055936e-06, "loss": 0.01758727878332138, "step": 53200 }, { "epoch": 0.5007529411764706, "grad_norm": 0.6027303716539981, "learning_rate": 2.8269913883404648e-06, "loss": 0.018514546751976012, "step": 53205 }, { "epoch": 0.5008, "grad_norm": 0.42067218223778197, "learning_rate": 2.8268585603506556e-06, "loss": 0.021262609958648683, "step": 53210 }, { "epoch": 0.5008470588235294, "grad_norm": 0.6664604300833289, "learning_rate": 2.8267257510821104e-06, "loss": 0.017719459533691407, "step": 53215 }, { "epoch": 0.5008941176470588, "grad_norm": 0.5422597747357291, "learning_rate": 2.826592960530432e-06, "loss": 0.016777591407299043, "step": 53220 }, { "epoch": 0.5009411764705882, "grad_norm": 1.541697456344226, "learning_rate": 2.8264601886912233e-06, "loss": 0.019232022762298583, "step": 53225 }, { "epoch": 0.5009882352941176, "grad_norm": 0.7473831685561564, "learning_rate": 2.8263274355600913e-06, "loss": 0.02018427848815918, "step": 53230 }, { "epoch": 0.501035294117647, "grad_norm": 0.7237493611028024, "learning_rate": 2.826194701132642e-06, "loss": 0.01848914921283722, "step": 53235 }, { "epoch": 0.5010823529411764, "grad_norm": 0.7024894577938186, "learning_rate": 2.826061985404484e-06, "loss": 0.021106189489364623, "step": 53240 }, { "epoch": 0.5011294117647059, "grad_norm": 0.5284803057537045, "learning_rate": 2.825929288371228e-06, "loss": 0.014097300171852113, "step": 53245 }, { "epoch": 0.5011764705882353, "grad_norm": 0.4362252670360858, "learning_rate": 2.8257966100284843e-06, "loss": 0.016006579995155333, "step": 53250 }, { "epoch": 0.5012235294117647, "grad_norm": 0.5659839533998329, "learning_rate": 2.8256639503718663e-06, "loss": 0.01784307062625885, "step": 53255 }, { "epoch": 0.5012705882352941, "grad_norm": 0.49482856737221986, "learning_rate": 2.825531309396988e-06, "loss": 0.014596784114837646, "step": 53260 }, { "epoch": 0.5013176470588235, "grad_norm": 0.3129291697002568, "learning_rate": 2.8253986870994655e-06, "loss": 0.01489812433719635, "step": 53265 }, { "epoch": 0.5013647058823529, "grad_norm": 0.4714925497620336, "learning_rate": 2.825266083474915e-06, "loss": 0.016904473304748535, "step": 53270 }, { "epoch": 0.5014117647058823, "grad_norm": 0.5964085186221553, "learning_rate": 2.825133498518956e-06, "loss": 0.020410941541194917, "step": 53275 }, { "epoch": 0.5014588235294117, "grad_norm": 0.39380606385374206, "learning_rate": 2.8250009322272077e-06, "loss": 0.017322638630867006, "step": 53280 }, { "epoch": 0.5015058823529411, "grad_norm": 0.4490867175708403, "learning_rate": 2.824868384595292e-06, "loss": 0.01792309284210205, "step": 53285 }, { "epoch": 0.5015529411764705, "grad_norm": 0.4738019734019173, "learning_rate": 2.824735855618832e-06, "loss": 0.01870785653591156, "step": 53290 }, { "epoch": 0.5016, "grad_norm": 0.6533446120714823, "learning_rate": 2.8246033452934514e-06, "loss": 0.01661435216665268, "step": 53295 }, { "epoch": 0.5016470588235294, "grad_norm": 0.49165759434781164, "learning_rate": 2.824470853614776e-06, "loss": 0.01643402725458145, "step": 53300 }, { "epoch": 0.5016941176470588, "grad_norm": 0.43469855886355724, "learning_rate": 2.824338380578433e-06, "loss": 0.013672801852226257, "step": 53305 }, { "epoch": 0.5017411764705882, "grad_norm": 0.5969981992032621, "learning_rate": 2.8242059261800518e-06, "loss": 0.02170599102973938, "step": 53310 }, { "epoch": 0.5017882352941176, "grad_norm": 1.026548012411139, "learning_rate": 2.8240734904152613e-06, "loss": 0.01384829580783844, "step": 53315 }, { "epoch": 0.501835294117647, "grad_norm": 0.7121256123741003, "learning_rate": 2.823941073279693e-06, "loss": 0.017401671409606932, "step": 53320 }, { "epoch": 0.5018823529411764, "grad_norm": 0.47218082979851755, "learning_rate": 2.823808674768981e-06, "loss": 0.02049923837184906, "step": 53325 }, { "epoch": 0.5019294117647058, "grad_norm": 0.6439360280477797, "learning_rate": 2.823676294878759e-06, "loss": 0.020197415351867677, "step": 53330 }, { "epoch": 0.5019764705882352, "grad_norm": 0.5088887392621576, "learning_rate": 2.8235439336046623e-06, "loss": 0.018132321536540985, "step": 53335 }, { "epoch": 0.5020235294117648, "grad_norm": 0.5715905491665996, "learning_rate": 2.8234115909423287e-06, "loss": 0.022375854849815368, "step": 53340 }, { "epoch": 0.5020705882352942, "grad_norm": 0.4394809781751801, "learning_rate": 2.8232792668873967e-06, "loss": 0.019101765751838685, "step": 53345 }, { "epoch": 0.5021176470588236, "grad_norm": 0.5654509266987063, "learning_rate": 2.8231469614355055e-06, "loss": 0.01570759117603302, "step": 53350 }, { "epoch": 0.502164705882353, "grad_norm": 0.41502829682980236, "learning_rate": 2.8230146745822983e-06, "loss": 0.019445541501045226, "step": 53355 }, { "epoch": 0.5022117647058824, "grad_norm": 0.8672363818989247, "learning_rate": 2.822882406323417e-06, "loss": 0.020241770148277282, "step": 53360 }, { "epoch": 0.5022588235294118, "grad_norm": 0.4407745052947064, "learning_rate": 2.8227501566545057e-06, "loss": 0.014957974851131439, "step": 53365 }, { "epoch": 0.5023058823529412, "grad_norm": 0.44806489691418266, "learning_rate": 2.822617925571211e-06, "loss": 0.021324929594993592, "step": 53370 }, { "epoch": 0.5023529411764706, "grad_norm": 0.4846133838271356, "learning_rate": 2.8224857130691795e-06, "loss": 0.01755220293998718, "step": 53375 }, { "epoch": 0.5024, "grad_norm": 0.5688593475195041, "learning_rate": 2.8223535191440603e-06, "loss": 0.024320361018180848, "step": 53380 }, { "epoch": 0.5024470588235294, "grad_norm": 0.5202963746493792, "learning_rate": 2.822221343791503e-06, "loss": 0.018740510940551756, "step": 53385 }, { "epoch": 0.5024941176470589, "grad_norm": 0.39178399178082174, "learning_rate": 2.8220891870071594e-06, "loss": 0.0164920836687088, "step": 53390 }, { "epoch": 0.5025411764705883, "grad_norm": 0.4065128230536039, "learning_rate": 2.821957048786682e-06, "loss": 0.019943724572658538, "step": 53395 }, { "epoch": 0.5025882352941177, "grad_norm": 0.5907274599275809, "learning_rate": 2.8218249291257255e-06, "loss": 0.021066117286682128, "step": 53400 }, { "epoch": 0.5026352941176471, "grad_norm": 0.44262047857345294, "learning_rate": 2.8216928280199458e-06, "loss": 0.014618158340454102, "step": 53405 }, { "epoch": 0.5026823529411765, "grad_norm": 0.7881587568333708, "learning_rate": 2.821560745465e-06, "loss": 0.016055554151535034, "step": 53410 }, { "epoch": 0.5027294117647059, "grad_norm": 0.45686470321638245, "learning_rate": 2.8214286814565465e-06, "loss": 0.019350314140319826, "step": 53415 }, { "epoch": 0.5027764705882353, "grad_norm": 0.42131927310668305, "learning_rate": 2.8212966359902456e-06, "loss": 0.020281612873077393, "step": 53420 }, { "epoch": 0.5028235294117647, "grad_norm": 0.38511482922421086, "learning_rate": 2.8211646090617576e-06, "loss": 0.017267940938472746, "step": 53425 }, { "epoch": 0.5028705882352941, "grad_norm": 0.40938591731323065, "learning_rate": 2.8210326006667478e-06, "loss": 0.01827298551797867, "step": 53430 }, { "epoch": 0.5029176470588236, "grad_norm": 0.3256369699556139, "learning_rate": 2.8209006108008784e-06, "loss": 0.017329689860343934, "step": 53435 }, { "epoch": 0.502964705882353, "grad_norm": 0.5808131031678286, "learning_rate": 2.820768639459816e-06, "loss": 0.019019144773483276, "step": 53440 }, { "epoch": 0.5030117647058824, "grad_norm": 0.3422579995942126, "learning_rate": 2.8206366866392274e-06, "loss": 0.023180460929870604, "step": 53445 }, { "epoch": 0.5030588235294118, "grad_norm": 0.5242943933993229, "learning_rate": 2.820504752334781e-06, "loss": 0.016990143060684203, "step": 53450 }, { "epoch": 0.5031058823529412, "grad_norm": 0.5439732502948131, "learning_rate": 2.820372836542148e-06, "loss": 0.015551000833511353, "step": 53455 }, { "epoch": 0.5031529411764706, "grad_norm": 0.4597480149776137, "learning_rate": 2.8202409392569983e-06, "loss": 0.015197345614433288, "step": 53460 }, { "epoch": 0.5032, "grad_norm": 0.395573065584906, "learning_rate": 2.8201090604750052e-06, "loss": 0.016899019479751587, "step": 53465 }, { "epoch": 0.5032470588235294, "grad_norm": 0.6345121126278349, "learning_rate": 2.8199772001918428e-06, "loss": 0.014997340738773346, "step": 53470 }, { "epoch": 0.5032941176470588, "grad_norm": 0.40145281183426496, "learning_rate": 2.8198453584031878e-06, "loss": 0.017142859101295472, "step": 53475 }, { "epoch": 0.5033411764705882, "grad_norm": 0.4197096083114961, "learning_rate": 2.8197135351047157e-06, "loss": 0.015983696281909942, "step": 53480 }, { "epoch": 0.5033882352941177, "grad_norm": 0.7150409428861743, "learning_rate": 2.8195817302921057e-06, "loss": 0.019602492451667786, "step": 53485 }, { "epoch": 0.5034352941176471, "grad_norm": 0.5960201863133212, "learning_rate": 2.8194499439610377e-06, "loss": 0.013950680196285248, "step": 53490 }, { "epoch": 0.5034823529411765, "grad_norm": 0.46239305303168826, "learning_rate": 2.819318176107193e-06, "loss": 0.01650842875242233, "step": 53495 }, { "epoch": 0.5035294117647059, "grad_norm": 0.5096408246296454, "learning_rate": 2.8191864267262542e-06, "loss": 0.01916680335998535, "step": 53500 }, { "epoch": 0.5035764705882353, "grad_norm": 0.4382804733089399, "learning_rate": 2.819054695813906e-06, "loss": 0.01864844411611557, "step": 53505 }, { "epoch": 0.5036235294117647, "grad_norm": 0.7218144331287861, "learning_rate": 2.8189229833658325e-06, "loss": 0.019100603461265565, "step": 53510 }, { "epoch": 0.5036705882352941, "grad_norm": 1.3294380689397354, "learning_rate": 2.818791289377722e-06, "loss": 0.010593089461326598, "step": 53515 }, { "epoch": 0.5037176470588235, "grad_norm": 0.4918634559377106, "learning_rate": 2.818659613845263e-06, "loss": 0.022623610496520997, "step": 53520 }, { "epoch": 0.5037647058823529, "grad_norm": 0.5526321184842888, "learning_rate": 2.818527956764144e-06, "loss": 0.019149024784564973, "step": 53525 }, { "epoch": 0.5038117647058824, "grad_norm": 0.517077062267758, "learning_rate": 2.818396318130057e-06, "loss": 0.019920185208320618, "step": 53530 }, { "epoch": 0.5038588235294118, "grad_norm": 0.388569680887872, "learning_rate": 2.818264697938694e-06, "loss": 0.01728326380252838, "step": 53535 }, { "epoch": 0.5039058823529412, "grad_norm": 1.017698285164432, "learning_rate": 2.8181330961857496e-06, "loss": 0.022917407751083373, "step": 53540 }, { "epoch": 0.5039529411764706, "grad_norm": 0.6094034551152522, "learning_rate": 2.818001512866919e-06, "loss": 0.022903460264205932, "step": 53545 }, { "epoch": 0.504, "grad_norm": 0.5452864667154529, "learning_rate": 2.8178699479778996e-06, "loss": 0.01697266399860382, "step": 53550 }, { "epoch": 0.5040470588235294, "grad_norm": 0.4558773558564846, "learning_rate": 2.8177384015143887e-06, "loss": 0.01929534673690796, "step": 53555 }, { "epoch": 0.5040941176470588, "grad_norm": 0.549642282530959, "learning_rate": 2.8176068734720864e-06, "loss": 0.03553933203220368, "step": 53560 }, { "epoch": 0.5041411764705882, "grad_norm": 0.6465968605197802, "learning_rate": 2.8174753638466935e-06, "loss": 0.021617376804351808, "step": 53565 }, { "epoch": 0.5041882352941176, "grad_norm": 0.5784482752798048, "learning_rate": 2.8173438726339127e-06, "loss": 0.017851820588111876, "step": 53570 }, { "epoch": 0.504235294117647, "grad_norm": 0.5562401377478001, "learning_rate": 2.817212399829447e-06, "loss": 0.02045438587665558, "step": 53575 }, { "epoch": 0.5042823529411765, "grad_norm": 0.6299306632360386, "learning_rate": 2.8170809454290026e-06, "loss": 0.018688708543777466, "step": 53580 }, { "epoch": 0.5043294117647059, "grad_norm": 0.59151049646109, "learning_rate": 2.8169495094282857e-06, "loss": 0.017954812943935396, "step": 53585 }, { "epoch": 0.5043764705882353, "grad_norm": 0.650231649387016, "learning_rate": 2.8168180918230047e-06, "loss": 0.015461510419845581, "step": 53590 }, { "epoch": 0.5044235294117647, "grad_norm": 0.5467416551450583, "learning_rate": 2.8166866926088685e-06, "loss": 0.019785596430301665, "step": 53595 }, { "epoch": 0.5044705882352941, "grad_norm": 0.5909702352491386, "learning_rate": 2.8165553117815887e-06, "loss": 0.022149020433425905, "step": 53600 }, { "epoch": 0.5045176470588235, "grad_norm": 0.8999195168013656, "learning_rate": 2.8164239493368766e-06, "loss": 0.020387905836105346, "step": 53605 }, { "epoch": 0.5045647058823529, "grad_norm": 0.44411098874781363, "learning_rate": 2.8162926052704464e-06, "loss": 0.020426352322101594, "step": 53610 }, { "epoch": 0.5046117647058823, "grad_norm": 1.2887064493871023, "learning_rate": 2.816161279578013e-06, "loss": 0.02078389674425125, "step": 53615 }, { "epoch": 0.5046588235294117, "grad_norm": 0.3737422646888746, "learning_rate": 2.816029972255293e-06, "loss": 0.015383675694465637, "step": 53620 }, { "epoch": 0.5047058823529412, "grad_norm": 0.5376048603057564, "learning_rate": 2.8158986832980044e-06, "loss": 0.01689123213291168, "step": 53625 }, { "epoch": 0.5047529411764706, "grad_norm": 0.3939870555604059, "learning_rate": 2.8157674127018656e-06, "loss": 0.012428107857704162, "step": 53630 }, { "epoch": 0.5048, "grad_norm": 0.564936302276466, "learning_rate": 2.8156361604625982e-06, "loss": 0.01824682354927063, "step": 53635 }, { "epoch": 0.5048470588235294, "grad_norm": 0.4679496346327662, "learning_rate": 2.815504926575924e-06, "loss": 0.01504894495010376, "step": 53640 }, { "epoch": 0.5048941176470588, "grad_norm": 0.47652745432753735, "learning_rate": 2.8153737110375657e-06, "loss": 0.015434789657592773, "step": 53645 }, { "epoch": 0.5049411764705882, "grad_norm": 0.4580616657317192, "learning_rate": 2.815242513843248e-06, "loss": 0.014986249804496764, "step": 53650 }, { "epoch": 0.5049882352941176, "grad_norm": 0.5341572334429202, "learning_rate": 2.8151113349886994e-06, "loss": 0.018539293110370635, "step": 53655 }, { "epoch": 0.505035294117647, "grad_norm": 0.5935428581016031, "learning_rate": 2.8149801744696446e-06, "loss": 0.019562169909477234, "step": 53660 }, { "epoch": 0.5050823529411764, "grad_norm": 0.4691297149807225, "learning_rate": 2.8148490322818143e-06, "loss": 0.020299157500267027, "step": 53665 }, { "epoch": 0.5051294117647059, "grad_norm": 0.5849320385266112, "learning_rate": 2.8147179084209386e-06, "loss": 0.017417436838150023, "step": 53670 }, { "epoch": 0.5051764705882353, "grad_norm": 0.6857218717068002, "learning_rate": 2.814586802882749e-06, "loss": 0.023906487226486205, "step": 53675 }, { "epoch": 0.5052235294117647, "grad_norm": 0.4282420227214458, "learning_rate": 2.8144557156629785e-06, "loss": 0.019456182420253754, "step": 53680 }, { "epoch": 0.5052705882352941, "grad_norm": 0.46375456154543204, "learning_rate": 2.814324646757362e-06, "loss": 0.015730154514312745, "step": 53685 }, { "epoch": 0.5053176470588235, "grad_norm": 0.29619820741514796, "learning_rate": 2.8141935961616356e-06, "loss": 0.018564772605895997, "step": 53690 }, { "epoch": 0.5053647058823529, "grad_norm": 0.6921430661103477, "learning_rate": 2.814062563871537e-06, "loss": 0.01678953766822815, "step": 53695 }, { "epoch": 0.5054117647058823, "grad_norm": 0.48405074074322885, "learning_rate": 2.8139315498828034e-06, "loss": 0.022335723042488098, "step": 53700 }, { "epoch": 0.5054588235294117, "grad_norm": 0.4742799623246158, "learning_rate": 2.8138005541911768e-06, "loss": 0.019012796878814697, "step": 53705 }, { "epoch": 0.5055058823529411, "grad_norm": 3.8652986123496245, "learning_rate": 2.8136695767923972e-06, "loss": 0.01892789304256439, "step": 53710 }, { "epoch": 0.5055529411764705, "grad_norm": 0.5638523602256125, "learning_rate": 2.813538617682209e-06, "loss": 0.01941792666912079, "step": 53715 }, { "epoch": 0.5056, "grad_norm": 0.5295741225883365, "learning_rate": 2.8134076768563545e-06, "loss": 0.016275663673877717, "step": 53720 }, { "epoch": 0.5056470588235294, "grad_norm": 0.5724260254622057, "learning_rate": 2.8132767543105816e-06, "loss": 0.014223767817020417, "step": 53725 }, { "epoch": 0.5056941176470588, "grad_norm": 0.7066205766558501, "learning_rate": 2.8131458500406354e-06, "loss": 0.0213690847158432, "step": 53730 }, { "epoch": 0.5057411764705882, "grad_norm": 0.7317470319588301, "learning_rate": 2.813014964042266e-06, "loss": 0.017966936528682708, "step": 53735 }, { "epoch": 0.5057882352941176, "grad_norm": 0.4394685724387417, "learning_rate": 2.812884096311222e-06, "loss": 0.019064465165138246, "step": 53740 }, { "epoch": 0.505835294117647, "grad_norm": 0.45937458183262003, "learning_rate": 2.812753246843255e-06, "loss": 0.02293653190135956, "step": 53745 }, { "epoch": 0.5058823529411764, "grad_norm": 0.6668829887090318, "learning_rate": 2.8126224156341177e-06, "loss": 0.022180645167827605, "step": 53750 }, { "epoch": 0.5059294117647058, "grad_norm": 0.46897247237675427, "learning_rate": 2.8124916026795635e-06, "loss": 0.013693147897720337, "step": 53755 }, { "epoch": 0.5059764705882352, "grad_norm": 0.5309498372652007, "learning_rate": 2.812360807975349e-06, "loss": 0.01834746599197388, "step": 53760 }, { "epoch": 0.5060235294117648, "grad_norm": 0.7680563857533489, "learning_rate": 2.8122300315172302e-06, "loss": 0.020154917240142824, "step": 53765 }, { "epoch": 0.5060705882352942, "grad_norm": 0.385920487971779, "learning_rate": 2.812099273300965e-06, "loss": 0.014985489845275878, "step": 53770 }, { "epoch": 0.5061176470588236, "grad_norm": 0.4310354176260973, "learning_rate": 2.811968533322313e-06, "loss": 0.021978680789470673, "step": 53775 }, { "epoch": 0.506164705882353, "grad_norm": 0.4757821852590886, "learning_rate": 2.8118378115770352e-06, "loss": 0.018188634514808656, "step": 53780 }, { "epoch": 0.5062117647058824, "grad_norm": 0.7991298206296829, "learning_rate": 2.811707108060894e-06, "loss": 0.02386879026889801, "step": 53785 }, { "epoch": 0.5062588235294118, "grad_norm": 0.5398051649833725, "learning_rate": 2.811576422769653e-06, "loss": 0.01772007644176483, "step": 53790 }, { "epoch": 0.5063058823529412, "grad_norm": 0.5073353196188428, "learning_rate": 2.811445755699077e-06, "loss": 0.015515971183776855, "step": 53795 }, { "epoch": 0.5063529411764706, "grad_norm": 0.5204712644268765, "learning_rate": 2.8113151068449328e-06, "loss": 0.01577509343624115, "step": 53800 }, { "epoch": 0.5064, "grad_norm": 0.3705454353368328, "learning_rate": 2.811184476202988e-06, "loss": 0.01747864782810211, "step": 53805 }, { "epoch": 0.5064470588235294, "grad_norm": 2.170316747967642, "learning_rate": 2.811053863769011e-06, "loss": 0.018099668622016906, "step": 53810 }, { "epoch": 0.5064941176470589, "grad_norm": 0.4718329770501439, "learning_rate": 2.810923269538774e-06, "loss": 0.01668870747089386, "step": 53815 }, { "epoch": 0.5065411764705883, "grad_norm": 0.42784586364680227, "learning_rate": 2.810792693508047e-06, "loss": 0.015417854487895965, "step": 53820 }, { "epoch": 0.5065882352941177, "grad_norm": 0.8410035035690459, "learning_rate": 2.8106621356726047e-06, "loss": 0.021935291588306427, "step": 53825 }, { "epoch": 0.5066352941176471, "grad_norm": 0.5506500612267647, "learning_rate": 2.810531596028221e-06, "loss": 0.017114754021167754, "step": 53830 }, { "epoch": 0.5066823529411765, "grad_norm": 0.5136534686398307, "learning_rate": 2.810401074570672e-06, "loss": 0.016559839248657227, "step": 53835 }, { "epoch": 0.5067294117647059, "grad_norm": 0.6797979200893837, "learning_rate": 2.8102705712957357e-06, "loss": 0.01933189183473587, "step": 53840 }, { "epoch": 0.5067764705882353, "grad_norm": 0.37635677840836335, "learning_rate": 2.81014008619919e-06, "loss": 0.014887471497058869, "step": 53845 }, { "epoch": 0.5068235294117647, "grad_norm": 0.6714361564417343, "learning_rate": 2.8100096192768163e-06, "loss": 0.01908904016017914, "step": 53850 }, { "epoch": 0.5068705882352941, "grad_norm": 0.5628463458592916, "learning_rate": 2.8098791705243943e-06, "loss": 0.018497681617736815, "step": 53855 }, { "epoch": 0.5069176470588236, "grad_norm": 0.31156663198897816, "learning_rate": 2.809748739937708e-06, "loss": 0.014682719111442566, "step": 53860 }, { "epoch": 0.506964705882353, "grad_norm": 0.41988674952463795, "learning_rate": 2.809618327512542e-06, "loss": 0.01888072192668915, "step": 53865 }, { "epoch": 0.5070117647058824, "grad_norm": 0.5098406694813621, "learning_rate": 2.8094879332446813e-06, "loss": 0.014050370454788208, "step": 53870 }, { "epoch": 0.5070588235294118, "grad_norm": 0.49585915591319696, "learning_rate": 2.8093575571299127e-06, "loss": 0.020060317218303682, "step": 53875 }, { "epoch": 0.5071058823529412, "grad_norm": 0.5568503861667383, "learning_rate": 2.8092271991640248e-06, "loss": 0.017330273985862732, "step": 53880 }, { "epoch": 0.5071529411764706, "grad_norm": 0.7034832949957853, "learning_rate": 2.8090968593428074e-06, "loss": 0.01839511692523956, "step": 53885 }, { "epoch": 0.5072, "grad_norm": 0.725516160442968, "learning_rate": 2.8089665376620517e-06, "loss": 0.015833342075347902, "step": 53890 }, { "epoch": 0.5072470588235294, "grad_norm": 0.4897646575097804, "learning_rate": 2.80883623411755e-06, "loss": 0.016097040474414827, "step": 53895 }, { "epoch": 0.5072941176470588, "grad_norm": 0.833130551611577, "learning_rate": 2.8087059487050968e-06, "loss": 0.01742132008075714, "step": 53900 }, { "epoch": 0.5073411764705882, "grad_norm": 0.600871090321867, "learning_rate": 2.808575681420486e-06, "loss": 0.018569856882095337, "step": 53905 }, { "epoch": 0.5073882352941177, "grad_norm": 0.5467222070383713, "learning_rate": 2.8084454322595158e-06, "loss": 0.020775532722473143, "step": 53910 }, { "epoch": 0.5074352941176471, "grad_norm": 0.6356813689387182, "learning_rate": 2.808315201217982e-06, "loss": 0.016026368737220763, "step": 53915 }, { "epoch": 0.5074823529411765, "grad_norm": 0.5607602125368795, "learning_rate": 2.8081849882916858e-06, "loss": 0.02184591442346573, "step": 53920 }, { "epoch": 0.5075294117647059, "grad_norm": 0.549666881582836, "learning_rate": 2.808054793476427e-06, "loss": 0.016852453351020813, "step": 53925 }, { "epoch": 0.5075764705882353, "grad_norm": 0.581806895241212, "learning_rate": 2.8079246167680075e-06, "loss": 0.02034778892993927, "step": 53930 }, { "epoch": 0.5076235294117647, "grad_norm": 0.4485471011869195, "learning_rate": 2.8077944581622314e-06, "loss": 0.015207847952842713, "step": 53935 }, { "epoch": 0.5076705882352941, "grad_norm": 0.40766667632289855, "learning_rate": 2.8076643176549024e-06, "loss": 0.016089898347854615, "step": 53940 }, { "epoch": 0.5077176470588235, "grad_norm": 0.5862291355552061, "learning_rate": 2.8075341952418273e-06, "loss": 0.016830834746360778, "step": 53945 }, { "epoch": 0.5077647058823529, "grad_norm": 0.47053149055290105, "learning_rate": 2.8074040909188133e-06, "loss": 0.014856351912021637, "step": 53950 }, { "epoch": 0.5078117647058824, "grad_norm": 0.7437320874428632, "learning_rate": 2.807274004681669e-06, "loss": 0.027507618069648743, "step": 53955 }, { "epoch": 0.5078588235294118, "grad_norm": 0.5563857328498308, "learning_rate": 2.807143936526206e-06, "loss": 0.017727525532245637, "step": 53960 }, { "epoch": 0.5079058823529412, "grad_norm": 0.34886612226561425, "learning_rate": 2.807013886448234e-06, "loss": 0.014297689497470855, "step": 53965 }, { "epoch": 0.5079529411764706, "grad_norm": 0.525374403046667, "learning_rate": 2.806883854443566e-06, "loss": 0.018243061006069185, "step": 53970 }, { "epoch": 0.508, "grad_norm": 0.5228376762789836, "learning_rate": 2.806753840508018e-06, "loss": 0.014612008631229401, "step": 53975 }, { "epoch": 0.5080470588235294, "grad_norm": 0.7478230665185414, "learning_rate": 2.8066238446374038e-06, "loss": 0.02177558243274689, "step": 53980 }, { "epoch": 0.5080941176470588, "grad_norm": 0.46869455121697323, "learning_rate": 2.8064938668275416e-06, "loss": 0.0187268003821373, "step": 53985 }, { "epoch": 0.5081411764705882, "grad_norm": 0.4400351981131546, "learning_rate": 2.8063639070742488e-06, "loss": 0.024734371900558473, "step": 53990 }, { "epoch": 0.5081882352941176, "grad_norm": 0.49819939638729405, "learning_rate": 2.8062339653733456e-06, "loss": 0.01864568144083023, "step": 53995 }, { "epoch": 0.508235294117647, "grad_norm": 0.9504932386402226, "learning_rate": 2.8061040417206525e-06, "loss": 0.016840991377830506, "step": 54000 }, { "epoch": 0.5082823529411765, "grad_norm": 0.6099419580555618, "learning_rate": 2.8059741361119923e-06, "loss": 0.018809893727302553, "step": 54005 }, { "epoch": 0.5083294117647059, "grad_norm": 0.47003807990186824, "learning_rate": 2.805844248543189e-06, "loss": 0.022028687596321105, "step": 54010 }, { "epoch": 0.5083764705882353, "grad_norm": 0.6228949734330744, "learning_rate": 2.8057143790100677e-06, "loss": 0.023486368358135223, "step": 54015 }, { "epoch": 0.5084235294117647, "grad_norm": 0.41392993278417645, "learning_rate": 2.805584527508454e-06, "loss": 0.01907360553741455, "step": 54020 }, { "epoch": 0.5084705882352941, "grad_norm": 0.30533080190362, "learning_rate": 2.805454694034177e-06, "loss": 0.013531769812107085, "step": 54025 }, { "epoch": 0.5085176470588235, "grad_norm": 0.5515536494173269, "learning_rate": 2.8053248785830643e-06, "loss": 0.01784202307462692, "step": 54030 }, { "epoch": 0.5085647058823529, "grad_norm": 0.7332792674630985, "learning_rate": 2.8051950811509477e-06, "loss": 0.017070603370666505, "step": 54035 }, { "epoch": 0.5086117647058823, "grad_norm": 0.6906760862426461, "learning_rate": 2.8050653017336583e-06, "loss": 0.016350778937339782, "step": 54040 }, { "epoch": 0.5086588235294117, "grad_norm": 0.5254437918193142, "learning_rate": 2.8049355403270295e-06, "loss": 0.023495444655418397, "step": 54045 }, { "epoch": 0.5087058823529412, "grad_norm": 0.5144759742328024, "learning_rate": 2.804805796926896e-06, "loss": 0.019257445633411408, "step": 54050 }, { "epoch": 0.5087529411764706, "grad_norm": 1.116708845571421, "learning_rate": 2.8046760715290933e-06, "loss": 0.019862447679042817, "step": 54055 }, { "epoch": 0.5088, "grad_norm": 0.5005066378177011, "learning_rate": 2.804546364129459e-06, "loss": 0.0208682119846344, "step": 54060 }, { "epoch": 0.5088470588235294, "grad_norm": 0.4803149843048366, "learning_rate": 2.8044166747238315e-06, "loss": 0.01848883628845215, "step": 54065 }, { "epoch": 0.5088941176470588, "grad_norm": 0.6524148198553703, "learning_rate": 2.8042870033080514e-06, "loss": 0.01944739818572998, "step": 54070 }, { "epoch": 0.5089411764705882, "grad_norm": 0.8502141526273904, "learning_rate": 2.8041573498779593e-06, "loss": 0.01973186880350113, "step": 54075 }, { "epoch": 0.5089882352941176, "grad_norm": 0.5945246771348957, "learning_rate": 2.804027714429398e-06, "loss": 0.014863082766532898, "step": 54080 }, { "epoch": 0.509035294117647, "grad_norm": 0.6829066168801182, "learning_rate": 2.8038980969582114e-06, "loss": 0.019752436876297, "step": 54085 }, { "epoch": 0.5090823529411764, "grad_norm": 0.4679878121746301, "learning_rate": 2.803768497460245e-06, "loss": 0.021175062656402587, "step": 54090 }, { "epoch": 0.5091294117647058, "grad_norm": 0.37301853149095887, "learning_rate": 2.803638915931345e-06, "loss": 0.01561153382062912, "step": 54095 }, { "epoch": 0.5091764705882353, "grad_norm": 0.5018795355304281, "learning_rate": 2.8035093523673603e-06, "loss": 0.017325718700885773, "step": 54100 }, { "epoch": 0.5092235294117647, "grad_norm": 0.40572991453600216, "learning_rate": 2.8033798067641393e-06, "loss": 0.014943122863769531, "step": 54105 }, { "epoch": 0.5092705882352941, "grad_norm": 0.5664877703526922, "learning_rate": 2.803250279117533e-06, "loss": 0.016336625814437865, "step": 54110 }, { "epoch": 0.5093176470588235, "grad_norm": 0.6288339115731879, "learning_rate": 2.803120769423394e-06, "loss": 0.019489532709121703, "step": 54115 }, { "epoch": 0.5093647058823529, "grad_norm": 0.6275117191607734, "learning_rate": 2.802991277677575e-06, "loss": 0.020534652471542358, "step": 54120 }, { "epoch": 0.5094117647058823, "grad_norm": 0.47362315585694864, "learning_rate": 2.802861803875931e-06, "loss": 0.014274793863296508, "step": 54125 }, { "epoch": 0.5094588235294117, "grad_norm": 0.5203386985532119, "learning_rate": 2.8027323480143177e-06, "loss": 0.019485250115394592, "step": 54130 }, { "epoch": 0.5095058823529411, "grad_norm": 1.0733605099291657, "learning_rate": 2.802602910088593e-06, "loss": 0.017065495252609253, "step": 54135 }, { "epoch": 0.5095529411764705, "grad_norm": 0.4588670906996008, "learning_rate": 2.8024734900946153e-06, "loss": 0.020077842473983764, "step": 54140 }, { "epoch": 0.5096, "grad_norm": 0.4921571916391907, "learning_rate": 2.802344088028245e-06, "loss": 0.02053496241569519, "step": 54145 }, { "epoch": 0.5096470588235295, "grad_norm": 0.8836462734361348, "learning_rate": 2.8022147038853435e-06, "loss": 0.018833082914352418, "step": 54150 }, { "epoch": 0.5096941176470589, "grad_norm": 0.6646876692063931, "learning_rate": 2.8020853376617736e-06, "loss": 0.025281798839569092, "step": 54155 }, { "epoch": 0.5097411764705883, "grad_norm": 0.38090294547704057, "learning_rate": 2.801955989353398e-06, "loss": 0.016125795245170594, "step": 54160 }, { "epoch": 0.5097882352941177, "grad_norm": 0.4208767527659298, "learning_rate": 2.8018266589560846e-06, "loss": 0.014511632919311523, "step": 54165 }, { "epoch": 0.509835294117647, "grad_norm": 0.6594969040428725, "learning_rate": 2.8016973464656984e-06, "loss": 0.016218632459640503, "step": 54170 }, { "epoch": 0.5098823529411765, "grad_norm": 0.5482647887592025, "learning_rate": 2.8015680518781074e-06, "loss": 0.01986956000328064, "step": 54175 }, { "epoch": 0.5099294117647059, "grad_norm": 0.4689794112793018, "learning_rate": 2.8014387751891824e-06, "loss": 0.016113331913948058, "step": 54180 }, { "epoch": 0.5099764705882353, "grad_norm": 0.6421299226327091, "learning_rate": 2.8013095163947927e-06, "loss": 0.01885565519332886, "step": 54185 }, { "epoch": 0.5100235294117647, "grad_norm": 0.3730614249174131, "learning_rate": 2.8011802754908116e-06, "loss": 0.016572719812393187, "step": 54190 }, { "epoch": 0.5100705882352942, "grad_norm": 0.34644002294761195, "learning_rate": 2.8010510524731116e-06, "loss": 0.013177531957626342, "step": 54195 }, { "epoch": 0.5101176470588236, "grad_norm": 0.35403597928697467, "learning_rate": 2.8009218473375684e-06, "loss": 0.01601347029209137, "step": 54200 }, { "epoch": 0.510164705882353, "grad_norm": 0.5265954693119391, "learning_rate": 2.8007926600800573e-06, "loss": 0.014658266305923462, "step": 54205 }, { "epoch": 0.5102117647058824, "grad_norm": 0.48755649669448287, "learning_rate": 2.800663490696455e-06, "loss": 0.018937113881111144, "step": 54210 }, { "epoch": 0.5102588235294118, "grad_norm": 0.6077480144373649, "learning_rate": 2.800534339182642e-06, "loss": 0.02056969404220581, "step": 54215 }, { "epoch": 0.5103058823529412, "grad_norm": 1.9799980480373143, "learning_rate": 2.800405205534498e-06, "loss": 0.01702505499124527, "step": 54220 }, { "epoch": 0.5103529411764706, "grad_norm": 0.6504208083367793, "learning_rate": 2.8002760897479035e-06, "loss": 0.024861161410808564, "step": 54225 }, { "epoch": 0.5104, "grad_norm": 0.3807976571215967, "learning_rate": 2.8001469918187425e-06, "loss": 0.016790664196014403, "step": 54230 }, { "epoch": 0.5104470588235294, "grad_norm": 0.7485670345765568, "learning_rate": 2.8000179117428976e-06, "loss": 0.019525419175624847, "step": 54235 }, { "epoch": 0.5104941176470589, "grad_norm": 0.40468795375060995, "learning_rate": 2.7998888495162553e-06, "loss": 0.015781626105308533, "step": 54240 }, { "epoch": 0.5105411764705883, "grad_norm": 0.4751141721492711, "learning_rate": 2.799759805134702e-06, "loss": 0.019308218359947206, "step": 54245 }, { "epoch": 0.5105882352941177, "grad_norm": 0.6131329207780243, "learning_rate": 2.7996307785941257e-06, "loss": 0.01930299699306488, "step": 54250 }, { "epoch": 0.5106352941176471, "grad_norm": 0.3926035945670413, "learning_rate": 2.7995017698904154e-06, "loss": 0.018890081346035002, "step": 54255 }, { "epoch": 0.5106823529411765, "grad_norm": 0.541321620600299, "learning_rate": 2.799372779019463e-06, "loss": 0.0215247243642807, "step": 54260 }, { "epoch": 0.5107294117647059, "grad_norm": 0.588315743510357, "learning_rate": 2.7992438059771594e-06, "loss": 0.01703965961933136, "step": 54265 }, { "epoch": 0.5107764705882353, "grad_norm": 0.5224731180720548, "learning_rate": 2.799114850759398e-06, "loss": 0.01835084557533264, "step": 54270 }, { "epoch": 0.5108235294117647, "grad_norm": 0.5998547488935871, "learning_rate": 2.798985913362074e-06, "loss": 0.020199695229530336, "step": 54275 }, { "epoch": 0.5108705882352941, "grad_norm": 0.5326884205900109, "learning_rate": 2.798856993781083e-06, "loss": 0.017830446362495422, "step": 54280 }, { "epoch": 0.5109176470588235, "grad_norm": 0.43731392689299264, "learning_rate": 2.7987280920123233e-06, "loss": 0.023381848633289338, "step": 54285 }, { "epoch": 0.510964705882353, "grad_norm": 0.5710705135220698, "learning_rate": 2.798599208051692e-06, "loss": 0.015428195893764495, "step": 54290 }, { "epoch": 0.5110117647058824, "grad_norm": 0.5090353421411806, "learning_rate": 2.7984703418950905e-06, "loss": 0.0177694171667099, "step": 54295 }, { "epoch": 0.5110588235294118, "grad_norm": 0.5871123864296942, "learning_rate": 2.798341493538419e-06, "loss": 0.022394433617591858, "step": 54300 }, { "epoch": 0.5111058823529412, "grad_norm": 0.46188222831801634, "learning_rate": 2.798212662977581e-06, "loss": 0.016901424527168273, "step": 54305 }, { "epoch": 0.5111529411764706, "grad_norm": 0.3907975041399448, "learning_rate": 2.7980838502084802e-06, "loss": 0.014594870805740356, "step": 54310 }, { "epoch": 0.5112, "grad_norm": 0.4532382930826833, "learning_rate": 2.797955055227021e-06, "loss": 0.015578846633434295, "step": 54315 }, { "epoch": 0.5112470588235294, "grad_norm": 0.5207570323700772, "learning_rate": 2.7978262780291116e-06, "loss": 0.023811420798301695, "step": 54320 }, { "epoch": 0.5112941176470588, "grad_norm": 0.38053606483634433, "learning_rate": 2.7976975186106587e-06, "loss": 0.014945319294929505, "step": 54325 }, { "epoch": 0.5113411764705882, "grad_norm": 0.6618433568030448, "learning_rate": 2.7975687769675707e-06, "loss": 0.01930379569530487, "step": 54330 }, { "epoch": 0.5113882352941177, "grad_norm": 0.409405806026615, "learning_rate": 2.79744005309576e-06, "loss": 0.015995346009731293, "step": 54335 }, { "epoch": 0.5114352941176471, "grad_norm": 0.5663040005135234, "learning_rate": 2.797311346991138e-06, "loss": 0.01983673870563507, "step": 54340 }, { "epoch": 0.5114823529411765, "grad_norm": 0.395306484555664, "learning_rate": 2.7971826586496176e-06, "loss": 0.017732524871826173, "step": 54345 }, { "epoch": 0.5115294117647059, "grad_norm": 0.39933783661950345, "learning_rate": 2.797053988067112e-06, "loss": 0.015974313020706177, "step": 54350 }, { "epoch": 0.5115764705882353, "grad_norm": 0.5289361157247569, "learning_rate": 2.7969253352395396e-06, "loss": 0.014867833256721497, "step": 54355 }, { "epoch": 0.5116235294117647, "grad_norm": 0.684346761306375, "learning_rate": 2.7967967001628153e-06, "loss": 0.022195249795913696, "step": 54360 }, { "epoch": 0.5116705882352941, "grad_norm": 0.5601271828704415, "learning_rate": 2.7966680828328586e-06, "loss": 0.02285194993019104, "step": 54365 }, { "epoch": 0.5117176470588235, "grad_norm": 0.4163712238782919, "learning_rate": 2.7965394832455887e-06, "loss": 0.014729762077331543, "step": 54370 }, { "epoch": 0.5117647058823529, "grad_norm": 0.5827343195049852, "learning_rate": 2.7964109013969278e-06, "loss": 0.021440139412879942, "step": 54375 }, { "epoch": 0.5118117647058823, "grad_norm": 0.49219489884019785, "learning_rate": 2.7962823372827968e-06, "loss": 0.014976586401462554, "step": 54380 }, { "epoch": 0.5118588235294118, "grad_norm": 0.5326087102710624, "learning_rate": 2.79615379089912e-06, "loss": 0.018061424791812896, "step": 54385 }, { "epoch": 0.5119058823529412, "grad_norm": 0.7054709945947211, "learning_rate": 2.7960252622418223e-06, "loss": 0.019107726216316224, "step": 54390 }, { "epoch": 0.5119529411764706, "grad_norm": 0.45274300310252075, "learning_rate": 2.7958967513068304e-06, "loss": 0.014658504724502563, "step": 54395 }, { "epoch": 0.512, "grad_norm": 0.5144021345768903, "learning_rate": 2.7957682580900708e-06, "loss": 0.015921710431575774, "step": 54400 }, { "epoch": 0.5120470588235294, "grad_norm": 0.7453127802051587, "learning_rate": 2.7956397825874737e-06, "loss": 0.01949254870414734, "step": 54405 }, { "epoch": 0.5120941176470588, "grad_norm": 0.6300000506111797, "learning_rate": 2.795511324794969e-06, "loss": 0.018010638654232025, "step": 54410 }, { "epoch": 0.5121411764705882, "grad_norm": 0.6900658147497019, "learning_rate": 2.7953828847084873e-06, "loss": 0.018434908986091614, "step": 54415 }, { "epoch": 0.5121882352941176, "grad_norm": 0.6470359207665921, "learning_rate": 2.795254462323963e-06, "loss": 0.017710892856121062, "step": 54420 }, { "epoch": 0.512235294117647, "grad_norm": 0.5394961174692917, "learning_rate": 2.7951260576373297e-06, "loss": 0.021108019351959228, "step": 54425 }, { "epoch": 0.5122823529411765, "grad_norm": 0.49977664177006553, "learning_rate": 2.794997670644522e-06, "loss": 0.022357966005802154, "step": 54430 }, { "epoch": 0.5123294117647059, "grad_norm": 0.7344504015441958, "learning_rate": 2.7948693013414776e-06, "loss": 0.019382403790950777, "step": 54435 }, { "epoch": 0.5123764705882353, "grad_norm": 0.9066104137787175, "learning_rate": 2.794740949724134e-06, "loss": 0.016764797270298004, "step": 54440 }, { "epoch": 0.5124235294117647, "grad_norm": 0.4770748681170068, "learning_rate": 2.7946126157884306e-06, "loss": 0.018862621486186983, "step": 54445 }, { "epoch": 0.5124705882352941, "grad_norm": 0.6246647792198219, "learning_rate": 2.794484299530309e-06, "loss": 0.020169413089752196, "step": 54450 }, { "epoch": 0.5125176470588235, "grad_norm": 0.4602211316701822, "learning_rate": 2.7943560009457105e-06, "loss": 0.017293483018875122, "step": 54455 }, { "epoch": 0.5125647058823529, "grad_norm": 0.7224676815325417, "learning_rate": 2.7942277200305784e-06, "loss": 0.020820152759552003, "step": 54460 }, { "epoch": 0.5126117647058823, "grad_norm": 0.30411066212214727, "learning_rate": 2.794099456780857e-06, "loss": 0.012746736407279968, "step": 54465 }, { "epoch": 0.5126588235294117, "grad_norm": 0.4173347196192839, "learning_rate": 2.793971211192492e-06, "loss": 0.01896085441112518, "step": 54470 }, { "epoch": 0.5127058823529411, "grad_norm": 0.494370603197173, "learning_rate": 2.793842983261433e-06, "loss": 0.023312526941299438, "step": 54475 }, { "epoch": 0.5127529411764706, "grad_norm": 0.3612086882258985, "learning_rate": 2.7937147729836245e-06, "loss": 0.017275068163871764, "step": 54480 }, { "epoch": 0.5128, "grad_norm": 0.6259687312373468, "learning_rate": 2.79358658035502e-06, "loss": 0.02000814974308014, "step": 54485 }, { "epoch": 0.5128470588235294, "grad_norm": 0.4144858523519561, "learning_rate": 2.793458405371568e-06, "loss": 0.01747497618198395, "step": 54490 }, { "epoch": 0.5128941176470588, "grad_norm": 0.4985909178628307, "learning_rate": 2.7933302480292223e-06, "loss": 0.014340990781784057, "step": 54495 }, { "epoch": 0.5129411764705882, "grad_norm": 0.6072942145712804, "learning_rate": 2.7932021083239365e-06, "loss": 0.016115009784698486, "step": 54500 }, { "epoch": 0.5129882352941176, "grad_norm": 0.47858969139944607, "learning_rate": 2.793073986251665e-06, "loss": 0.01833841800689697, "step": 54505 }, { "epoch": 0.513035294117647, "grad_norm": 0.40037595169532897, "learning_rate": 2.792945881808365e-06, "loss": 0.016307774186134338, "step": 54510 }, { "epoch": 0.5130823529411764, "grad_norm": 0.6988184428627877, "learning_rate": 2.792817794989993e-06, "loss": 0.017447060346603392, "step": 54515 }, { "epoch": 0.5131294117647058, "grad_norm": 0.6742853948938978, "learning_rate": 2.792689725792509e-06, "loss": 0.01987016201019287, "step": 54520 }, { "epoch": 0.5131764705882353, "grad_norm": 0.559535149789318, "learning_rate": 2.792561674211873e-06, "loss": 0.01820300370454788, "step": 54525 }, { "epoch": 0.5132235294117647, "grad_norm": 0.6242250968767681, "learning_rate": 2.792433640244045e-06, "loss": 0.016931340098381042, "step": 54530 }, { "epoch": 0.5132705882352941, "grad_norm": 0.4675283138056907, "learning_rate": 2.79230562388499e-06, "loss": 0.0256949245929718, "step": 54535 }, { "epoch": 0.5133176470588235, "grad_norm": 0.4150354908373168, "learning_rate": 2.7921776251306703e-06, "loss": 0.01822122037410736, "step": 54540 }, { "epoch": 0.513364705882353, "grad_norm": 0.7591615724758922, "learning_rate": 2.7920496439770524e-06, "loss": 0.020848071575164794, "step": 54545 }, { "epoch": 0.5134117647058823, "grad_norm": 0.6040951113357206, "learning_rate": 2.7919216804201027e-06, "loss": 0.0185492604970932, "step": 54550 }, { "epoch": 0.5134588235294117, "grad_norm": 0.5199756719752355, "learning_rate": 2.791793734455789e-06, "loss": 0.020829617977142334, "step": 54555 }, { "epoch": 0.5135058823529411, "grad_norm": 0.471683554559666, "learning_rate": 2.791665806080081e-06, "loss": 0.01699816882610321, "step": 54560 }, { "epoch": 0.5135529411764705, "grad_norm": 0.48450499290139754, "learning_rate": 2.791537895288948e-06, "loss": 0.015370464324951172, "step": 54565 }, { "epoch": 0.5136, "grad_norm": 0.4346927376040431, "learning_rate": 2.7914100020783634e-06, "loss": 0.013607659935951233, "step": 54570 }, { "epoch": 0.5136470588235295, "grad_norm": 0.8407927901483102, "learning_rate": 2.7912821264442997e-06, "loss": 0.01947081983089447, "step": 54575 }, { "epoch": 0.5136941176470589, "grad_norm": 0.5109331846227622, "learning_rate": 2.7911542683827316e-06, "loss": 0.01844581663608551, "step": 54580 }, { "epoch": 0.5137411764705883, "grad_norm": 0.5869447921065347, "learning_rate": 2.791026427889634e-06, "loss": 0.01719629466533661, "step": 54585 }, { "epoch": 0.5137882352941177, "grad_norm": 0.3986181172057272, "learning_rate": 2.790898604960984e-06, "loss": 0.012261311709880828, "step": 54590 }, { "epoch": 0.5138352941176471, "grad_norm": 0.49612943959953515, "learning_rate": 2.7907707995927614e-06, "loss": 0.01805433928966522, "step": 54595 }, { "epoch": 0.5138823529411765, "grad_norm": 0.5955638494551054, "learning_rate": 2.790643011780944e-06, "loss": 0.016924285888671876, "step": 54600 }, { "epoch": 0.5139294117647059, "grad_norm": 0.3679225829183224, "learning_rate": 2.7905152415215134e-06, "loss": 0.01972736567258835, "step": 54605 }, { "epoch": 0.5139764705882353, "grad_norm": 0.45410383714527025, "learning_rate": 2.7903874888104525e-06, "loss": 0.019434754550457, "step": 54610 }, { "epoch": 0.5140235294117647, "grad_norm": 0.7105001286953844, "learning_rate": 2.7902597536437436e-06, "loss": 0.017506003379821777, "step": 54615 }, { "epoch": 0.5140705882352942, "grad_norm": 0.42713402828253605, "learning_rate": 2.7901320360173717e-06, "loss": 0.017786473035812378, "step": 54620 }, { "epoch": 0.5141176470588236, "grad_norm": 0.5429389920733597, "learning_rate": 2.7900043359273233e-06, "loss": 0.019341260194778442, "step": 54625 }, { "epoch": 0.514164705882353, "grad_norm": 0.6247514618052817, "learning_rate": 2.7898766533695855e-06, "loss": 0.018229159712791442, "step": 54630 }, { "epoch": 0.5142117647058824, "grad_norm": 0.4724949654878035, "learning_rate": 2.7897489883401464e-06, "loss": 0.016497525572776794, "step": 54635 }, { "epoch": 0.5142588235294118, "grad_norm": 0.40463966924449263, "learning_rate": 2.7896213408349965e-06, "loss": 0.014319440722465515, "step": 54640 }, { "epoch": 0.5143058823529412, "grad_norm": 0.5601601424630418, "learning_rate": 2.7894937108501267e-06, "loss": 0.01627560257911682, "step": 54645 }, { "epoch": 0.5143529411764706, "grad_norm": 0.5862981773833249, "learning_rate": 2.7893660983815303e-06, "loss": 0.015834689140319824, "step": 54650 }, { "epoch": 0.5144, "grad_norm": 0.4972619801462882, "learning_rate": 2.789238503425199e-06, "loss": 0.01360945999622345, "step": 54655 }, { "epoch": 0.5144470588235294, "grad_norm": 0.43616894675147866, "learning_rate": 2.78911092597713e-06, "loss": 0.014311464130878448, "step": 54660 }, { "epoch": 0.5144941176470588, "grad_norm": 0.3970596055975035, "learning_rate": 2.7889833660333184e-06, "loss": 0.018135949969291687, "step": 54665 }, { "epoch": 0.5145411764705883, "grad_norm": 0.38514711817315966, "learning_rate": 2.7888558235897616e-06, "loss": 0.016675008833408354, "step": 54670 }, { "epoch": 0.5145882352941177, "grad_norm": 0.7230724773628232, "learning_rate": 2.7887282986424597e-06, "loss": 0.02003701627254486, "step": 54675 }, { "epoch": 0.5146352941176471, "grad_norm": 0.5131576047610668, "learning_rate": 2.788600791187412e-06, "loss": 0.018231084942817687, "step": 54680 }, { "epoch": 0.5146823529411765, "grad_norm": 0.5061119957428836, "learning_rate": 2.7884733012206196e-06, "loss": 0.017937180399894715, "step": 54685 }, { "epoch": 0.5147294117647059, "grad_norm": 0.6754219949913043, "learning_rate": 2.7883458287380853e-06, "loss": 0.01869749426841736, "step": 54690 }, { "epoch": 0.5147764705882353, "grad_norm": 0.4904329452964269, "learning_rate": 2.7882183737358136e-06, "loss": 0.01626814603805542, "step": 54695 }, { "epoch": 0.5148235294117647, "grad_norm": 0.7027761548234417, "learning_rate": 2.7880909362098103e-06, "loss": 0.019984647631645203, "step": 54700 }, { "epoch": 0.5148705882352941, "grad_norm": 0.6411650260548232, "learning_rate": 2.7879635161560803e-06, "loss": 0.023639196157455446, "step": 54705 }, { "epoch": 0.5149176470588235, "grad_norm": 0.4569066748034802, "learning_rate": 2.7878361135706324e-06, "loss": 0.01300450712442398, "step": 54710 }, { "epoch": 0.514964705882353, "grad_norm": 0.3726126835676448, "learning_rate": 2.787708728449476e-06, "loss": 0.015836437046527863, "step": 54715 }, { "epoch": 0.5150117647058824, "grad_norm": 0.40775008053974393, "learning_rate": 2.787581360788621e-06, "loss": 0.01895642876625061, "step": 54720 }, { "epoch": 0.5150588235294118, "grad_norm": 0.5685675690932035, "learning_rate": 2.787454010584078e-06, "loss": 0.01586957573890686, "step": 54725 }, { "epoch": 0.5151058823529412, "grad_norm": 0.5529866400368428, "learning_rate": 2.7873266778318627e-06, "loss": 0.02078450918197632, "step": 54730 }, { "epoch": 0.5151529411764706, "grad_norm": 0.49570402636787725, "learning_rate": 2.7871993625279863e-06, "loss": 0.017760279774665832, "step": 54735 }, { "epoch": 0.5152, "grad_norm": 0.5104608566938441, "learning_rate": 2.7870720646684664e-06, "loss": 0.01622791588306427, "step": 54740 }, { "epoch": 0.5152470588235294, "grad_norm": 0.6650322247084962, "learning_rate": 2.7869447842493184e-06, "loss": 0.01824805736541748, "step": 54745 }, { "epoch": 0.5152941176470588, "grad_norm": 0.5915001724652897, "learning_rate": 2.786817521266561e-06, "loss": 0.018059271574020385, "step": 54750 }, { "epoch": 0.5153411764705882, "grad_norm": 0.4825331541570341, "learning_rate": 2.7866902757162133e-06, "loss": 0.017513029277324677, "step": 54755 }, { "epoch": 0.5153882352941177, "grad_norm": 0.553789068283092, "learning_rate": 2.786563047594296e-06, "loss": 0.016545331478118895, "step": 54760 }, { "epoch": 0.5154352941176471, "grad_norm": 0.48413843848010396, "learning_rate": 2.786435836896831e-06, "loss": 0.015344415605068207, "step": 54765 }, { "epoch": 0.5154823529411765, "grad_norm": 0.30288340073497394, "learning_rate": 2.7863086436198405e-06, "loss": 0.017538774013519286, "step": 54770 }, { "epoch": 0.5155294117647059, "grad_norm": 0.42685410885578856, "learning_rate": 2.78618146775935e-06, "loss": 0.014237189292907714, "step": 54775 }, { "epoch": 0.5155764705882353, "grad_norm": 0.7749645664101451, "learning_rate": 2.786054309311385e-06, "loss": 0.018000562489032746, "step": 54780 }, { "epoch": 0.5156235294117647, "grad_norm": 0.46179429781279274, "learning_rate": 2.785927168271972e-06, "loss": 0.018219727277755737, "step": 54785 }, { "epoch": 0.5156705882352941, "grad_norm": 0.47816095734230396, "learning_rate": 2.7858000446371397e-06, "loss": 0.01770974099636078, "step": 54790 }, { "epoch": 0.5157176470588235, "grad_norm": 0.5069915154411306, "learning_rate": 2.785672938402917e-06, "loss": 0.016262033581733705, "step": 54795 }, { "epoch": 0.5157647058823529, "grad_norm": 0.4825367691233984, "learning_rate": 2.7855458495653335e-06, "loss": 0.019956314563751222, "step": 54800 }, { "epoch": 0.5158117647058823, "grad_norm": 0.5105426010291164, "learning_rate": 2.785418778120424e-06, "loss": 0.021657148003578187, "step": 54805 }, { "epoch": 0.5158588235294118, "grad_norm": 0.5327825510160861, "learning_rate": 2.78529172406422e-06, "loss": 0.02889035940170288, "step": 54810 }, { "epoch": 0.5159058823529412, "grad_norm": 0.5714135351212178, "learning_rate": 2.7851646873927564e-06, "loss": 0.018090739846229553, "step": 54815 }, { "epoch": 0.5159529411764706, "grad_norm": 0.3518341127645309, "learning_rate": 2.7850376681020686e-06, "loss": 0.01832612156867981, "step": 54820 }, { "epoch": 0.516, "grad_norm": 0.3683360983543801, "learning_rate": 2.7849106661881935e-06, "loss": 0.01606646329164505, "step": 54825 }, { "epoch": 0.5160470588235294, "grad_norm": 0.43578175445267814, "learning_rate": 2.78478368164717e-06, "loss": 0.015616491436958313, "step": 54830 }, { "epoch": 0.5160941176470588, "grad_norm": 0.5749286723527615, "learning_rate": 2.784656714475038e-06, "loss": 0.018422821164131166, "step": 54835 }, { "epoch": 0.5161411764705882, "grad_norm": 0.6697907279990934, "learning_rate": 2.7845297646678378e-06, "loss": 0.016389399766921997, "step": 54840 }, { "epoch": 0.5161882352941176, "grad_norm": 0.4529847979509379, "learning_rate": 2.784402832221611e-06, "loss": 0.016390183568000795, "step": 54845 }, { "epoch": 0.516235294117647, "grad_norm": 0.3858676959249936, "learning_rate": 2.784275917132402e-06, "loss": 0.017167966067790984, "step": 54850 }, { "epoch": 0.5162823529411765, "grad_norm": 0.7724415658277966, "learning_rate": 2.784149019396255e-06, "loss": 0.021202054619789124, "step": 54855 }, { "epoch": 0.5163294117647059, "grad_norm": 0.5120255150627498, "learning_rate": 2.784022139009215e-06, "loss": 0.01702738702297211, "step": 54860 }, { "epoch": 0.5163764705882353, "grad_norm": 0.6821647480543764, "learning_rate": 2.7838952759673316e-06, "loss": 0.021838270127773285, "step": 54865 }, { "epoch": 0.5164235294117647, "grad_norm": 0.509416914551378, "learning_rate": 2.78376843026665e-06, "loss": 0.016312819719314576, "step": 54870 }, { "epoch": 0.5164705882352941, "grad_norm": 0.4622119409426743, "learning_rate": 2.783641601903223e-06, "loss": 0.016489756107330323, "step": 54875 }, { "epoch": 0.5165176470588235, "grad_norm": 0.3513321979330946, "learning_rate": 2.7835147908730993e-06, "loss": 0.014898106455802917, "step": 54880 }, { "epoch": 0.5165647058823529, "grad_norm": 0.523725303520962, "learning_rate": 2.7833879971723317e-06, "loss": 0.01954033672809601, "step": 54885 }, { "epoch": 0.5166117647058823, "grad_norm": 0.4444965830896296, "learning_rate": 2.7832612207969738e-06, "loss": 0.018438050150871278, "step": 54890 }, { "epoch": 0.5166588235294117, "grad_norm": 0.6419074501916038, "learning_rate": 2.7831344617430805e-06, "loss": 0.01728884279727936, "step": 54895 }, { "epoch": 0.5167058823529411, "grad_norm": 0.48766196829259095, "learning_rate": 2.783007720006708e-06, "loss": 0.014870288968086242, "step": 54900 }, { "epoch": 0.5167529411764706, "grad_norm": 0.48054424675749746, "learning_rate": 2.7828809955839126e-06, "loss": 0.0222455695271492, "step": 54905 }, { "epoch": 0.5168, "grad_norm": 0.5571563813262832, "learning_rate": 2.782754288470754e-06, "loss": 0.016149576008319854, "step": 54910 }, { "epoch": 0.5168470588235294, "grad_norm": 0.4667331628122477, "learning_rate": 2.78262759866329e-06, "loss": 0.015717729926109314, "step": 54915 }, { "epoch": 0.5168941176470588, "grad_norm": 0.45780376883292057, "learning_rate": 2.7825009261575835e-06, "loss": 0.015534104406833648, "step": 54920 }, { "epoch": 0.5169411764705882, "grad_norm": 0.2797133651941961, "learning_rate": 2.7823742709496966e-06, "loss": 0.018463028967380522, "step": 54925 }, { "epoch": 0.5169882352941176, "grad_norm": 0.34652018982955834, "learning_rate": 2.782247633035691e-06, "loss": 0.015714484453201293, "step": 54930 }, { "epoch": 0.517035294117647, "grad_norm": 0.2920994419302549, "learning_rate": 2.782121012411634e-06, "loss": 0.012910506129264832, "step": 54935 }, { "epoch": 0.5170823529411764, "grad_norm": 0.5682211676060448, "learning_rate": 2.78199440907359e-06, "loss": 0.014202132821083069, "step": 54940 }, { "epoch": 0.5171294117647058, "grad_norm": 0.4257352645184764, "learning_rate": 2.7818678230176266e-06, "loss": 0.01997656226158142, "step": 54945 }, { "epoch": 0.5171764705882353, "grad_norm": 0.5304326505000937, "learning_rate": 2.781741254239812e-06, "loss": 0.02259426414966583, "step": 54950 }, { "epoch": 0.5172235294117647, "grad_norm": 1.185601405120585, "learning_rate": 2.781614702736217e-06, "loss": 0.019073516130447388, "step": 54955 }, { "epoch": 0.5172705882352941, "grad_norm": 0.73914894513846, "learning_rate": 2.7814881685029117e-06, "loss": 0.02137114107608795, "step": 54960 }, { "epoch": 0.5173176470588235, "grad_norm": 0.5385172603317585, "learning_rate": 2.7813616515359682e-06, "loss": 0.012973128259181977, "step": 54965 }, { "epoch": 0.517364705882353, "grad_norm": 0.30299240538825767, "learning_rate": 2.7812351518314605e-06, "loss": 0.017073848843574525, "step": 54970 }, { "epoch": 0.5174117647058823, "grad_norm": 0.3647105396487827, "learning_rate": 2.7811086693854643e-06, "loss": 0.016711713373661043, "step": 54975 }, { "epoch": 0.5174588235294117, "grad_norm": 0.501148481877612, "learning_rate": 2.780982204194054e-06, "loss": 0.01883351355791092, "step": 54980 }, { "epoch": 0.5175058823529411, "grad_norm": 0.5185433676871203, "learning_rate": 2.7808557562533073e-06, "loss": 0.019670003652572633, "step": 54985 }, { "epoch": 0.5175529411764705, "grad_norm": 0.5011372294713717, "learning_rate": 2.7807293255593034e-06, "loss": 0.019069108366966247, "step": 54990 }, { "epoch": 0.5176, "grad_norm": 1.1659471966601926, "learning_rate": 2.7806029121081213e-06, "loss": 0.021193115413188933, "step": 54995 }, { "epoch": 0.5176470588235295, "grad_norm": 0.6895024507391658, "learning_rate": 2.780476515895842e-06, "loss": 0.017859697341918945, "step": 55000 }, { "epoch": 0.5176941176470589, "grad_norm": 0.9565758164007616, "learning_rate": 2.780350136918549e-06, "loss": 0.025508388876914978, "step": 55005 }, { "epoch": 0.5177411764705883, "grad_norm": 0.3953500211234776, "learning_rate": 2.780223775172324e-06, "loss": 0.01893799751996994, "step": 55010 }, { "epoch": 0.5177882352941177, "grad_norm": 0.5522253239594764, "learning_rate": 2.7800974306532535e-06, "loss": 0.016280950605869295, "step": 55015 }, { "epoch": 0.5178352941176471, "grad_norm": 0.6267908450509342, "learning_rate": 2.7799711033574216e-06, "loss": 0.015941382944583894, "step": 55020 }, { "epoch": 0.5178823529411765, "grad_norm": 0.5218665441187319, "learning_rate": 2.779844793280918e-06, "loss": 0.020718052983283997, "step": 55025 }, { "epoch": 0.5179294117647059, "grad_norm": 0.4720587744105305, "learning_rate": 2.7797185004198285e-06, "loss": 0.017196112871170045, "step": 55030 }, { "epoch": 0.5179764705882353, "grad_norm": 0.5962500240491004, "learning_rate": 2.7795922247702445e-06, "loss": 0.019433918595314025, "step": 55035 }, { "epoch": 0.5180235294117647, "grad_norm": 0.6435618723269112, "learning_rate": 2.779465966328257e-06, "loss": 0.017887087166309358, "step": 55040 }, { "epoch": 0.5180705882352942, "grad_norm": 1.0878721776714988, "learning_rate": 2.7793397250899572e-06, "loss": 0.021182502806186675, "step": 55045 }, { "epoch": 0.5181176470588236, "grad_norm": 0.3924819728334896, "learning_rate": 2.7792135010514405e-06, "loss": 0.012983751296997071, "step": 55050 }, { "epoch": 0.518164705882353, "grad_norm": 0.6833017630910613, "learning_rate": 2.7790872942087988e-06, "loss": 0.019181197881698607, "step": 55055 }, { "epoch": 0.5182117647058824, "grad_norm": 0.6131136558279469, "learning_rate": 2.77896110455813e-06, "loss": 0.021253117918968202, "step": 55060 }, { "epoch": 0.5182588235294118, "grad_norm": 0.38645081453334545, "learning_rate": 2.778834932095531e-06, "loss": 0.022765535116195678, "step": 55065 }, { "epoch": 0.5183058823529412, "grad_norm": 0.42083527580378916, "learning_rate": 2.7787087768171007e-06, "loss": 0.02194490283727646, "step": 55070 }, { "epoch": 0.5183529411764706, "grad_norm": 0.34746474061288496, "learning_rate": 2.778582638718937e-06, "loss": 0.01684058755636215, "step": 55075 }, { "epoch": 0.5184, "grad_norm": 0.5115809572401732, "learning_rate": 2.778456517797142e-06, "loss": 0.020049016177654266, "step": 55080 }, { "epoch": 0.5184470588235294, "grad_norm": 0.6395891864939536, "learning_rate": 2.7783304140478185e-06, "loss": 0.01439371258020401, "step": 55085 }, { "epoch": 0.5184941176470588, "grad_norm": 0.5010367872092593, "learning_rate": 2.778204327467069e-06, "loss": 0.019785524904727937, "step": 55090 }, { "epoch": 0.5185411764705883, "grad_norm": 0.5145574427750008, "learning_rate": 2.778078258050998e-06, "loss": 0.017634913325309753, "step": 55095 }, { "epoch": 0.5185882352941177, "grad_norm": 0.2779149615920998, "learning_rate": 2.7779522057957113e-06, "loss": 0.017977210879325866, "step": 55100 }, { "epoch": 0.5186352941176471, "grad_norm": 0.3306221180257835, "learning_rate": 2.777826170697317e-06, "loss": 0.015553119778633117, "step": 55105 }, { "epoch": 0.5186823529411765, "grad_norm": 0.600428873232913, "learning_rate": 2.777700152751923e-06, "loss": 0.0161419153213501, "step": 55110 }, { "epoch": 0.5187294117647059, "grad_norm": 0.5011664183648419, "learning_rate": 2.7775741519556372e-06, "loss": 0.0170566588640213, "step": 55115 }, { "epoch": 0.5187764705882353, "grad_norm": 0.63266171321843, "learning_rate": 2.777448168304573e-06, "loss": 0.022697652876377105, "step": 55120 }, { "epoch": 0.5188235294117647, "grad_norm": 0.4452624728723294, "learning_rate": 2.777322201794841e-06, "loss": 0.01795312911272049, "step": 55125 }, { "epoch": 0.5188705882352941, "grad_norm": 0.37436370214848014, "learning_rate": 2.7771962524225545e-06, "loss": 0.016763004660606384, "step": 55130 }, { "epoch": 0.5189176470588235, "grad_norm": 0.6798320557452675, "learning_rate": 2.7770703201838286e-06, "loss": 0.017535687983036043, "step": 55135 }, { "epoch": 0.518964705882353, "grad_norm": 0.5557893539971825, "learning_rate": 2.7769444050747783e-06, "loss": 0.017397007346153258, "step": 55140 }, { "epoch": 0.5190117647058824, "grad_norm": 0.4215600306933302, "learning_rate": 2.776818507091521e-06, "loss": 0.01817578971385956, "step": 55145 }, { "epoch": 0.5190588235294118, "grad_norm": 0.592055860650183, "learning_rate": 2.776692626230175e-06, "loss": 0.019692474603652955, "step": 55150 }, { "epoch": 0.5191058823529412, "grad_norm": 0.4225960790182536, "learning_rate": 2.77656676248686e-06, "loss": 0.018199974298477174, "step": 55155 }, { "epoch": 0.5191529411764706, "grad_norm": 0.3290058785912699, "learning_rate": 2.7764409158576955e-06, "loss": 0.015605436265468597, "step": 55160 }, { "epoch": 0.5192, "grad_norm": 0.6510776855297589, "learning_rate": 2.7763150863388046e-06, "loss": 0.01743159294128418, "step": 55165 }, { "epoch": 0.5192470588235294, "grad_norm": 0.5692840098034564, "learning_rate": 2.7761892739263095e-06, "loss": 0.016533643007278442, "step": 55170 }, { "epoch": 0.5192941176470588, "grad_norm": 0.4320312151342824, "learning_rate": 2.7760634786163353e-06, "loss": 0.016926600039005278, "step": 55175 }, { "epoch": 0.5193411764705882, "grad_norm": 0.7761797801985729, "learning_rate": 2.775937700405007e-06, "loss": 0.021725189685821534, "step": 55180 }, { "epoch": 0.5193882352941176, "grad_norm": 0.6457418947173881, "learning_rate": 2.7758119392884524e-06, "loss": 0.016047364473342894, "step": 55185 }, { "epoch": 0.5194352941176471, "grad_norm": 0.49533760899091955, "learning_rate": 2.7756861952627983e-06, "loss": 0.02194750905036926, "step": 55190 }, { "epoch": 0.5194823529411765, "grad_norm": 0.38372122224522576, "learning_rate": 2.775560468324175e-06, "loss": 0.014573842287063599, "step": 55195 }, { "epoch": 0.5195294117647059, "grad_norm": 0.5091579014987068, "learning_rate": 2.7754347584687123e-06, "loss": 0.015493974089622498, "step": 55200 }, { "epoch": 0.5195764705882353, "grad_norm": 0.543691286191355, "learning_rate": 2.775309065692542e-06, "loss": 0.016103073954582214, "step": 55205 }, { "epoch": 0.5196235294117647, "grad_norm": 0.5567178548270539, "learning_rate": 2.7751833899917976e-06, "loss": 0.014210975170135498, "step": 55210 }, { "epoch": 0.5196705882352941, "grad_norm": 0.5147855071819857, "learning_rate": 2.775057731362612e-06, "loss": 0.019320482015609743, "step": 55215 }, { "epoch": 0.5197176470588235, "grad_norm": 0.46648930953737094, "learning_rate": 2.7749320898011222e-06, "loss": 0.012897008657455444, "step": 55220 }, { "epoch": 0.5197647058823529, "grad_norm": 0.47365020031445687, "learning_rate": 2.774806465303464e-06, "loss": 0.017039786279201507, "step": 55225 }, { "epoch": 0.5198117647058823, "grad_norm": 0.5455578571171683, "learning_rate": 2.774680857865775e-06, "loss": 0.016131645441055296, "step": 55230 }, { "epoch": 0.5198588235294118, "grad_norm": 0.36442579499855837, "learning_rate": 2.774555267484195e-06, "loss": 0.01641131043434143, "step": 55235 }, { "epoch": 0.5199058823529412, "grad_norm": 0.6715529393746155, "learning_rate": 2.7744296941548643e-06, "loss": 0.01806764453649521, "step": 55240 }, { "epoch": 0.5199529411764706, "grad_norm": 0.43827261797123757, "learning_rate": 2.774304137873923e-06, "loss": 0.016663622856140137, "step": 55245 }, { "epoch": 0.52, "grad_norm": 0.4589428305269699, "learning_rate": 2.7741785986375157e-06, "loss": 0.01577480435371399, "step": 55250 }, { "epoch": 0.5200470588235294, "grad_norm": 0.6885273299426958, "learning_rate": 2.774053076441785e-06, "loss": 0.017110222578048707, "step": 55255 }, { "epoch": 0.5200941176470588, "grad_norm": 0.7682282977266934, "learning_rate": 2.7739275712828772e-06, "loss": 0.019489528238773347, "step": 55260 }, { "epoch": 0.5201411764705882, "grad_norm": 0.5045904812835129, "learning_rate": 2.773802083156938e-06, "loss": 0.016489267349243164, "step": 55265 }, { "epoch": 0.5201882352941176, "grad_norm": 0.5084072853421258, "learning_rate": 2.773676612060115e-06, "loss": 0.017340442538261412, "step": 55270 }, { "epoch": 0.520235294117647, "grad_norm": 0.433235111997776, "learning_rate": 2.773551157988557e-06, "loss": 0.016749660670757293, "step": 55275 }, { "epoch": 0.5202823529411764, "grad_norm": 0.48267559621191086, "learning_rate": 2.773425720938414e-06, "loss": 0.018814797699451446, "step": 55280 }, { "epoch": 0.5203294117647059, "grad_norm": 0.6416415047653136, "learning_rate": 2.7733003009058386e-06, "loss": 0.016641104221343996, "step": 55285 }, { "epoch": 0.5203764705882353, "grad_norm": 0.2887281153038611, "learning_rate": 2.7731748978869814e-06, "loss": 0.01838148236274719, "step": 55290 }, { "epoch": 0.5204235294117647, "grad_norm": 0.36852685605606117, "learning_rate": 2.7730495118779976e-06, "loss": 0.01388384997844696, "step": 55295 }, { "epoch": 0.5204705882352941, "grad_norm": 0.31096343645162394, "learning_rate": 2.772924142875041e-06, "loss": 0.014714755117893219, "step": 55300 }, { "epoch": 0.5205176470588235, "grad_norm": 0.530879303173502, "learning_rate": 2.772798790874268e-06, "loss": 0.014628870785236359, "step": 55305 }, { "epoch": 0.5205647058823529, "grad_norm": 0.4926737210491492, "learning_rate": 2.7726734558718367e-06, "loss": 0.015851324796676634, "step": 55310 }, { "epoch": 0.5206117647058823, "grad_norm": 0.6833863844228385, "learning_rate": 2.772548137863905e-06, "loss": 0.01731959581375122, "step": 55315 }, { "epoch": 0.5206588235294117, "grad_norm": 0.49530211047682043, "learning_rate": 2.7724228368466333e-06, "loss": 0.017876917123794557, "step": 55320 }, { "epoch": 0.5207058823529411, "grad_norm": 0.5923110455198538, "learning_rate": 2.772297552816182e-06, "loss": 0.01970251798629761, "step": 55325 }, { "epoch": 0.5207529411764706, "grad_norm": 0.5069834847341144, "learning_rate": 2.7721722857687134e-06, "loss": 0.0155340775847435, "step": 55330 }, { "epoch": 0.5208, "grad_norm": 0.364637794562503, "learning_rate": 2.7720470357003907e-06, "loss": 0.014732474088668823, "step": 55335 }, { "epoch": 0.5208470588235294, "grad_norm": 0.49681232347099674, "learning_rate": 2.7719218026073796e-06, "loss": 0.021133516728878022, "step": 55340 }, { "epoch": 0.5208941176470588, "grad_norm": 0.5689713075918645, "learning_rate": 2.7717965864858454e-06, "loss": 0.018437840044498444, "step": 55345 }, { "epoch": 0.5209411764705882, "grad_norm": 0.4859127823414591, "learning_rate": 2.7716713873319545e-06, "loss": 0.015612056851387024, "step": 55350 }, { "epoch": 0.5209882352941176, "grad_norm": 0.761865650592196, "learning_rate": 2.771546205141876e-06, "loss": 0.019143223762512207, "step": 55355 }, { "epoch": 0.521035294117647, "grad_norm": 0.7132868238397341, "learning_rate": 2.7714210399117798e-06, "loss": 0.020585736632347106, "step": 55360 }, { "epoch": 0.5210823529411764, "grad_norm": 0.491394842159561, "learning_rate": 2.7712958916378354e-06, "loss": 0.017564624547958374, "step": 55365 }, { "epoch": 0.5211294117647058, "grad_norm": 0.31450494399277024, "learning_rate": 2.7711707603162152e-06, "loss": 0.012805844843387603, "step": 55370 }, { "epoch": 0.5211764705882352, "grad_norm": 0.3858950499819346, "learning_rate": 2.7710456459430925e-06, "loss": 0.013556241989135742, "step": 55375 }, { "epoch": 0.5212235294117648, "grad_norm": 0.4011541764187603, "learning_rate": 2.7709205485146416e-06, "loss": 0.01616852879524231, "step": 55380 }, { "epoch": 0.5212705882352942, "grad_norm": 0.4194124506766092, "learning_rate": 2.7707954680270384e-06, "loss": 0.012953491508960724, "step": 55385 }, { "epoch": 0.5213176470588236, "grad_norm": 0.3891206907158268, "learning_rate": 2.7706704044764592e-06, "loss": 0.01892693042755127, "step": 55390 }, { "epoch": 0.521364705882353, "grad_norm": 0.5677099515451727, "learning_rate": 2.7705453578590817e-06, "loss": 0.018315568566322327, "step": 55395 }, { "epoch": 0.5214117647058824, "grad_norm": 0.5129611590180395, "learning_rate": 2.770420328171086e-06, "loss": 0.0175957128405571, "step": 55400 }, { "epoch": 0.5214588235294118, "grad_norm": 0.27075997671295476, "learning_rate": 2.770295315408652e-06, "loss": 0.014874520897865295, "step": 55405 }, { "epoch": 0.5215058823529412, "grad_norm": 0.7770351312628897, "learning_rate": 2.7701703195679608e-06, "loss": 0.02117532044649124, "step": 55410 }, { "epoch": 0.5215529411764706, "grad_norm": 0.39764317264109006, "learning_rate": 2.770045340645196e-06, "loss": 0.01795981526374817, "step": 55415 }, { "epoch": 0.5216, "grad_norm": 0.7936698106164645, "learning_rate": 2.769920378636541e-06, "loss": 0.018333566188812257, "step": 55420 }, { "epoch": 0.5216470588235295, "grad_norm": 0.5689749218822847, "learning_rate": 2.7697954335381814e-06, "loss": 0.018539512157440187, "step": 55425 }, { "epoch": 0.5216941176470589, "grad_norm": 0.48974689074485694, "learning_rate": 2.7696705053463035e-06, "loss": 0.02237880975008011, "step": 55430 }, { "epoch": 0.5217411764705883, "grad_norm": 0.5310606321886794, "learning_rate": 2.769545594057094e-06, "loss": 0.02024870216846466, "step": 55435 }, { "epoch": 0.5217882352941177, "grad_norm": 0.44762798513168545, "learning_rate": 2.769420699666744e-06, "loss": 0.016587044298648834, "step": 55440 }, { "epoch": 0.5218352941176471, "grad_norm": 0.464719093883337, "learning_rate": 2.7692958221714418e-06, "loss": 0.013821919262409211, "step": 55445 }, { "epoch": 0.5218823529411765, "grad_norm": 0.4943057842013595, "learning_rate": 2.769170961567378e-06, "loss": 0.014491553604602813, "step": 55450 }, { "epoch": 0.5219294117647059, "grad_norm": 0.5569599189177037, "learning_rate": 2.7690461178507467e-06, "loss": 0.024866893887519836, "step": 55455 }, { "epoch": 0.5219764705882353, "grad_norm": 0.4691617334898328, "learning_rate": 2.7689212910177404e-06, "loss": 0.014964696764945985, "step": 55460 }, { "epoch": 0.5220235294117647, "grad_norm": 0.7729664757360217, "learning_rate": 2.7687964810645544e-06, "loss": 0.018697427213191987, "step": 55465 }, { "epoch": 0.5220705882352941, "grad_norm": 0.6838536839568325, "learning_rate": 2.7686716879873848e-06, "loss": 0.01977858990430832, "step": 55470 }, { "epoch": 0.5221176470588236, "grad_norm": 0.5090915711062527, "learning_rate": 2.7685469117824288e-06, "loss": 0.018249326944351198, "step": 55475 }, { "epoch": 0.522164705882353, "grad_norm": 0.4853996471249717, "learning_rate": 2.768422152445885e-06, "loss": 0.018489673733711243, "step": 55480 }, { "epoch": 0.5222117647058824, "grad_norm": 0.48952650237258855, "learning_rate": 2.768297409973952e-06, "loss": 0.018713609874248506, "step": 55485 }, { "epoch": 0.5222588235294118, "grad_norm": 0.7855442218653904, "learning_rate": 2.7681726843628317e-06, "loss": 0.022257424890995026, "step": 55490 }, { "epoch": 0.5223058823529412, "grad_norm": 0.5054760651973821, "learning_rate": 2.7680479756087254e-06, "loss": 0.01585821509361267, "step": 55495 }, { "epoch": 0.5223529411764706, "grad_norm": 0.4843790739598743, "learning_rate": 2.7679232837078367e-06, "loss": 0.017762430012226105, "step": 55500 }, { "epoch": 0.5224, "grad_norm": 0.49163511750472333, "learning_rate": 2.76779860865637e-06, "loss": 0.018460944294929504, "step": 55505 }, { "epoch": 0.5224470588235294, "grad_norm": 0.5271565602587377, "learning_rate": 2.7676739504505313e-06, "loss": 0.014665903151035308, "step": 55510 }, { "epoch": 0.5224941176470588, "grad_norm": 0.7874048162944178, "learning_rate": 2.7675493090865276e-06, "loss": 0.02514151334762573, "step": 55515 }, { "epoch": 0.5225411764705883, "grad_norm": 0.5007517425819532, "learning_rate": 2.7674246845605653e-06, "loss": 0.018238948285579683, "step": 55520 }, { "epoch": 0.5225882352941177, "grad_norm": 0.28538727515236256, "learning_rate": 2.7673000768688547e-06, "loss": 0.015198349952697754, "step": 55525 }, { "epoch": 0.5226352941176471, "grad_norm": 0.6174476541168432, "learning_rate": 2.7671754860076065e-06, "loss": 0.01917423903942108, "step": 55530 }, { "epoch": 0.5226823529411765, "grad_norm": 0.6861450117594645, "learning_rate": 2.767050911973032e-06, "loss": 0.026832437515258788, "step": 55535 }, { "epoch": 0.5227294117647059, "grad_norm": 0.5714546431586445, "learning_rate": 2.766926354761344e-06, "loss": 0.01934056282043457, "step": 55540 }, { "epoch": 0.5227764705882353, "grad_norm": 0.42175020556072756, "learning_rate": 2.7668018143687557e-06, "loss": 0.020887261629104613, "step": 55545 }, { "epoch": 0.5228235294117647, "grad_norm": 0.7349927257050831, "learning_rate": 2.7666772907914842e-06, "loss": 0.015363751351833344, "step": 55550 }, { "epoch": 0.5228705882352941, "grad_norm": 0.3837390208830558, "learning_rate": 2.766552784025743e-06, "loss": 0.014205984771251678, "step": 55555 }, { "epoch": 0.5229176470588235, "grad_norm": 0.41883689621226616, "learning_rate": 2.7664282940677527e-06, "loss": 0.020292657613754272, "step": 55560 }, { "epoch": 0.5229647058823529, "grad_norm": 0.590314352860931, "learning_rate": 2.76630382091373e-06, "loss": 0.019848921895027162, "step": 55565 }, { "epoch": 0.5230117647058824, "grad_norm": 0.31723394636036084, "learning_rate": 2.7661793645598955e-06, "loss": 0.017310526967048646, "step": 55570 }, { "epoch": 0.5230588235294118, "grad_norm": 0.6330007236128758, "learning_rate": 2.7660549250024703e-06, "loss": 0.01661112904548645, "step": 55575 }, { "epoch": 0.5231058823529412, "grad_norm": 0.3921129141417441, "learning_rate": 2.765930502237677e-06, "loss": 0.01811675578355789, "step": 55580 }, { "epoch": 0.5231529411764706, "grad_norm": 0.29249992371496586, "learning_rate": 2.765806096261738e-06, "loss": 0.01247725561261177, "step": 55585 }, { "epoch": 0.5232, "grad_norm": 0.6329483385353313, "learning_rate": 2.7656817070708796e-06, "loss": 0.01993980407714844, "step": 55590 }, { "epoch": 0.5232470588235294, "grad_norm": 0.6204522463511561, "learning_rate": 2.7655573346613268e-06, "loss": 0.016246184706687927, "step": 55595 }, { "epoch": 0.5232941176470588, "grad_norm": 0.5228019350168076, "learning_rate": 2.7654329790293066e-06, "loss": 0.015448030829429627, "step": 55600 }, { "epoch": 0.5233411764705882, "grad_norm": 0.7625584431145463, "learning_rate": 2.7653086401710473e-06, "loss": 0.016063904762268065, "step": 55605 }, { "epoch": 0.5233882352941176, "grad_norm": 0.425121428160888, "learning_rate": 2.765184318082779e-06, "loss": 0.020022386312484743, "step": 55610 }, { "epoch": 0.5234352941176471, "grad_norm": 0.6379320653899099, "learning_rate": 2.7650600127607315e-06, "loss": 0.017722320556640626, "step": 55615 }, { "epoch": 0.5234823529411765, "grad_norm": 0.43813312241280916, "learning_rate": 2.7649357242011364e-06, "loss": 0.020357292890548707, "step": 55620 }, { "epoch": 0.5235294117647059, "grad_norm": 0.3222495649019875, "learning_rate": 2.764811452400228e-06, "loss": 0.01326771080493927, "step": 55625 }, { "epoch": 0.5235764705882353, "grad_norm": 0.4708391286049453, "learning_rate": 2.7646871973542394e-06, "loss": 0.01854185312986374, "step": 55630 }, { "epoch": 0.5236235294117647, "grad_norm": 0.4966368949513511, "learning_rate": 2.7645629590594064e-06, "loss": 0.015028563141822816, "step": 55635 }, { "epoch": 0.5236705882352941, "grad_norm": 0.5877297811584471, "learning_rate": 2.7644387375119653e-06, "loss": 0.018229086697101594, "step": 55640 }, { "epoch": 0.5237176470588235, "grad_norm": 0.6171838602265765, "learning_rate": 2.7643145327081544e-06, "loss": 0.02025614082813263, "step": 55645 }, { "epoch": 0.5237647058823529, "grad_norm": 0.4119537057830937, "learning_rate": 2.7641903446442116e-06, "loss": 0.017573684453964233, "step": 55650 }, { "epoch": 0.5238117647058823, "grad_norm": 0.58954576871585, "learning_rate": 2.764066173316378e-06, "loss": 0.017505258321762085, "step": 55655 }, { "epoch": 0.5238588235294117, "grad_norm": 0.9326726363318338, "learning_rate": 2.763942018720894e-06, "loss": 0.02156561017036438, "step": 55660 }, { "epoch": 0.5239058823529412, "grad_norm": 0.3432595119556359, "learning_rate": 2.7638178808540034e-06, "loss": 0.018280762434005737, "step": 55665 }, { "epoch": 0.5239529411764706, "grad_norm": 0.38142001739078746, "learning_rate": 2.7636937597119484e-06, "loss": 0.018752601742744446, "step": 55670 }, { "epoch": 0.524, "grad_norm": 0.41207235114861085, "learning_rate": 2.7635696552909747e-06, "loss": 0.015907615423202515, "step": 55675 }, { "epoch": 0.5240470588235294, "grad_norm": 0.3999394938972601, "learning_rate": 2.7634455675873283e-06, "loss": 0.019595932960510255, "step": 55680 }, { "epoch": 0.5240941176470588, "grad_norm": 0.860233884844051, "learning_rate": 2.763321496597256e-06, "loss": 0.01842102110385895, "step": 55685 }, { "epoch": 0.5241411764705882, "grad_norm": 0.4221817298608945, "learning_rate": 2.7631974423170066e-06, "loss": 0.021593549847602846, "step": 55690 }, { "epoch": 0.5241882352941176, "grad_norm": 0.5865135912033873, "learning_rate": 2.7630734047428288e-06, "loss": 0.02408432960510254, "step": 55695 }, { "epoch": 0.524235294117647, "grad_norm": 1.2323755017890747, "learning_rate": 2.7629493838709743e-06, "loss": 0.021761974692344664, "step": 55700 }, { "epoch": 0.5242823529411764, "grad_norm": 0.595390639293299, "learning_rate": 2.7628253796976946e-06, "loss": 0.019152453541755675, "step": 55705 }, { "epoch": 0.5243294117647059, "grad_norm": 0.46025181048805186, "learning_rate": 2.7627013922192424e-06, "loss": 0.01813717484474182, "step": 55710 }, { "epoch": 0.5243764705882353, "grad_norm": 0.5563258861913489, "learning_rate": 2.762577421431873e-06, "loss": 0.015724778175354004, "step": 55715 }, { "epoch": 0.5244235294117647, "grad_norm": 0.7420497863331201, "learning_rate": 2.762453467331841e-06, "loss": 0.01933305859565735, "step": 55720 }, { "epoch": 0.5244705882352941, "grad_norm": 0.4140109357262576, "learning_rate": 2.7623295299154034e-06, "loss": 0.014740784466266633, "step": 55725 }, { "epoch": 0.5245176470588235, "grad_norm": 0.6002972151588915, "learning_rate": 2.762205609178818e-06, "loss": 0.019910657405853273, "step": 55730 }, { "epoch": 0.5245647058823529, "grad_norm": 0.4583010261073491, "learning_rate": 2.762081705118343e-06, "loss": 0.010837602615356445, "step": 55735 }, { "epoch": 0.5246117647058823, "grad_norm": 0.4711278463412841, "learning_rate": 2.7619578177302396e-06, "loss": 0.019674837589263916, "step": 55740 }, { "epoch": 0.5246588235294117, "grad_norm": 0.6300441787663025, "learning_rate": 2.7618339470107687e-06, "loss": 0.018225517868995667, "step": 55745 }, { "epoch": 0.5247058823529411, "grad_norm": 0.43079130784989583, "learning_rate": 2.7617100929561926e-06, "loss": 0.01801689714193344, "step": 55750 }, { "epoch": 0.5247529411764705, "grad_norm": 0.43573710089986056, "learning_rate": 2.761586255562775e-06, "loss": 0.01595795005559921, "step": 55755 }, { "epoch": 0.5248, "grad_norm": 0.4000139769208392, "learning_rate": 2.761462434826781e-06, "loss": 0.018164855241775513, "step": 55760 }, { "epoch": 0.5248470588235294, "grad_norm": 0.4743950416493466, "learning_rate": 2.7613386307444758e-06, "loss": 0.017716582119464874, "step": 55765 }, { "epoch": 0.5248941176470588, "grad_norm": 0.5103637251049119, "learning_rate": 2.761214843312128e-06, "loss": 0.017406195402145386, "step": 55770 }, { "epoch": 0.5249411764705882, "grad_norm": 0.31336892652120796, "learning_rate": 2.761091072526005e-06, "loss": 0.017394062876701356, "step": 55775 }, { "epoch": 0.5249882352941176, "grad_norm": 0.4812628864543715, "learning_rate": 2.760967318382376e-06, "loss": 0.014500166475772857, "step": 55780 }, { "epoch": 0.525035294117647, "grad_norm": 0.48890300125489006, "learning_rate": 2.760843580877513e-06, "loss": 0.0182549387216568, "step": 55785 }, { "epoch": 0.5250823529411764, "grad_norm": 0.5228657651514045, "learning_rate": 2.760719860007686e-06, "loss": 0.016878993809223176, "step": 55790 }, { "epoch": 0.5251294117647058, "grad_norm": 0.4135138265674219, "learning_rate": 2.7605961557691695e-06, "loss": 0.018982484936714172, "step": 55795 }, { "epoch": 0.5251764705882352, "grad_norm": 0.6769398869625975, "learning_rate": 2.7604724681582378e-06, "loss": 0.017356131970882416, "step": 55800 }, { "epoch": 0.5252235294117648, "grad_norm": 0.5376175455749811, "learning_rate": 2.760348797171165e-06, "loss": 0.015571993589401246, "step": 55805 }, { "epoch": 0.5252705882352942, "grad_norm": 0.49000808420572806, "learning_rate": 2.760225142804229e-06, "loss": 0.01670793890953064, "step": 55810 }, { "epoch": 0.5253176470588236, "grad_norm": 0.48390415504114287, "learning_rate": 2.760101505053707e-06, "loss": 0.013928006589412689, "step": 55815 }, { "epoch": 0.525364705882353, "grad_norm": 0.4995748893753444, "learning_rate": 2.759977883915877e-06, "loss": 0.02021121084690094, "step": 55820 }, { "epoch": 0.5254117647058824, "grad_norm": 0.5599390032855612, "learning_rate": 2.7598542793870203e-06, "loss": 0.020943066477775572, "step": 55825 }, { "epoch": 0.5254588235294118, "grad_norm": 0.45189231148196657, "learning_rate": 2.759730691463417e-06, "loss": 0.0172922283411026, "step": 55830 }, { "epoch": 0.5255058823529412, "grad_norm": 0.637066163482502, "learning_rate": 2.75960712014135e-06, "loss": 0.01853092908859253, "step": 55835 }, { "epoch": 0.5255529411764706, "grad_norm": 0.2584327935753985, "learning_rate": 2.7594835654171036e-06, "loss": 0.013392335176467896, "step": 55840 }, { "epoch": 0.5256, "grad_norm": 0.48822245255809515, "learning_rate": 2.7593600272869614e-06, "loss": 0.015361444652080536, "step": 55845 }, { "epoch": 0.5256470588235295, "grad_norm": 0.5238850733016079, "learning_rate": 2.7592365057472092e-06, "loss": 0.016462826728820802, "step": 55850 }, { "epoch": 0.5256941176470589, "grad_norm": 0.5242007596097826, "learning_rate": 2.7591130007941346e-06, "loss": 0.018257628381252288, "step": 55855 }, { "epoch": 0.5257411764705883, "grad_norm": 0.5467638717083256, "learning_rate": 2.7589895124240266e-06, "loss": 0.02175474464893341, "step": 55860 }, { "epoch": 0.5257882352941177, "grad_norm": 0.6696079821705172, "learning_rate": 2.7588660406331724e-06, "loss": 0.016426458954811096, "step": 55865 }, { "epoch": 0.5258352941176471, "grad_norm": 0.3740609321719969, "learning_rate": 2.7587425854178644e-06, "loss": 0.018084299564361573, "step": 55870 }, { "epoch": 0.5258823529411765, "grad_norm": 1.0087871140751914, "learning_rate": 2.7586191467743935e-06, "loss": 0.015557938814163208, "step": 55875 }, { "epoch": 0.5259294117647059, "grad_norm": 0.5822132183650093, "learning_rate": 2.758495724699052e-06, "loss": 0.015772560238838197, "step": 55880 }, { "epoch": 0.5259764705882353, "grad_norm": 0.6472786623550727, "learning_rate": 2.758372319188135e-06, "loss": 0.015036861598491668, "step": 55885 }, { "epoch": 0.5260235294117647, "grad_norm": 0.730100025548977, "learning_rate": 2.7582489302379374e-06, "loss": 0.021758128702640534, "step": 55890 }, { "epoch": 0.5260705882352941, "grad_norm": 0.5808048663510339, "learning_rate": 2.758125557844755e-06, "loss": 0.01598876863718033, "step": 55895 }, { "epoch": 0.5261176470588236, "grad_norm": 0.5301909359172736, "learning_rate": 2.758002202004886e-06, "loss": 0.016910864412784575, "step": 55900 }, { "epoch": 0.526164705882353, "grad_norm": 0.5000238270753657, "learning_rate": 2.7578788627146282e-06, "loss": 0.02031150460243225, "step": 55905 }, { "epoch": 0.5262117647058824, "grad_norm": 0.455679808010229, "learning_rate": 2.7577555399702816e-06, "loss": 0.016157540678977966, "step": 55910 }, { "epoch": 0.5262588235294118, "grad_norm": 0.5285801086714461, "learning_rate": 2.757632233768147e-06, "loss": 0.0218779519200325, "step": 55915 }, { "epoch": 0.5263058823529412, "grad_norm": 0.6757451209912684, "learning_rate": 2.757508944104528e-06, "loss": 0.021853625774383545, "step": 55920 }, { "epoch": 0.5263529411764706, "grad_norm": 0.736965206052472, "learning_rate": 2.7573856709757256e-06, "loss": 0.01954789161682129, "step": 55925 }, { "epoch": 0.5264, "grad_norm": 0.5760810751383559, "learning_rate": 2.757262414378046e-06, "loss": 0.02024119794368744, "step": 55930 }, { "epoch": 0.5264470588235294, "grad_norm": 0.4575464547353049, "learning_rate": 2.7571391743077942e-06, "loss": 0.019079360365867614, "step": 55935 }, { "epoch": 0.5264941176470588, "grad_norm": 0.5357967704883546, "learning_rate": 2.7570159507612764e-06, "loss": 0.018089441955089568, "step": 55940 }, { "epoch": 0.5265411764705883, "grad_norm": 0.5592372255793616, "learning_rate": 2.7568927437348014e-06, "loss": 0.01825835108757019, "step": 55945 }, { "epoch": 0.5265882352941177, "grad_norm": 0.5068283396583865, "learning_rate": 2.7567695532246776e-06, "loss": 0.018141455948352814, "step": 55950 }, { "epoch": 0.5266352941176471, "grad_norm": 0.45521130419409056, "learning_rate": 2.756646379227216e-06, "loss": 0.015853184461593627, "step": 55955 }, { "epoch": 0.5266823529411765, "grad_norm": 0.6227007048031776, "learning_rate": 2.7565232217387263e-06, "loss": 0.017807982861995697, "step": 55960 }, { "epoch": 0.5267294117647059, "grad_norm": 0.37374430139164966, "learning_rate": 2.7564000807555223e-06, "loss": 0.016009275615215302, "step": 55965 }, { "epoch": 0.5267764705882353, "grad_norm": 0.43473881127751307, "learning_rate": 2.7562769562739178e-06, "loss": 0.013298563659191132, "step": 55970 }, { "epoch": 0.5268235294117647, "grad_norm": 0.46266392858709804, "learning_rate": 2.7561538482902272e-06, "loss": 0.01254667341709137, "step": 55975 }, { "epoch": 0.5268705882352941, "grad_norm": 0.48126883566883655, "learning_rate": 2.7560307568007665e-06, "loss": 0.01825336217880249, "step": 55980 }, { "epoch": 0.5269176470588235, "grad_norm": 0.5046908953285292, "learning_rate": 2.755907681801853e-06, "loss": 0.018024522066116332, "step": 55985 }, { "epoch": 0.5269647058823529, "grad_norm": 0.5267276728393545, "learning_rate": 2.7557846232898044e-06, "loss": 0.014512470364570618, "step": 55990 }, { "epoch": 0.5270117647058824, "grad_norm": 0.5422196909237172, "learning_rate": 2.7556615812609404e-06, "loss": 0.01839744746685028, "step": 55995 }, { "epoch": 0.5270588235294118, "grad_norm": 0.339068956895988, "learning_rate": 2.755538555711582e-06, "loss": 0.013477233052253724, "step": 56000 }, { "epoch": 0.5271058823529412, "grad_norm": 0.4434674018184073, "learning_rate": 2.755415546638051e-06, "loss": 0.019597350060939787, "step": 56005 }, { "epoch": 0.5271529411764706, "grad_norm": 0.5720803119015431, "learning_rate": 2.7552925540366693e-06, "loss": 0.02046658992767334, "step": 56010 }, { "epoch": 0.5272, "grad_norm": 0.5278897746960178, "learning_rate": 2.7551695779037613e-06, "loss": 0.0161837637424469, "step": 56015 }, { "epoch": 0.5272470588235294, "grad_norm": 0.4667274035813883, "learning_rate": 2.7550466182356533e-06, "loss": 0.010624735057353974, "step": 56020 }, { "epoch": 0.5272941176470588, "grad_norm": 0.35217692901485975, "learning_rate": 2.75492367502867e-06, "loss": 0.017615333199501038, "step": 56025 }, { "epoch": 0.5273411764705882, "grad_norm": 0.33864013912380686, "learning_rate": 2.7548007482791393e-06, "loss": 0.017403605580329894, "step": 56030 }, { "epoch": 0.5273882352941176, "grad_norm": 0.6797427772329449, "learning_rate": 2.7546778379833905e-06, "loss": 0.02190990447998047, "step": 56035 }, { "epoch": 0.5274352941176471, "grad_norm": 0.51795888569567, "learning_rate": 2.754554944137753e-06, "loss": 0.016920439898967743, "step": 56040 }, { "epoch": 0.5274823529411765, "grad_norm": 0.4521891615341044, "learning_rate": 2.754432066738557e-06, "loss": 0.019965989887714385, "step": 56045 }, { "epoch": 0.5275294117647059, "grad_norm": 0.785150613854018, "learning_rate": 2.7543092057821357e-06, "loss": 0.01760825365781784, "step": 56050 }, { "epoch": 0.5275764705882353, "grad_norm": 0.4928428513388011, "learning_rate": 2.7541863612648217e-06, "loss": 0.015825799107551573, "step": 56055 }, { "epoch": 0.5276235294117647, "grad_norm": 0.5434739268248621, "learning_rate": 2.7540635331829496e-06, "loss": 0.02717248201370239, "step": 56060 }, { "epoch": 0.5276705882352941, "grad_norm": 0.6255385834041671, "learning_rate": 2.753940721532854e-06, "loss": 0.021693624556064606, "step": 56065 }, { "epoch": 0.5277176470588235, "grad_norm": 0.5037466193360889, "learning_rate": 2.7538179263108723e-06, "loss": 0.014547879993915557, "step": 56070 }, { "epoch": 0.5277647058823529, "grad_norm": 0.8026795578244051, "learning_rate": 2.753695147513343e-06, "loss": 0.020499786734580992, "step": 56075 }, { "epoch": 0.5278117647058823, "grad_norm": 0.4503326935864905, "learning_rate": 2.753572385136603e-06, "loss": 0.018897087872028352, "step": 56080 }, { "epoch": 0.5278588235294117, "grad_norm": 0.5441155719884041, "learning_rate": 2.7534496391769942e-06, "loss": 0.01570843756198883, "step": 56085 }, { "epoch": 0.5279058823529412, "grad_norm": 0.5172056623245124, "learning_rate": 2.7533269096308566e-06, "loss": 0.02054229974746704, "step": 56090 }, { "epoch": 0.5279529411764706, "grad_norm": 0.4574675866360593, "learning_rate": 2.753204196494533e-06, "loss": 0.01921029984951019, "step": 56095 }, { "epoch": 0.528, "grad_norm": 1.4390410941673477, "learning_rate": 2.753081499764368e-06, "loss": 0.022897498309612276, "step": 56100 }, { "epoch": 0.5280470588235294, "grad_norm": 0.6631246333710753, "learning_rate": 2.752958819436704e-06, "loss": 0.01692184805870056, "step": 56105 }, { "epoch": 0.5280941176470588, "grad_norm": 0.5758459078546813, "learning_rate": 2.7528361555078887e-06, "loss": 0.01636822074651718, "step": 56110 }, { "epoch": 0.5281411764705882, "grad_norm": 0.38372948529435985, "learning_rate": 2.7527135079742674e-06, "loss": 0.01825103759765625, "step": 56115 }, { "epoch": 0.5281882352941176, "grad_norm": 0.6121749444362705, "learning_rate": 2.7525908768321896e-06, "loss": 0.02150402367115021, "step": 56120 }, { "epoch": 0.528235294117647, "grad_norm": 0.7081319400255961, "learning_rate": 2.7524682620780034e-06, "loss": 0.01855032444000244, "step": 56125 }, { "epoch": 0.5282823529411764, "grad_norm": 0.6195825218231025, "learning_rate": 2.75234566370806e-06, "loss": 0.01781225949525833, "step": 56130 }, { "epoch": 0.5283294117647059, "grad_norm": 0.38146837360629626, "learning_rate": 2.7522230817187103e-06, "loss": 0.018437559902668, "step": 56135 }, { "epoch": 0.5283764705882353, "grad_norm": 0.6079772336211136, "learning_rate": 2.7521005161063065e-06, "loss": 0.01849481612443924, "step": 56140 }, { "epoch": 0.5284235294117647, "grad_norm": 0.6317508571266472, "learning_rate": 2.7519779668672025e-06, "loss": 0.017786476016044616, "step": 56145 }, { "epoch": 0.5284705882352941, "grad_norm": 0.6452512397364509, "learning_rate": 2.7518554339977543e-06, "loss": 0.02301175594329834, "step": 56150 }, { "epoch": 0.5285176470588235, "grad_norm": 0.5046450138708384, "learning_rate": 2.751732917494317e-06, "loss": 0.017572715878486633, "step": 56155 }, { "epoch": 0.5285647058823529, "grad_norm": 0.6929065954961534, "learning_rate": 2.751610417353247e-06, "loss": 0.022072061896324158, "step": 56160 }, { "epoch": 0.5286117647058823, "grad_norm": 0.5059365331422307, "learning_rate": 2.7514879335709044e-06, "loss": 0.01489577442407608, "step": 56165 }, { "epoch": 0.5286588235294117, "grad_norm": 0.331451189523634, "learning_rate": 2.7513654661436465e-06, "loss": 0.016113656759262084, "step": 56170 }, { "epoch": 0.5287058823529411, "grad_norm": 0.3576040009448626, "learning_rate": 2.7512430150678354e-06, "loss": 0.019048592448234557, "step": 56175 }, { "epoch": 0.5287529411764705, "grad_norm": 0.4381817978913083, "learning_rate": 2.751120580339833e-06, "loss": 0.022979113459587096, "step": 56180 }, { "epoch": 0.5288, "grad_norm": 0.43774307719677785, "learning_rate": 2.750998161956e-06, "loss": 0.020181970298290254, "step": 56185 }, { "epoch": 0.5288470588235294, "grad_norm": 0.5029041011107658, "learning_rate": 2.750875759912703e-06, "loss": 0.015772292017936708, "step": 56190 }, { "epoch": 0.5288941176470588, "grad_norm": 0.5523877238333297, "learning_rate": 2.7507533742063047e-06, "loss": 0.01697629541158676, "step": 56195 }, { "epoch": 0.5289411764705882, "grad_norm": 0.6045498495920012, "learning_rate": 2.750631004833173e-06, "loss": 0.01520344614982605, "step": 56200 }, { "epoch": 0.5289882352941176, "grad_norm": 0.6694424647582836, "learning_rate": 2.7505086517896743e-06, "loss": 0.015981684625148772, "step": 56205 }, { "epoch": 0.529035294117647, "grad_norm": 0.5969393396239091, "learning_rate": 2.7503863150721778e-06, "loss": 0.01592278480529785, "step": 56210 }, { "epoch": 0.5290823529411764, "grad_norm": 0.38678496598401546, "learning_rate": 2.7502639946770527e-06, "loss": 0.01708417236804962, "step": 56215 }, { "epoch": 0.5291294117647058, "grad_norm": 0.5130206406583784, "learning_rate": 2.7501416906006696e-06, "loss": 0.014900901913642883, "step": 56220 }, { "epoch": 0.5291764705882352, "grad_norm": 0.7314614667531919, "learning_rate": 2.7500194028394e-06, "loss": 0.017218485474586487, "step": 56225 }, { "epoch": 0.5292235294117648, "grad_norm": 0.6439026914715207, "learning_rate": 2.749897131389618e-06, "loss": 0.02047998011112213, "step": 56230 }, { "epoch": 0.5292705882352942, "grad_norm": 0.5575221776542765, "learning_rate": 2.749774876247697e-06, "loss": 0.020690840482711793, "step": 56235 }, { "epoch": 0.5293176470588236, "grad_norm": 0.4630406544567414, "learning_rate": 2.749652637410012e-06, "loss": 0.016361746191978454, "step": 56240 }, { "epoch": 0.529364705882353, "grad_norm": 0.47019229604206936, "learning_rate": 2.7495304148729396e-06, "loss": 0.01782901883125305, "step": 56245 }, { "epoch": 0.5294117647058824, "grad_norm": 0.4717084317264325, "learning_rate": 2.7494082086328577e-06, "loss": 0.014278209209442139, "step": 56250 }, { "epoch": 0.5294588235294118, "grad_norm": 0.725526101123486, "learning_rate": 2.749286018686144e-06, "loss": 0.021264702081680298, "step": 56255 }, { "epoch": 0.5295058823529412, "grad_norm": 0.6290973068891831, "learning_rate": 2.749163845029179e-06, "loss": 0.0259322464466095, "step": 56260 }, { "epoch": 0.5295529411764706, "grad_norm": 0.7048977362135274, "learning_rate": 2.749041687658344e-06, "loss": 0.017320162057876586, "step": 56265 }, { "epoch": 0.5296, "grad_norm": 0.4421113311173545, "learning_rate": 2.7489195465700197e-06, "loss": 0.014575928449630737, "step": 56270 }, { "epoch": 0.5296470588235294, "grad_norm": 0.6680318699250752, "learning_rate": 2.748797421760591e-06, "loss": 0.02122267782688141, "step": 56275 }, { "epoch": 0.5296941176470589, "grad_norm": 0.48246571928992393, "learning_rate": 2.74867531322644e-06, "loss": 0.01657586395740509, "step": 56280 }, { "epoch": 0.5297411764705883, "grad_norm": 0.4851287635615476, "learning_rate": 2.7485532209639535e-06, "loss": 0.017806798219680786, "step": 56285 }, { "epoch": 0.5297882352941177, "grad_norm": 0.5591284669098051, "learning_rate": 2.7484311449695183e-06, "loss": 0.01795713007450104, "step": 56290 }, { "epoch": 0.5298352941176471, "grad_norm": 0.5604815565975397, "learning_rate": 2.7483090852395204e-06, "loss": 0.0156668484210968, "step": 56295 }, { "epoch": 0.5298823529411765, "grad_norm": 0.446492023664168, "learning_rate": 2.7481870417703503e-06, "loss": 0.015213628113269807, "step": 56300 }, { "epoch": 0.5299294117647059, "grad_norm": 0.4810359220095789, "learning_rate": 2.748065014558397e-06, "loss": 0.015962588787078857, "step": 56305 }, { "epoch": 0.5299764705882353, "grad_norm": 0.46882413811266815, "learning_rate": 2.7479430036000515e-06, "loss": 0.013361233472824096, "step": 56310 }, { "epoch": 0.5300235294117647, "grad_norm": 0.5094034535018063, "learning_rate": 2.7478210088917056e-06, "loss": 0.017871487140655517, "step": 56315 }, { "epoch": 0.5300705882352941, "grad_norm": 0.537038825092733, "learning_rate": 2.7476990304297536e-06, "loss": 0.02073524594306946, "step": 56320 }, { "epoch": 0.5301176470588236, "grad_norm": 0.4460239916115489, "learning_rate": 2.747577068210589e-06, "loss": 0.018198929727077484, "step": 56325 }, { "epoch": 0.530164705882353, "grad_norm": 0.42232051175188096, "learning_rate": 2.7474551222306073e-06, "loss": 0.017174383997917174, "step": 56330 }, { "epoch": 0.5302117647058824, "grad_norm": 0.42427169840737694, "learning_rate": 2.747333192486205e-06, "loss": 0.017234230041503908, "step": 56335 }, { "epoch": 0.5302588235294118, "grad_norm": 0.5863613763319161, "learning_rate": 2.747211278973781e-06, "loss": 0.014780709147453308, "step": 56340 }, { "epoch": 0.5303058823529412, "grad_norm": 0.5293259021783878, "learning_rate": 2.7470893816897324e-06, "loss": 0.018356123566627504, "step": 56345 }, { "epoch": 0.5303529411764706, "grad_norm": 0.5894709041654899, "learning_rate": 2.74696750063046e-06, "loss": 0.016842703521251678, "step": 56350 }, { "epoch": 0.5304, "grad_norm": 0.4734894490417301, "learning_rate": 2.746845635792365e-06, "loss": 0.02172313630580902, "step": 56355 }, { "epoch": 0.5304470588235294, "grad_norm": 0.4063193505502653, "learning_rate": 2.7467237871718493e-06, "loss": 0.015452134609222411, "step": 56360 }, { "epoch": 0.5304941176470588, "grad_norm": 0.6838461391072528, "learning_rate": 2.7466019547653165e-06, "loss": 0.019380143284797667, "step": 56365 }, { "epoch": 0.5305411764705882, "grad_norm": 0.5252751481783868, "learning_rate": 2.7464801385691714e-06, "loss": 0.016237773001194, "step": 56370 }, { "epoch": 0.5305882352941177, "grad_norm": 0.7651040882358323, "learning_rate": 2.746358338579818e-06, "loss": 0.017480644583702087, "step": 56375 }, { "epoch": 0.5306352941176471, "grad_norm": 0.41714517947732577, "learning_rate": 2.7462365547936647e-06, "loss": 0.015202641487121582, "step": 56380 }, { "epoch": 0.5306823529411765, "grad_norm": 0.4244403564045334, "learning_rate": 2.746114787207118e-06, "loss": 0.015267182886600495, "step": 56385 }, { "epoch": 0.5307294117647059, "grad_norm": 0.47882820026045136, "learning_rate": 2.7459930358165875e-06, "loss": 0.015427112579345703, "step": 56390 }, { "epoch": 0.5307764705882353, "grad_norm": 0.44789679657979886, "learning_rate": 2.745871300618483e-06, "loss": 0.0197293683886528, "step": 56395 }, { "epoch": 0.5308235294117647, "grad_norm": 0.6402584337755128, "learning_rate": 2.745749581609215e-06, "loss": 0.021434506773948668, "step": 56400 }, { "epoch": 0.5308705882352941, "grad_norm": 0.8378029904909845, "learning_rate": 2.745627878785197e-06, "loss": 0.018556934595108033, "step": 56405 }, { "epoch": 0.5309176470588235, "grad_norm": 0.587061766916196, "learning_rate": 2.7455061921428418e-06, "loss": 0.01985180824995041, "step": 56410 }, { "epoch": 0.5309647058823529, "grad_norm": 0.42935277381432724, "learning_rate": 2.745384521678563e-06, "loss": 0.013703912496566772, "step": 56415 }, { "epoch": 0.5310117647058824, "grad_norm": 0.601566907612475, "learning_rate": 2.745262867388777e-06, "loss": 0.018389853835105895, "step": 56420 }, { "epoch": 0.5310588235294118, "grad_norm": 0.36138920030482957, "learning_rate": 2.7451412292699004e-06, "loss": 0.018380916118621825, "step": 56425 }, { "epoch": 0.5311058823529412, "grad_norm": 0.31349359087335793, "learning_rate": 2.745019607318351e-06, "loss": 0.014071646332740783, "step": 56430 }, { "epoch": 0.5311529411764706, "grad_norm": 0.630589738203433, "learning_rate": 2.7448980015305476e-06, "loss": 0.020571720600128175, "step": 56435 }, { "epoch": 0.5312, "grad_norm": 0.6684251462517063, "learning_rate": 2.74477641190291e-06, "loss": 0.025530368089675903, "step": 56440 }, { "epoch": 0.5312470588235294, "grad_norm": 0.6141296919500112, "learning_rate": 2.7446548384318598e-06, "loss": 0.017690578103065492, "step": 56445 }, { "epoch": 0.5312941176470588, "grad_norm": 0.5175737833854286, "learning_rate": 2.7445332811138193e-06, "loss": 0.019827544689178467, "step": 56450 }, { "epoch": 0.5313411764705882, "grad_norm": 0.4193060725617692, "learning_rate": 2.7444117399452107e-06, "loss": 0.018256083130836487, "step": 56455 }, { "epoch": 0.5313882352941176, "grad_norm": 0.5900317533763042, "learning_rate": 2.744290214922459e-06, "loss": 0.021841096878051757, "step": 56460 }, { "epoch": 0.531435294117647, "grad_norm": 0.5151798782134875, "learning_rate": 2.7441687060419913e-06, "loss": 0.014162574708461762, "step": 56465 }, { "epoch": 0.5314823529411765, "grad_norm": 0.5053510994857228, "learning_rate": 2.744047213300232e-06, "loss": 0.014425122737884521, "step": 56470 }, { "epoch": 0.5315294117647059, "grad_norm": 0.5095775863131091, "learning_rate": 2.74392573669361e-06, "loss": 0.01666684150695801, "step": 56475 }, { "epoch": 0.5315764705882353, "grad_norm": 0.40882580581759215, "learning_rate": 2.743804276218554e-06, "loss": 0.01952155530452728, "step": 56480 }, { "epoch": 0.5316235294117647, "grad_norm": 0.44229427375821506, "learning_rate": 2.743682831871494e-06, "loss": 0.017323556542396545, "step": 56485 }, { "epoch": 0.5316705882352941, "grad_norm": 0.3829592144014924, "learning_rate": 2.7435614036488613e-06, "loss": 0.016143126785755156, "step": 56490 }, { "epoch": 0.5317176470588235, "grad_norm": 0.604759868647183, "learning_rate": 2.7434399915470878e-06, "loss": 0.024705913662910462, "step": 56495 }, { "epoch": 0.5317647058823529, "grad_norm": 0.6154947966044646, "learning_rate": 2.743318595562606e-06, "loss": 0.018423700332641603, "step": 56500 }, { "epoch": 0.5318117647058823, "grad_norm": 0.43115661298484464, "learning_rate": 2.743197215691852e-06, "loss": 0.014114709198474884, "step": 56505 }, { "epoch": 0.5318588235294117, "grad_norm": 0.585459238163026, "learning_rate": 2.74307585193126e-06, "loss": 0.017158102989196778, "step": 56510 }, { "epoch": 0.5319058823529412, "grad_norm": 0.40602244298165335, "learning_rate": 2.742954504277267e-06, "loss": 0.017795345187187193, "step": 56515 }, { "epoch": 0.5319529411764706, "grad_norm": 0.6212385876699105, "learning_rate": 2.7428331727263107e-06, "loss": 0.02328795790672302, "step": 56520 }, { "epoch": 0.532, "grad_norm": 0.30767304478201335, "learning_rate": 2.7427118572748306e-06, "loss": 0.016253140568733216, "step": 56525 }, { "epoch": 0.5320470588235294, "grad_norm": 0.6849510731489092, "learning_rate": 2.742590557919266e-06, "loss": 0.014428915083408355, "step": 56530 }, { "epoch": 0.5320941176470588, "grad_norm": 0.6315606697231279, "learning_rate": 2.7424692746560572e-06, "loss": 0.019675691425800324, "step": 56535 }, { "epoch": 0.5321411764705882, "grad_norm": 0.6187403656378077, "learning_rate": 2.7423480074816472e-06, "loss": 0.020590466260910035, "step": 56540 }, { "epoch": 0.5321882352941176, "grad_norm": 0.3784303484507245, "learning_rate": 2.7422267563924793e-06, "loss": 0.01685192883014679, "step": 56545 }, { "epoch": 0.532235294117647, "grad_norm": 0.3779392907865808, "learning_rate": 2.7421055213849972e-06, "loss": 0.01816314160823822, "step": 56550 }, { "epoch": 0.5322823529411764, "grad_norm": 0.6154809397731017, "learning_rate": 2.7419843024556467e-06, "loss": 0.015306925773620606, "step": 56555 }, { "epoch": 0.5323294117647058, "grad_norm": 0.4572248234372029, "learning_rate": 2.741863099600875e-06, "loss": 0.013013564050197601, "step": 56560 }, { "epoch": 0.5323764705882353, "grad_norm": 0.6224973664134803, "learning_rate": 2.7417419128171284e-06, "loss": 0.01835920810699463, "step": 56565 }, { "epoch": 0.5324235294117647, "grad_norm": 0.66175449283157, "learning_rate": 2.741620742100857e-06, "loss": 0.02019823491573334, "step": 56570 }, { "epoch": 0.5324705882352941, "grad_norm": 0.5386220224347763, "learning_rate": 2.741499587448509e-06, "loss": 0.018285024166107177, "step": 56575 }, { "epoch": 0.5325176470588235, "grad_norm": 0.46169052327618965, "learning_rate": 2.741378448856537e-06, "loss": 0.015973037481307982, "step": 56580 }, { "epoch": 0.5325647058823529, "grad_norm": 0.4895072614582477, "learning_rate": 2.7412573263213913e-06, "loss": 0.019658660888671874, "step": 56585 }, { "epoch": 0.5326117647058823, "grad_norm": 0.3958154964452116, "learning_rate": 2.7411362198395262e-06, "loss": 0.016032522916793822, "step": 56590 }, { "epoch": 0.5326588235294117, "grad_norm": 0.5723823122337488, "learning_rate": 2.741015129407397e-06, "loss": 0.016059556603431703, "step": 56595 }, { "epoch": 0.5327058823529411, "grad_norm": 0.5971090905142233, "learning_rate": 2.7408940550214558e-06, "loss": 0.01771729290485382, "step": 56600 }, { "epoch": 0.5327529411764705, "grad_norm": 0.47324447587039453, "learning_rate": 2.7407729966781616e-06, "loss": 0.014508911967277527, "step": 56605 }, { "epoch": 0.5328, "grad_norm": 0.5845263624453387, "learning_rate": 2.7406519543739713e-06, "loss": 0.013898783922195434, "step": 56610 }, { "epoch": 0.5328470588235295, "grad_norm": 0.4322759800633222, "learning_rate": 2.740530928105343e-06, "loss": 0.016369998455047607, "step": 56615 }, { "epoch": 0.5328941176470589, "grad_norm": 0.8397076899994894, "learning_rate": 2.7404099178687372e-06, "loss": 0.02136770486831665, "step": 56620 }, { "epoch": 0.5329411764705883, "grad_norm": 0.5963303351222675, "learning_rate": 2.740288923660614e-06, "loss": 0.017967623472213746, "step": 56625 }, { "epoch": 0.5329882352941177, "grad_norm": 0.3992671030507006, "learning_rate": 2.7401679454774353e-06, "loss": 0.014830633997917175, "step": 56630 }, { "epoch": 0.533035294117647, "grad_norm": 0.4089660949660706, "learning_rate": 2.7400469833156645e-06, "loss": 0.017608261108398436, "step": 56635 }, { "epoch": 0.5330823529411765, "grad_norm": 0.5644758279380425, "learning_rate": 2.7399260371717653e-06, "loss": 0.016276772320270538, "step": 56640 }, { "epoch": 0.5331294117647059, "grad_norm": 0.565894269557054, "learning_rate": 2.7398051070422026e-06, "loss": 0.018569324910640717, "step": 56645 }, { "epoch": 0.5331764705882353, "grad_norm": 0.6060737369445574, "learning_rate": 2.7396841929234434e-06, "loss": 0.01760660856962204, "step": 56650 }, { "epoch": 0.5332235294117647, "grad_norm": 0.5702335421190509, "learning_rate": 2.739563294811955e-06, "loss": 0.016903451085090636, "step": 56655 }, { "epoch": 0.5332705882352942, "grad_norm": 0.41210023561064846, "learning_rate": 2.739442412704205e-06, "loss": 0.01880822479724884, "step": 56660 }, { "epoch": 0.5333176470588236, "grad_norm": 0.4978266559105084, "learning_rate": 2.7393215465966638e-06, "loss": 0.01381203681230545, "step": 56665 }, { "epoch": 0.533364705882353, "grad_norm": 0.373161383031783, "learning_rate": 2.7392006964858013e-06, "loss": 0.019193968176841734, "step": 56670 }, { "epoch": 0.5334117647058824, "grad_norm": 0.5674957922251856, "learning_rate": 2.7390798623680894e-06, "loss": 0.02014148235321045, "step": 56675 }, { "epoch": 0.5334588235294118, "grad_norm": 0.5171896385574016, "learning_rate": 2.7389590442400017e-06, "loss": 0.015346536040306091, "step": 56680 }, { "epoch": 0.5335058823529412, "grad_norm": 0.4069598261129621, "learning_rate": 2.73883824209801e-06, "loss": 0.01418493390083313, "step": 56685 }, { "epoch": 0.5335529411764706, "grad_norm": 0.5909113604153585, "learning_rate": 2.738717455938592e-06, "loss": 0.016742517054080964, "step": 56690 }, { "epoch": 0.5336, "grad_norm": 0.6672105881449553, "learning_rate": 2.738596685758222e-06, "loss": 0.016668014228343964, "step": 56695 }, { "epoch": 0.5336470588235294, "grad_norm": 0.6214758025579409, "learning_rate": 2.738475931553378e-06, "loss": 0.017518171668052675, "step": 56700 }, { "epoch": 0.5336941176470589, "grad_norm": 0.7189941998320932, "learning_rate": 2.7383551933205366e-06, "loss": 0.020549291372299196, "step": 56705 }, { "epoch": 0.5337411764705883, "grad_norm": 0.5345323419438623, "learning_rate": 2.738234471056179e-06, "loss": 0.023191529512405395, "step": 56710 }, { "epoch": 0.5337882352941177, "grad_norm": 0.5109696733601925, "learning_rate": 2.738113764756785e-06, "loss": 0.015097701549530029, "step": 56715 }, { "epoch": 0.5338352941176471, "grad_norm": 0.4588213747266287, "learning_rate": 2.737993074418836e-06, "loss": 0.013623307645320892, "step": 56720 }, { "epoch": 0.5338823529411765, "grad_norm": 0.5828800473565949, "learning_rate": 2.737872400038814e-06, "loss": 0.01872929185628891, "step": 56725 }, { "epoch": 0.5339294117647059, "grad_norm": 0.7287152939783115, "learning_rate": 2.7377517416132036e-06, "loss": 0.01559857428073883, "step": 56730 }, { "epoch": 0.5339764705882353, "grad_norm": 0.36148193655139566, "learning_rate": 2.737631099138489e-06, "loss": 0.014206501841545104, "step": 56735 }, { "epoch": 0.5340235294117647, "grad_norm": 0.647178116603443, "learning_rate": 2.7375104726111566e-06, "loss": 0.015521001815795899, "step": 56740 }, { "epoch": 0.5340705882352941, "grad_norm": 0.5799171172859588, "learning_rate": 2.737389862027692e-06, "loss": 0.016839960217475893, "step": 56745 }, { "epoch": 0.5341176470588235, "grad_norm": 0.48311086921694507, "learning_rate": 2.737269267384585e-06, "loss": 0.014880210161209106, "step": 56750 }, { "epoch": 0.534164705882353, "grad_norm": 0.5873666667527258, "learning_rate": 2.737148688678324e-06, "loss": 0.013984620571136475, "step": 56755 }, { "epoch": 0.5342117647058824, "grad_norm": 0.4694017029828594, "learning_rate": 2.7370281259053982e-06, "loss": 0.018420089781284333, "step": 56760 }, { "epoch": 0.5342588235294118, "grad_norm": 0.7378440940084879, "learning_rate": 2.7369075790623e-06, "loss": 0.020026436448097228, "step": 56765 }, { "epoch": 0.5343058823529412, "grad_norm": 0.5659844028562947, "learning_rate": 2.736787048145521e-06, "loss": 0.016537463665008544, "step": 56770 }, { "epoch": 0.5343529411764706, "grad_norm": 0.554972323138598, "learning_rate": 2.736666533151555e-06, "loss": 0.018098706007003786, "step": 56775 }, { "epoch": 0.5344, "grad_norm": 0.8939648353066987, "learning_rate": 2.736546034076897e-06, "loss": 0.019942152500152587, "step": 56780 }, { "epoch": 0.5344470588235294, "grad_norm": 0.4867262007119681, "learning_rate": 2.736425550918041e-06, "loss": 0.015841081738471985, "step": 56785 }, { "epoch": 0.5344941176470588, "grad_norm": 0.582653885615804, "learning_rate": 2.736305083671485e-06, "loss": 0.01612675338983536, "step": 56790 }, { "epoch": 0.5345411764705882, "grad_norm": 0.4477750612107043, "learning_rate": 2.7361846323337266e-06, "loss": 0.019532498717308045, "step": 56795 }, { "epoch": 0.5345882352941177, "grad_norm": 0.4829474119056065, "learning_rate": 2.7360641969012643e-06, "loss": 0.01672291159629822, "step": 56800 }, { "epoch": 0.5346352941176471, "grad_norm": 0.40189941586919153, "learning_rate": 2.735943777370597e-06, "loss": 0.01545068770647049, "step": 56805 }, { "epoch": 0.5346823529411765, "grad_norm": 0.22172190284890342, "learning_rate": 2.735823373738228e-06, "loss": 0.016225171089172364, "step": 56810 }, { "epoch": 0.5347294117647059, "grad_norm": 0.5340416339900773, "learning_rate": 2.7357029860006572e-06, "loss": 0.01550745964050293, "step": 56815 }, { "epoch": 0.5347764705882353, "grad_norm": 0.7469407534296725, "learning_rate": 2.735582614154389e-06, "loss": 0.020064926147460936, "step": 56820 }, { "epoch": 0.5348235294117647, "grad_norm": 0.8191878502663495, "learning_rate": 2.735462258195927e-06, "loss": 0.021569260954856874, "step": 56825 }, { "epoch": 0.5348705882352941, "grad_norm": 0.6817113600147564, "learning_rate": 2.7353419181217768e-06, "loss": 0.014818167686462403, "step": 56830 }, { "epoch": 0.5349176470588235, "grad_norm": 0.32673635044949423, "learning_rate": 2.735221593928444e-06, "loss": 0.016337865591049196, "step": 56835 }, { "epoch": 0.5349647058823529, "grad_norm": 0.43004353155281133, "learning_rate": 2.7351012856124376e-06, "loss": 0.01186879575252533, "step": 56840 }, { "epoch": 0.5350117647058823, "grad_norm": 0.35705718245870294, "learning_rate": 2.7349809931702643e-06, "loss": 0.016000695526599884, "step": 56845 }, { "epoch": 0.5350588235294118, "grad_norm": 0.6118890762842747, "learning_rate": 2.7348607165984348e-06, "loss": 0.020743384957313538, "step": 56850 }, { "epoch": 0.5351058823529412, "grad_norm": 0.5478036818592795, "learning_rate": 2.7347404558934586e-06, "loss": 0.01734204888343811, "step": 56855 }, { "epoch": 0.5351529411764706, "grad_norm": 0.6891545811119696, "learning_rate": 2.734620211051849e-06, "loss": 0.01815049946308136, "step": 56860 }, { "epoch": 0.5352, "grad_norm": 0.5037062608922844, "learning_rate": 2.7344999820701178e-06, "loss": 0.017613482475280762, "step": 56865 }, { "epoch": 0.5352470588235294, "grad_norm": 0.728877538900313, "learning_rate": 2.734379768944779e-06, "loss": 0.016088779270648956, "step": 56870 }, { "epoch": 0.5352941176470588, "grad_norm": 0.603471885472452, "learning_rate": 2.734259571672348e-06, "loss": 0.02622467279434204, "step": 56875 }, { "epoch": 0.5353411764705882, "grad_norm": 0.5026039494578954, "learning_rate": 2.73413939024934e-06, "loss": 0.01580934375524521, "step": 56880 }, { "epoch": 0.5353882352941176, "grad_norm": 0.6148868697540268, "learning_rate": 2.734019224672273e-06, "loss": 0.02154376357793808, "step": 56885 }, { "epoch": 0.535435294117647, "grad_norm": 0.5262682297210296, "learning_rate": 2.7338990749376642e-06, "loss": 0.020274102687835693, "step": 56890 }, { "epoch": 0.5354823529411765, "grad_norm": 0.7081251287234899, "learning_rate": 2.7337789410420333e-06, "loss": 0.020348848402500154, "step": 56895 }, { "epoch": 0.5355294117647059, "grad_norm": 0.6356769087821906, "learning_rate": 2.733658822981901e-06, "loss": 0.018008941411972047, "step": 56900 }, { "epoch": 0.5355764705882353, "grad_norm": 0.47509140571722447, "learning_rate": 2.733538720753788e-06, "loss": 0.014341574907302857, "step": 56905 }, { "epoch": 0.5356235294117647, "grad_norm": 0.627378552060489, "learning_rate": 2.7334186343542173e-06, "loss": 0.018288280069828033, "step": 56910 }, { "epoch": 0.5356705882352941, "grad_norm": 0.6818251546538181, "learning_rate": 2.7332985637797117e-06, "loss": 0.014308527112007141, "step": 56915 }, { "epoch": 0.5357176470588235, "grad_norm": 0.5246297567787306, "learning_rate": 2.7331785090267965e-06, "loss": 0.015995746850967406, "step": 56920 }, { "epoch": 0.5357647058823529, "grad_norm": 0.516757645070045, "learning_rate": 2.733058470091997e-06, "loss": 0.016409650444984436, "step": 56925 }, { "epoch": 0.5358117647058823, "grad_norm": 0.5676959663186166, "learning_rate": 2.73293844697184e-06, "loss": 0.017962832748889924, "step": 56930 }, { "epoch": 0.5358588235294117, "grad_norm": 0.4948759814442104, "learning_rate": 2.7328184396628532e-06, "loss": 0.01979365348815918, "step": 56935 }, { "epoch": 0.5359058823529412, "grad_norm": 0.5123007163845456, "learning_rate": 2.7326984481615647e-06, "loss": 0.021170410513877868, "step": 56940 }, { "epoch": 0.5359529411764706, "grad_norm": 0.5582017338768129, "learning_rate": 2.7325784724645067e-06, "loss": 0.01914211958646774, "step": 56945 }, { "epoch": 0.536, "grad_norm": 0.47087042687906683, "learning_rate": 2.7324585125682074e-06, "loss": 0.019515827298164368, "step": 56950 }, { "epoch": 0.5360470588235294, "grad_norm": 0.5184953145147134, "learning_rate": 2.732338568469201e-06, "loss": 0.015035748481750488, "step": 56955 }, { "epoch": 0.5360941176470588, "grad_norm": 0.7097427944183566, "learning_rate": 2.7322186401640188e-06, "loss": 0.017400741577148438, "step": 56960 }, { "epoch": 0.5361411764705882, "grad_norm": 0.6794724156413995, "learning_rate": 2.7320987276491968e-06, "loss": 0.016845446825027467, "step": 56965 }, { "epoch": 0.5361882352941176, "grad_norm": 0.5164189454039808, "learning_rate": 2.7319788309212685e-06, "loss": 0.01552589237689972, "step": 56970 }, { "epoch": 0.536235294117647, "grad_norm": 0.43336571343773445, "learning_rate": 2.731858949976772e-06, "loss": 0.011610119044780732, "step": 56975 }, { "epoch": 0.5362823529411764, "grad_norm": 0.3967433615935731, "learning_rate": 2.7317390848122427e-06, "loss": 0.015319812297821044, "step": 56980 }, { "epoch": 0.5363294117647058, "grad_norm": 0.5436116122889154, "learning_rate": 2.731619235424221e-06, "loss": 0.014368550479412079, "step": 56985 }, { "epoch": 0.5363764705882353, "grad_norm": 0.5062126629020561, "learning_rate": 2.731499401809245e-06, "loss": 0.0199651837348938, "step": 56990 }, { "epoch": 0.5364235294117647, "grad_norm": 0.3914695381277974, "learning_rate": 2.7313795839638557e-06, "loss": 0.01569526791572571, "step": 56995 }, { "epoch": 0.5364705882352941, "grad_norm": 0.5605959897744415, "learning_rate": 2.7312597818845947e-06, "loss": 0.01898496150970459, "step": 57000 }, { "epoch": 0.5365176470588235, "grad_norm": 0.615031585704733, "learning_rate": 2.731139995568005e-06, "loss": 0.014632657170295715, "step": 57005 }, { "epoch": 0.5365647058823529, "grad_norm": 0.6419069672470293, "learning_rate": 2.73102022501063e-06, "loss": 0.021555060148239137, "step": 57010 }, { "epoch": 0.5366117647058823, "grad_norm": 0.4984213797147428, "learning_rate": 2.730900470209014e-06, "loss": 0.023595654964447023, "step": 57015 }, { "epoch": 0.5366588235294117, "grad_norm": 0.5785607565214892, "learning_rate": 2.730780731159704e-06, "loss": 0.01954755187034607, "step": 57020 }, { "epoch": 0.5367058823529411, "grad_norm": 0.48550152424652854, "learning_rate": 2.730661007859246e-06, "loss": 0.017605912685394288, "step": 57025 }, { "epoch": 0.5367529411764705, "grad_norm": 0.40896122250168476, "learning_rate": 2.7305413003041884e-06, "loss": 0.013282647728919983, "step": 57030 }, { "epoch": 0.5368, "grad_norm": 0.6126488124595831, "learning_rate": 2.7304216084910807e-06, "loss": 0.016013994812965393, "step": 57035 }, { "epoch": 0.5368470588235295, "grad_norm": 0.5529873642101288, "learning_rate": 2.730301932416472e-06, "loss": 0.015896685421466827, "step": 57040 }, { "epoch": 0.5368941176470589, "grad_norm": 0.7347711817994419, "learning_rate": 2.7301822720769138e-06, "loss": 0.018509912490844726, "step": 57045 }, { "epoch": 0.5369411764705883, "grad_norm": 0.4790693199994117, "learning_rate": 2.730062627468959e-06, "loss": 0.02035147249698639, "step": 57050 }, { "epoch": 0.5369882352941177, "grad_norm": 0.4417655483061393, "learning_rate": 2.72994299858916e-06, "loss": 0.013123990595340728, "step": 57055 }, { "epoch": 0.537035294117647, "grad_norm": 0.567703244675921, "learning_rate": 2.7298233854340715e-06, "loss": 0.015169751644134522, "step": 57060 }, { "epoch": 0.5370823529411765, "grad_norm": 0.638120087600214, "learning_rate": 2.729703788000249e-06, "loss": 0.01759117841720581, "step": 57065 }, { "epoch": 0.5371294117647059, "grad_norm": 0.46423710170339644, "learning_rate": 2.7295842062842488e-06, "loss": 0.023622792959213258, "step": 57070 }, { "epoch": 0.5371764705882353, "grad_norm": 0.6137120437637271, "learning_rate": 2.7294646402826286e-06, "loss": 0.01828029453754425, "step": 57075 }, { "epoch": 0.5372235294117647, "grad_norm": 0.6876665460018685, "learning_rate": 2.729345089991946e-06, "loss": 0.019008240103721617, "step": 57080 }, { "epoch": 0.5372705882352942, "grad_norm": 0.3005101417698202, "learning_rate": 2.729225555408762e-06, "loss": 0.012660861015319824, "step": 57085 }, { "epoch": 0.5373176470588236, "grad_norm": 0.35240828828487364, "learning_rate": 2.7291060365296367e-06, "loss": 0.012562179565429687, "step": 57090 }, { "epoch": 0.537364705882353, "grad_norm": 0.5520985966763531, "learning_rate": 2.728986533351132e-06, "loss": 0.01713077425956726, "step": 57095 }, { "epoch": 0.5374117647058824, "grad_norm": 0.6086381441955766, "learning_rate": 2.7288670458698097e-06, "loss": 0.01723720282316208, "step": 57100 }, { "epoch": 0.5374588235294118, "grad_norm": 0.3384204854043146, "learning_rate": 2.728747574082235e-06, "loss": 0.015004368126392364, "step": 57105 }, { "epoch": 0.5375058823529412, "grad_norm": 0.5935392095224047, "learning_rate": 2.7286281179849717e-06, "loss": 0.017024698853492736, "step": 57110 }, { "epoch": 0.5375529411764706, "grad_norm": 0.5454081216519628, "learning_rate": 2.7285086775745867e-06, "loss": 0.013969585299491882, "step": 57115 }, { "epoch": 0.5376, "grad_norm": 0.6777770938595005, "learning_rate": 2.7283892528476458e-06, "loss": 0.020813187956809996, "step": 57120 }, { "epoch": 0.5376470588235294, "grad_norm": 0.4458636421375204, "learning_rate": 2.7282698438007182e-06, "loss": 0.018669518828392028, "step": 57125 }, { "epoch": 0.5376941176470589, "grad_norm": 0.604963099365688, "learning_rate": 2.7281504504303724e-06, "loss": 0.016637100279331206, "step": 57130 }, { "epoch": 0.5377411764705883, "grad_norm": 0.642411025025832, "learning_rate": 2.7280310727331795e-06, "loss": 0.021862795948982237, "step": 57135 }, { "epoch": 0.5377882352941177, "grad_norm": 0.43157153499851997, "learning_rate": 2.727911710705709e-06, "loss": 0.01561729609966278, "step": 57140 }, { "epoch": 0.5378352941176471, "grad_norm": 0.674496715479752, "learning_rate": 2.727792364344534e-06, "loss": 0.017545291781425477, "step": 57145 }, { "epoch": 0.5378823529411765, "grad_norm": 0.7213771388275011, "learning_rate": 2.7276730336462278e-06, "loss": 0.0172463595867157, "step": 57150 }, { "epoch": 0.5379294117647059, "grad_norm": 0.5090649751874756, "learning_rate": 2.7275537186073645e-06, "loss": 0.018716812133789062, "step": 57155 }, { "epoch": 0.5379764705882353, "grad_norm": 0.5638183584808013, "learning_rate": 2.72743441922452e-06, "loss": 0.017946460843086244, "step": 57160 }, { "epoch": 0.5380235294117647, "grad_norm": 0.5816400720269996, "learning_rate": 2.7273151354942705e-06, "loss": 0.01897108554840088, "step": 57165 }, { "epoch": 0.5380705882352941, "grad_norm": 0.4686119530169877, "learning_rate": 2.727195867413193e-06, "loss": 0.016230401396751405, "step": 57170 }, { "epoch": 0.5381176470588235, "grad_norm": 0.3662598167376469, "learning_rate": 2.727076614977867e-06, "loss": 0.016334390640258788, "step": 57175 }, { "epoch": 0.538164705882353, "grad_norm": 0.5060903263090576, "learning_rate": 2.726957378184872e-06, "loss": 0.017189884185791017, "step": 57180 }, { "epoch": 0.5382117647058824, "grad_norm": 0.4732004884770138, "learning_rate": 2.7268381570307872e-06, "loss": 0.01764177829027176, "step": 57185 }, { "epoch": 0.5382588235294118, "grad_norm": 0.48694527601024784, "learning_rate": 2.726718951512196e-06, "loss": 0.01405602991580963, "step": 57190 }, { "epoch": 0.5383058823529412, "grad_norm": 0.4645834755368689, "learning_rate": 2.72659976162568e-06, "loss": 0.018545253574848174, "step": 57195 }, { "epoch": 0.5383529411764706, "grad_norm": 0.5349188364630074, "learning_rate": 2.726480587367823e-06, "loss": 0.02179001271724701, "step": 57200 }, { "epoch": 0.5384, "grad_norm": 0.6452111071305144, "learning_rate": 2.7263614287352107e-06, "loss": 0.013310903310775756, "step": 57205 }, { "epoch": 0.5384470588235294, "grad_norm": 0.3642280233317822, "learning_rate": 2.7262422857244285e-06, "loss": 0.01709693968296051, "step": 57210 }, { "epoch": 0.5384941176470588, "grad_norm": 0.538014997222923, "learning_rate": 2.7261231583320623e-06, "loss": 0.018562600016593933, "step": 57215 }, { "epoch": 0.5385411764705882, "grad_norm": 0.5957584508119953, "learning_rate": 2.7260040465547014e-06, "loss": 0.014839491248130799, "step": 57220 }, { "epoch": 0.5385882352941177, "grad_norm": 0.6843564774966544, "learning_rate": 2.725884950388935e-06, "loss": 0.02071480005979538, "step": 57225 }, { "epoch": 0.5386352941176471, "grad_norm": 0.49269171494041014, "learning_rate": 2.725765869831352e-06, "loss": 0.01358758807182312, "step": 57230 }, { "epoch": 0.5386823529411765, "grad_norm": 0.5905579988197971, "learning_rate": 2.725646804878544e-06, "loss": 0.02007361948490143, "step": 57235 }, { "epoch": 0.5387294117647059, "grad_norm": 0.6341728576730368, "learning_rate": 2.7255277555271027e-06, "loss": 0.021479830145835876, "step": 57240 }, { "epoch": 0.5387764705882353, "grad_norm": 0.43634774406946586, "learning_rate": 2.725408721773622e-06, "loss": 0.015622156858444213, "step": 57245 }, { "epoch": 0.5388235294117647, "grad_norm": 0.6261656442585457, "learning_rate": 2.725289703614695e-06, "loss": 0.014529436826705933, "step": 57250 }, { "epoch": 0.5388705882352941, "grad_norm": 0.47586272840859356, "learning_rate": 2.7251707010469185e-06, "loss": 0.01787661463022232, "step": 57255 }, { "epoch": 0.5389176470588235, "grad_norm": 0.5377596352924203, "learning_rate": 2.7250517140668876e-06, "loss": 0.015026885271072387, "step": 57260 }, { "epoch": 0.5389647058823529, "grad_norm": 0.6193737308364226, "learning_rate": 2.7249327426712e-06, "loss": 0.015572759509086608, "step": 57265 }, { "epoch": 0.5390117647058823, "grad_norm": 0.6029676057818896, "learning_rate": 2.724813786856454e-06, "loss": 0.01836467981338501, "step": 57270 }, { "epoch": 0.5390588235294118, "grad_norm": 0.371616865674361, "learning_rate": 2.724694846619249e-06, "loss": 0.01650952100753784, "step": 57275 }, { "epoch": 0.5391058823529412, "grad_norm": 0.6337429240684278, "learning_rate": 2.7245759219561854e-06, "loss": 0.022099702060222624, "step": 57280 }, { "epoch": 0.5391529411764706, "grad_norm": 0.5883430836777634, "learning_rate": 2.7244570128638646e-06, "loss": 0.018120551109313966, "step": 57285 }, { "epoch": 0.5392, "grad_norm": 0.48312566829482956, "learning_rate": 2.724338119338889e-06, "loss": 0.014714618027210236, "step": 57290 }, { "epoch": 0.5392470588235294, "grad_norm": 0.48321993559021986, "learning_rate": 2.7242192413778634e-06, "loss": 0.017438723146915434, "step": 57295 }, { "epoch": 0.5392941176470588, "grad_norm": 0.3390901379493995, "learning_rate": 2.7241003789773902e-06, "loss": 0.014621368050575257, "step": 57300 }, { "epoch": 0.5393411764705882, "grad_norm": 0.5994115680380829, "learning_rate": 2.723981532134077e-06, "loss": 0.01669062376022339, "step": 57305 }, { "epoch": 0.5393882352941176, "grad_norm": 0.5111460457243416, "learning_rate": 2.7238627008445288e-06, "loss": 0.01509677916765213, "step": 57310 }, { "epoch": 0.539435294117647, "grad_norm": 0.7147455145466863, "learning_rate": 2.723743885105355e-06, "loss": 0.02040170431137085, "step": 57315 }, { "epoch": 0.5394823529411765, "grad_norm": 0.4266383784348404, "learning_rate": 2.723625084913163e-06, "loss": 0.018597251176834105, "step": 57320 }, { "epoch": 0.5395294117647059, "grad_norm": 0.37831155020660673, "learning_rate": 2.7235063002645634e-06, "loss": 0.016150118410587312, "step": 57325 }, { "epoch": 0.5395764705882353, "grad_norm": 0.4454647896081221, "learning_rate": 2.7233875311561664e-06, "loss": 0.01707882583141327, "step": 57330 }, { "epoch": 0.5396235294117647, "grad_norm": 0.4052702958842952, "learning_rate": 2.723268777584584e-06, "loss": 0.017686201632022856, "step": 57335 }, { "epoch": 0.5396705882352941, "grad_norm": 0.6577602916187476, "learning_rate": 2.72315003954643e-06, "loss": 0.01907694935798645, "step": 57340 }, { "epoch": 0.5397176470588235, "grad_norm": 0.5453348705038228, "learning_rate": 2.7230313170383164e-06, "loss": 0.016305622458457947, "step": 57345 }, { "epoch": 0.5397647058823529, "grad_norm": 0.8687288626533821, "learning_rate": 2.7229126100568605e-06, "loss": 0.02299649715423584, "step": 57350 }, { "epoch": 0.5398117647058823, "grad_norm": 0.4314717939300812, "learning_rate": 2.722793918598676e-06, "loss": 0.015898600220680237, "step": 57355 }, { "epoch": 0.5398588235294117, "grad_norm": 0.48589769623576984, "learning_rate": 2.7226752426603807e-06, "loss": 0.01679406464099884, "step": 57360 }, { "epoch": 0.5399058823529411, "grad_norm": 0.5371312645308544, "learning_rate": 2.7225565822385933e-06, "loss": 0.017596770823001862, "step": 57365 }, { "epoch": 0.5399529411764706, "grad_norm": 0.5607324223460192, "learning_rate": 2.722437937329933e-06, "loss": 0.01999499797821045, "step": 57370 }, { "epoch": 0.54, "grad_norm": 0.3838936688056678, "learning_rate": 2.7223193079310184e-06, "loss": 0.015894603729248048, "step": 57375 }, { "epoch": 0.5400470588235294, "grad_norm": 0.47792377415108656, "learning_rate": 2.722200694038472e-06, "loss": 0.014087578654289246, "step": 57380 }, { "epoch": 0.5400941176470588, "grad_norm": 0.5452885974176835, "learning_rate": 2.722082095648916e-06, "loss": 0.01589720547199249, "step": 57385 }, { "epoch": 0.5401411764705882, "grad_norm": 0.7231019908798026, "learning_rate": 2.721963512758972e-06, "loss": 0.01397811770439148, "step": 57390 }, { "epoch": 0.5401882352941176, "grad_norm": 0.5198772456805528, "learning_rate": 2.7218449453652662e-06, "loss": 0.01756083369255066, "step": 57395 }, { "epoch": 0.540235294117647, "grad_norm": 0.7328518924405736, "learning_rate": 2.7217263934644227e-06, "loss": 0.01861548572778702, "step": 57400 }, { "epoch": 0.5402823529411764, "grad_norm": 0.4579212598347513, "learning_rate": 2.721607857053068e-06, "loss": 0.01636570394039154, "step": 57405 }, { "epoch": 0.5403294117647058, "grad_norm": 0.2722652195708931, "learning_rate": 2.7214893361278295e-06, "loss": 0.016352900862693788, "step": 57410 }, { "epoch": 0.5403764705882353, "grad_norm": 0.5047351067689909, "learning_rate": 2.7213708306853354e-06, "loss": 0.019990548491477966, "step": 57415 }, { "epoch": 0.5404235294117647, "grad_norm": 0.6383898665462479, "learning_rate": 2.721252340722215e-06, "loss": 0.019469231367111206, "step": 57420 }, { "epoch": 0.5404705882352941, "grad_norm": 0.5429763742999553, "learning_rate": 2.7211338662350994e-06, "loss": 0.017663344740867615, "step": 57425 }, { "epoch": 0.5405176470588235, "grad_norm": 1.0776109427945506, "learning_rate": 2.721015407220619e-06, "loss": 0.01736789643764496, "step": 57430 }, { "epoch": 0.540564705882353, "grad_norm": 0.5456532237981508, "learning_rate": 2.7208969636754077e-06, "loss": 0.0168790340423584, "step": 57435 }, { "epoch": 0.5406117647058823, "grad_norm": 0.6189278081712477, "learning_rate": 2.720778535596097e-06, "loss": 0.02007904350757599, "step": 57440 }, { "epoch": 0.5406588235294117, "grad_norm": 0.364367068316826, "learning_rate": 2.720660122979323e-06, "loss": 0.020259061455726625, "step": 57445 }, { "epoch": 0.5407058823529411, "grad_norm": 0.5283690292840367, "learning_rate": 2.72054172582172e-06, "loss": 0.01648219972848892, "step": 57450 }, { "epoch": 0.5407529411764705, "grad_norm": 0.5546530686338148, "learning_rate": 2.7204233441199257e-06, "loss": 0.016911199688911437, "step": 57455 }, { "epoch": 0.5408, "grad_norm": 0.45786120855778856, "learning_rate": 2.720304977870577e-06, "loss": 0.01891021430492401, "step": 57460 }, { "epoch": 0.5408470588235295, "grad_norm": 0.35625963487802353, "learning_rate": 2.7201866270703127e-06, "loss": 0.0200804203748703, "step": 57465 }, { "epoch": 0.5408941176470589, "grad_norm": 0.5671283041067414, "learning_rate": 2.7200682917157715e-06, "loss": 0.020953670144081116, "step": 57470 }, { "epoch": 0.5409411764705883, "grad_norm": 0.6800551642026422, "learning_rate": 2.719949971803596e-06, "loss": 0.023505660891532897, "step": 57475 }, { "epoch": 0.5409882352941177, "grad_norm": 0.4567812545352843, "learning_rate": 2.7198316673304263e-06, "loss": 0.025207948684692384, "step": 57480 }, { "epoch": 0.5410352941176471, "grad_norm": 0.7003300567672336, "learning_rate": 2.7197133782929057e-06, "loss": 0.020199567079544067, "step": 57485 }, { "epoch": 0.5410823529411765, "grad_norm": 0.5296352352547387, "learning_rate": 2.719595104687678e-06, "loss": 0.02092807590961456, "step": 57490 }, { "epoch": 0.5411294117647059, "grad_norm": 0.48728876906268703, "learning_rate": 2.719476846511387e-06, "loss": 0.014746937155723571, "step": 57495 }, { "epoch": 0.5411764705882353, "grad_norm": 0.38749171179911884, "learning_rate": 2.7193586037606797e-06, "loss": 0.014388218522071838, "step": 57500 }, { "epoch": 0.5412235294117647, "grad_norm": 0.4936059323040532, "learning_rate": 2.7192403764322016e-06, "loss": 0.01588689684867859, "step": 57505 }, { "epoch": 0.5412705882352942, "grad_norm": 0.3606136782515974, "learning_rate": 2.7191221645226023e-06, "loss": 0.010295384377241135, "step": 57510 }, { "epoch": 0.5413176470588236, "grad_norm": 0.49895052313832117, "learning_rate": 2.7190039680285286e-06, "loss": 0.019984975457191467, "step": 57515 }, { "epoch": 0.541364705882353, "grad_norm": 0.5175194245655811, "learning_rate": 2.7188857869466316e-06, "loss": 0.01637338399887085, "step": 57520 }, { "epoch": 0.5414117647058824, "grad_norm": 0.4781795610617384, "learning_rate": 2.718767621273562e-06, "loss": 0.017782585322856904, "step": 57525 }, { "epoch": 0.5414588235294118, "grad_norm": 0.45127571634070357, "learning_rate": 2.7186494710059713e-06, "loss": 0.016765278577804566, "step": 57530 }, { "epoch": 0.5415058823529412, "grad_norm": 0.40955603398876317, "learning_rate": 2.7185313361405127e-06, "loss": 0.014923605322837829, "step": 57535 }, { "epoch": 0.5415529411764706, "grad_norm": 0.38781003547173365, "learning_rate": 2.71841321667384e-06, "loss": 0.01600094735622406, "step": 57540 }, { "epoch": 0.5416, "grad_norm": 0.5964767404140645, "learning_rate": 2.7182951126026075e-06, "loss": 0.01586398184299469, "step": 57545 }, { "epoch": 0.5416470588235294, "grad_norm": 3.447330071994289, "learning_rate": 2.7181770239234724e-06, "loss": 0.019530606269836426, "step": 57550 }, { "epoch": 0.5416941176470588, "grad_norm": 0.5479615248659514, "learning_rate": 2.7180589506330905e-06, "loss": 0.014252835512161255, "step": 57555 }, { "epoch": 0.5417411764705883, "grad_norm": 0.5697650516002495, "learning_rate": 2.717940892728121e-06, "loss": 0.01667717397212982, "step": 57560 }, { "epoch": 0.5417882352941177, "grad_norm": 0.7664979984532526, "learning_rate": 2.717822850205221e-06, "loss": 0.021039727330207824, "step": 57565 }, { "epoch": 0.5418352941176471, "grad_norm": 0.4656588769197778, "learning_rate": 2.717704823061052e-06, "loss": 0.02104523628950119, "step": 57570 }, { "epoch": 0.5418823529411765, "grad_norm": 1.0484574541521652, "learning_rate": 2.7175868112922752e-06, "loss": 0.02676595449447632, "step": 57575 }, { "epoch": 0.5419294117647059, "grad_norm": 0.7268892553360323, "learning_rate": 2.717468814895552e-06, "loss": 0.017429548501968383, "step": 57580 }, { "epoch": 0.5419764705882353, "grad_norm": 0.45558817824253695, "learning_rate": 2.7173508338675454e-06, "loss": 0.016194081306457518, "step": 57585 }, { "epoch": 0.5420235294117647, "grad_norm": 0.4516032422923402, "learning_rate": 2.7172328682049204e-06, "loss": 0.01924319565296173, "step": 57590 }, { "epoch": 0.5420705882352941, "grad_norm": 0.5838015374443316, "learning_rate": 2.7171149179043407e-06, "loss": 0.020108242332935334, "step": 57595 }, { "epoch": 0.5421176470588235, "grad_norm": 0.4896083410375514, "learning_rate": 2.716996982962473e-06, "loss": 0.014424130320549011, "step": 57600 }, { "epoch": 0.542164705882353, "grad_norm": 0.37340839117063623, "learning_rate": 2.716879063375984e-06, "loss": 0.017205223441123962, "step": 57605 }, { "epoch": 0.5422117647058824, "grad_norm": 0.464629600754584, "learning_rate": 2.716761159141543e-06, "loss": 0.014686784148216248, "step": 57610 }, { "epoch": 0.5422588235294118, "grad_norm": 0.7132668098886452, "learning_rate": 2.7166432702558175e-06, "loss": 0.01742546260356903, "step": 57615 }, { "epoch": 0.5423058823529412, "grad_norm": 0.7334647724345135, "learning_rate": 2.7165253967154794e-06, "loss": 0.015016816556453705, "step": 57620 }, { "epoch": 0.5423529411764706, "grad_norm": 0.4208772106074866, "learning_rate": 2.7164075385171983e-06, "loss": 0.017607176303863527, "step": 57625 }, { "epoch": 0.5424, "grad_norm": 0.501224295136415, "learning_rate": 2.716289695657648e-06, "loss": 0.014193560183048248, "step": 57630 }, { "epoch": 0.5424470588235294, "grad_norm": 0.4849292457387782, "learning_rate": 2.7161718681335e-06, "loss": 0.018399052321910858, "step": 57635 }, { "epoch": 0.5424941176470588, "grad_norm": 0.6498097945568982, "learning_rate": 2.716054055941429e-06, "loss": 0.024350519478321075, "step": 57640 }, { "epoch": 0.5425411764705882, "grad_norm": 0.4247956024626789, "learning_rate": 2.715936259078111e-06, "loss": 0.01488502323627472, "step": 57645 }, { "epoch": 0.5425882352941176, "grad_norm": 0.4641047068186854, "learning_rate": 2.7158184775402206e-06, "loss": 0.01613411009311676, "step": 57650 }, { "epoch": 0.5426352941176471, "grad_norm": 0.4520687046858626, "learning_rate": 2.7157007113244367e-06, "loss": 0.02008388042449951, "step": 57655 }, { "epoch": 0.5426823529411765, "grad_norm": 0.8369981867712597, "learning_rate": 2.715582960427437e-06, "loss": 0.024498747289180757, "step": 57660 }, { "epoch": 0.5427294117647059, "grad_norm": 0.6988075601378002, "learning_rate": 2.7154652248458998e-06, "loss": 0.02056371867656708, "step": 57665 }, { "epoch": 0.5427764705882353, "grad_norm": 0.4181508029151413, "learning_rate": 2.7153475045765067e-06, "loss": 0.015860798954963683, "step": 57670 }, { "epoch": 0.5428235294117647, "grad_norm": 0.5389003958279344, "learning_rate": 2.715229799615938e-06, "loss": 0.01735852360725403, "step": 57675 }, { "epoch": 0.5428705882352941, "grad_norm": 0.6140109343361883, "learning_rate": 2.715112109960876e-06, "loss": 0.022500115633010864, "step": 57680 }, { "epoch": 0.5429176470588235, "grad_norm": 0.5529077881873612, "learning_rate": 2.714994435608005e-06, "loss": 0.01742641031742096, "step": 57685 }, { "epoch": 0.5429647058823529, "grad_norm": 0.5008578041891931, "learning_rate": 2.7148767765540083e-06, "loss": 0.019289781153202058, "step": 57690 }, { "epoch": 0.5430117647058823, "grad_norm": 0.5249689015326028, "learning_rate": 2.714759132795571e-06, "loss": 0.01965768337249756, "step": 57695 }, { "epoch": 0.5430588235294118, "grad_norm": 0.32115695557241414, "learning_rate": 2.7146415043293802e-06, "loss": 0.01399199664592743, "step": 57700 }, { "epoch": 0.5431058823529412, "grad_norm": 0.4141832522960811, "learning_rate": 2.714523891152122e-06, "loss": 0.01928628087043762, "step": 57705 }, { "epoch": 0.5431529411764706, "grad_norm": 0.284134047664789, "learning_rate": 2.7144062932604864e-06, "loss": 0.018293841183185576, "step": 57710 }, { "epoch": 0.5432, "grad_norm": 0.2155983306339799, "learning_rate": 2.7142887106511607e-06, "loss": 0.017533090710639954, "step": 57715 }, { "epoch": 0.5432470588235294, "grad_norm": 0.6043512961704658, "learning_rate": 2.714171143320837e-06, "loss": 0.016512525081634522, "step": 57720 }, { "epoch": 0.5432941176470588, "grad_norm": 0.3830968975616279, "learning_rate": 2.7140535912662054e-06, "loss": 0.013712197542190552, "step": 57725 }, { "epoch": 0.5433411764705882, "grad_norm": 0.6420666050737751, "learning_rate": 2.7139360544839587e-06, "loss": 0.015449231863021851, "step": 57730 }, { "epoch": 0.5433882352941176, "grad_norm": 0.9476109203287809, "learning_rate": 2.71381853297079e-06, "loss": 0.017226448655128478, "step": 57735 }, { "epoch": 0.543435294117647, "grad_norm": 0.5326175217997978, "learning_rate": 2.7137010267233933e-06, "loss": 0.015898634493350983, "step": 57740 }, { "epoch": 0.5434823529411764, "grad_norm": 0.6696822518150827, "learning_rate": 2.7135835357384648e-06, "loss": 0.01773635149002075, "step": 57745 }, { "epoch": 0.5435294117647059, "grad_norm": 0.4507262213649242, "learning_rate": 2.7134660600127005e-06, "loss": 0.016209104657173158, "step": 57750 }, { "epoch": 0.5435764705882353, "grad_norm": 0.9034278208848507, "learning_rate": 2.7133485995427966e-06, "loss": 0.024951353669166565, "step": 57755 }, { "epoch": 0.5436235294117647, "grad_norm": 0.9230857869922837, "learning_rate": 2.713231154325453e-06, "loss": 0.01611440032720566, "step": 57760 }, { "epoch": 0.5436705882352941, "grad_norm": 0.7641057999575408, "learning_rate": 2.7131137243573684e-06, "loss": 0.018950049579143525, "step": 57765 }, { "epoch": 0.5437176470588235, "grad_norm": 0.49858086208155866, "learning_rate": 2.712996309635243e-06, "loss": 0.017156058549880983, "step": 57770 }, { "epoch": 0.5437647058823529, "grad_norm": 0.5079383703248752, "learning_rate": 2.7128789101557777e-06, "loss": 0.01824670732021332, "step": 57775 }, { "epoch": 0.5438117647058823, "grad_norm": 0.7161325142889335, "learning_rate": 2.7127615259156754e-06, "loss": 0.016591763496398924, "step": 57780 }, { "epoch": 0.5438588235294117, "grad_norm": 0.8557834553688066, "learning_rate": 2.7126441569116397e-06, "loss": 0.0181264728307724, "step": 57785 }, { "epoch": 0.5439058823529411, "grad_norm": 0.5315073080539017, "learning_rate": 2.712526803140374e-06, "loss": 0.016912004351615904, "step": 57790 }, { "epoch": 0.5439529411764706, "grad_norm": 0.5618575616372475, "learning_rate": 2.7124094645985843e-06, "loss": 0.01918458640575409, "step": 57795 }, { "epoch": 0.544, "grad_norm": 0.7081669572180422, "learning_rate": 2.712292141282977e-06, "loss": 0.014617878198623657, "step": 57800 }, { "epoch": 0.5440470588235294, "grad_norm": 0.5755604800430971, "learning_rate": 2.712174833190258e-06, "loss": 0.020965766906738282, "step": 57805 }, { "epoch": 0.5440941176470588, "grad_norm": 0.4686088324433, "learning_rate": 2.712057540317137e-06, "loss": 0.02034490704536438, "step": 57810 }, { "epoch": 0.5441411764705882, "grad_norm": 0.7474651200554466, "learning_rate": 2.7119402626603236e-06, "loss": 0.023517316579818724, "step": 57815 }, { "epoch": 0.5441882352941176, "grad_norm": 0.37127491609749763, "learning_rate": 2.711823000216527e-06, "loss": 0.01789594888687134, "step": 57820 }, { "epoch": 0.544235294117647, "grad_norm": 0.6496666337742991, "learning_rate": 2.7117057529824586e-06, "loss": 0.020497797429561614, "step": 57825 }, { "epoch": 0.5442823529411764, "grad_norm": 0.6383070695411672, "learning_rate": 2.7115885209548313e-06, "loss": 0.015465062856674195, "step": 57830 }, { "epoch": 0.5443294117647058, "grad_norm": 0.6052154321605014, "learning_rate": 2.7114713041303582e-06, "loss": 0.01499326229095459, "step": 57835 }, { "epoch": 0.5443764705882352, "grad_norm": 0.5564041488580526, "learning_rate": 2.7113541025057534e-06, "loss": 0.01762325465679169, "step": 57840 }, { "epoch": 0.5444235294117648, "grad_norm": 0.4345104632858947, "learning_rate": 2.711236916077732e-06, "loss": 0.018684886395931244, "step": 57845 }, { "epoch": 0.5444705882352942, "grad_norm": 0.5784529090857281, "learning_rate": 2.7111197448430106e-06, "loss": 0.01576288938522339, "step": 57850 }, { "epoch": 0.5445176470588236, "grad_norm": 0.4667671272895707, "learning_rate": 2.711002588798306e-06, "loss": 0.01658608466386795, "step": 57855 }, { "epoch": 0.544564705882353, "grad_norm": 0.39919188423287133, "learning_rate": 2.7108854479403374e-06, "loss": 0.018859505653381348, "step": 57860 }, { "epoch": 0.5446117647058824, "grad_norm": 0.3227202398450506, "learning_rate": 2.7107683222658227e-06, "loss": 0.017809849977493287, "step": 57865 }, { "epoch": 0.5446588235294118, "grad_norm": 0.6152095908436896, "learning_rate": 2.710651211771483e-06, "loss": 0.017537827789783477, "step": 57870 }, { "epoch": 0.5447058823529412, "grad_norm": 0.5038312395296528, "learning_rate": 2.7105341164540395e-06, "loss": 0.015668770670890807, "step": 57875 }, { "epoch": 0.5447529411764706, "grad_norm": 0.5083801783733701, "learning_rate": 2.710417036310214e-06, "loss": 0.020032407343387605, "step": 57880 }, { "epoch": 0.5448, "grad_norm": 0.7391438986767864, "learning_rate": 2.7102999713367296e-06, "loss": 0.01923419237136841, "step": 57885 }, { "epoch": 0.5448470588235295, "grad_norm": 0.38194399732041684, "learning_rate": 2.710182921530311e-06, "loss": 0.0168008953332901, "step": 57890 }, { "epoch": 0.5448941176470589, "grad_norm": 0.5857385082105178, "learning_rate": 2.710065886887683e-06, "loss": 0.017469581961631776, "step": 57895 }, { "epoch": 0.5449411764705883, "grad_norm": 0.3798844795130337, "learning_rate": 2.709948867405573e-06, "loss": 0.01996859908103943, "step": 57900 }, { "epoch": 0.5449882352941177, "grad_norm": 0.5518369546880841, "learning_rate": 2.7098318630807065e-06, "loss": 0.017353081703186037, "step": 57905 }, { "epoch": 0.5450352941176471, "grad_norm": 0.4109983787260505, "learning_rate": 2.7097148739098118e-06, "loss": 0.015090709924697876, "step": 57910 }, { "epoch": 0.5450823529411765, "grad_norm": 0.3939486489551874, "learning_rate": 2.7095978998896188e-06, "loss": 0.019390985369682312, "step": 57915 }, { "epoch": 0.5451294117647059, "grad_norm": 0.46929807741069807, "learning_rate": 2.709480941016857e-06, "loss": 0.01405223309993744, "step": 57920 }, { "epoch": 0.5451764705882353, "grad_norm": 0.6255601276950591, "learning_rate": 2.7093639972882584e-06, "loss": 0.02104613184928894, "step": 57925 }, { "epoch": 0.5452235294117647, "grad_norm": 0.6468072653458083, "learning_rate": 2.7092470687005546e-06, "loss": 0.019563883543014526, "step": 57930 }, { "epoch": 0.5452705882352941, "grad_norm": 0.6757791059211979, "learning_rate": 2.7091301552504785e-06, "loss": 0.022108617424964904, "step": 57935 }, { "epoch": 0.5453176470588236, "grad_norm": 0.5957145811770559, "learning_rate": 2.709013256934764e-06, "loss": 0.020437654852867127, "step": 57940 }, { "epoch": 0.545364705882353, "grad_norm": 0.48584854145317163, "learning_rate": 2.7088963737501467e-06, "loss": 0.014354203641414643, "step": 57945 }, { "epoch": 0.5454117647058824, "grad_norm": 1.1046092762633555, "learning_rate": 2.708779505693362e-06, "loss": 0.016140642762184142, "step": 57950 }, { "epoch": 0.5454588235294118, "grad_norm": 0.7457101657946632, "learning_rate": 2.708662652761148e-06, "loss": 0.015992306172847748, "step": 57955 }, { "epoch": 0.5455058823529412, "grad_norm": 0.5691079422776042, "learning_rate": 2.7085458149502413e-06, "loss": 0.0163420170545578, "step": 57960 }, { "epoch": 0.5455529411764706, "grad_norm": 0.961595963709587, "learning_rate": 2.7084289922573827e-06, "loss": 0.021408206224441527, "step": 57965 }, { "epoch": 0.5456, "grad_norm": 0.7313127481113467, "learning_rate": 2.70831218467931e-06, "loss": 0.01708735227584839, "step": 57970 }, { "epoch": 0.5456470588235294, "grad_norm": 0.3992480090277253, "learning_rate": 2.708195392212766e-06, "loss": 0.014416651427745819, "step": 57975 }, { "epoch": 0.5456941176470588, "grad_norm": 0.454293831673635, "learning_rate": 2.708078614854492e-06, "loss": 0.01607825458049774, "step": 57980 }, { "epoch": 0.5457411764705883, "grad_norm": 0.6166260381906048, "learning_rate": 2.7079618526012306e-06, "loss": 0.015992966294288636, "step": 57985 }, { "epoch": 0.5457882352941177, "grad_norm": 0.4888730416549046, "learning_rate": 2.7078451054497264e-06, "loss": 0.014512935280799865, "step": 57990 }, { "epoch": 0.5458352941176471, "grad_norm": 0.8029077096836935, "learning_rate": 2.7077283733967234e-06, "loss": 0.015999795496463777, "step": 57995 }, { "epoch": 0.5458823529411765, "grad_norm": 0.6552487739516739, "learning_rate": 2.7076116564389685e-06, "loss": 0.01997392773628235, "step": 58000 }, { "epoch": 0.5459294117647059, "grad_norm": 0.5212146743591336, "learning_rate": 2.7074949545732084e-06, "loss": 0.013234928250312805, "step": 58005 }, { "epoch": 0.5459764705882353, "grad_norm": 0.7037648977638494, "learning_rate": 2.7073782677961895e-06, "loss": 0.017665117979049683, "step": 58010 }, { "epoch": 0.5460235294117647, "grad_norm": 0.6755338642801156, "learning_rate": 2.7072615961046627e-06, "loss": 0.016294586658477783, "step": 58015 }, { "epoch": 0.5460705882352941, "grad_norm": 0.3698747869730763, "learning_rate": 2.707144939495377e-06, "loss": 0.014708501100540162, "step": 58020 }, { "epoch": 0.5461176470588235, "grad_norm": 0.5717215576094704, "learning_rate": 2.7070282979650824e-06, "loss": 0.015639758110046385, "step": 58025 }, { "epoch": 0.546164705882353, "grad_norm": 0.5333566909293935, "learning_rate": 2.706911671510532e-06, "loss": 0.01598818600177765, "step": 58030 }, { "epoch": 0.5462117647058824, "grad_norm": 0.43039550818573774, "learning_rate": 2.7067950601284775e-06, "loss": 0.01776481419801712, "step": 58035 }, { "epoch": 0.5462588235294118, "grad_norm": 0.5559034614210221, "learning_rate": 2.7066784638156734e-06, "loss": 0.016539068520069124, "step": 58040 }, { "epoch": 0.5463058823529412, "grad_norm": 0.6596037466386333, "learning_rate": 2.7065618825688733e-06, "loss": 0.014111310243606567, "step": 58045 }, { "epoch": 0.5463529411764706, "grad_norm": 0.5016957047139528, "learning_rate": 2.7064453163848347e-06, "loss": 0.015366962552070618, "step": 58050 }, { "epoch": 0.5464, "grad_norm": 0.5467997662662839, "learning_rate": 2.706328765260312e-06, "loss": 0.012402472645044326, "step": 58055 }, { "epoch": 0.5464470588235294, "grad_norm": 0.662426683856191, "learning_rate": 2.706212229192065e-06, "loss": 0.013015541434288024, "step": 58060 }, { "epoch": 0.5464941176470588, "grad_norm": 0.5913044045112247, "learning_rate": 2.7060957081768506e-06, "loss": 0.017477957904338835, "step": 58065 }, { "epoch": 0.5465411764705882, "grad_norm": 0.5707759776057735, "learning_rate": 2.70597920221143e-06, "loss": 0.018491657078266145, "step": 58070 }, { "epoch": 0.5465882352941176, "grad_norm": 0.678829838963452, "learning_rate": 2.705862711292563e-06, "loss": 0.019004788994789124, "step": 58075 }, { "epoch": 0.5466352941176471, "grad_norm": 0.929148023410638, "learning_rate": 2.70574623541701e-06, "loss": 0.01730984151363373, "step": 58080 }, { "epoch": 0.5466823529411765, "grad_norm": 0.6141215349543416, "learning_rate": 2.7056297745815353e-06, "loss": 0.018171364068984987, "step": 58085 }, { "epoch": 0.5467294117647059, "grad_norm": 0.5681590569046454, "learning_rate": 2.705513328782902e-06, "loss": 0.01978359669446945, "step": 58090 }, { "epoch": 0.5467764705882353, "grad_norm": 0.5160722026248313, "learning_rate": 2.7053968980178736e-06, "loss": 0.01444479525089264, "step": 58095 }, { "epoch": 0.5468235294117647, "grad_norm": 0.4047472824820147, "learning_rate": 2.7052804822832164e-06, "loss": 0.013898982107639313, "step": 58100 }, { "epoch": 0.5468705882352941, "grad_norm": 0.4218846075809336, "learning_rate": 2.705164081575697e-06, "loss": 0.014959809184074403, "step": 58105 }, { "epoch": 0.5469176470588235, "grad_norm": 0.659315119844123, "learning_rate": 2.705047695892082e-06, "loss": 0.01894387900829315, "step": 58110 }, { "epoch": 0.5469647058823529, "grad_norm": 0.7576997069815047, "learning_rate": 2.7049313252291404e-06, "loss": 0.02100331485271454, "step": 58115 }, { "epoch": 0.5470117647058823, "grad_norm": 0.5494119833719051, "learning_rate": 2.704814969583641e-06, "loss": 0.0183677613735199, "step": 58120 }, { "epoch": 0.5470588235294118, "grad_norm": 0.6418745433730435, "learning_rate": 2.7046986289523554e-06, "loss": 0.01648457795381546, "step": 58125 }, { "epoch": 0.5471058823529412, "grad_norm": 0.4372037624184264, "learning_rate": 2.7045823033320528e-06, "loss": 0.015297913551330566, "step": 58130 }, { "epoch": 0.5471529411764706, "grad_norm": 0.5059018365585034, "learning_rate": 2.704465992719508e-06, "loss": 0.01899507939815521, "step": 58135 }, { "epoch": 0.5472, "grad_norm": 0.713449709021072, "learning_rate": 2.704349697111492e-06, "loss": 0.02121659815311432, "step": 58140 }, { "epoch": 0.5472470588235294, "grad_norm": 0.7317342653034626, "learning_rate": 2.7042334165047795e-06, "loss": 0.02290850579738617, "step": 58145 }, { "epoch": 0.5472941176470588, "grad_norm": 0.33705627773919566, "learning_rate": 2.7041171508961467e-06, "loss": 0.013856858015060425, "step": 58150 }, { "epoch": 0.5473411764705882, "grad_norm": 0.442713301140552, "learning_rate": 2.704000900282369e-06, "loss": 0.017372983694076537, "step": 58155 }, { "epoch": 0.5473882352941176, "grad_norm": 0.615475476952264, "learning_rate": 2.7038846646602236e-06, "loss": 0.016530799865722656, "step": 58160 }, { "epoch": 0.547435294117647, "grad_norm": 0.3988824722636736, "learning_rate": 2.703768444026488e-06, "loss": 0.018456459045410156, "step": 58165 }, { "epoch": 0.5474823529411764, "grad_norm": 0.38232248834276744, "learning_rate": 2.703652238377943e-06, "loss": 0.016604891419410704, "step": 58170 }, { "epoch": 0.5475294117647059, "grad_norm": 0.5190869991040127, "learning_rate": 2.703536047711367e-06, "loss": 0.019663286209106446, "step": 58175 }, { "epoch": 0.5475764705882353, "grad_norm": 0.6193809629022241, "learning_rate": 2.703419872023541e-06, "loss": 0.01605663597583771, "step": 58180 }, { "epoch": 0.5476235294117647, "grad_norm": 0.3648489474314296, "learning_rate": 2.7033037113112482e-06, "loss": 0.012888316810131074, "step": 58185 }, { "epoch": 0.5476705882352941, "grad_norm": 0.6208383844900592, "learning_rate": 2.7031875655712703e-06, "loss": 0.020401869714260102, "step": 58190 }, { "epoch": 0.5477176470588235, "grad_norm": 0.48753313607317195, "learning_rate": 2.703071434800392e-06, "loss": 0.018385046720504762, "step": 58195 }, { "epoch": 0.5477647058823529, "grad_norm": 0.5027312449445323, "learning_rate": 2.702955318995397e-06, "loss": 0.015817669034004212, "step": 58200 }, { "epoch": 0.5478117647058823, "grad_norm": 0.6320272209141061, "learning_rate": 2.7028392181530723e-06, "loss": 0.01820799559354782, "step": 58205 }, { "epoch": 0.5478588235294117, "grad_norm": 0.5857960602383219, "learning_rate": 2.702723132270205e-06, "loss": 0.01570991277694702, "step": 58210 }, { "epoch": 0.5479058823529411, "grad_norm": 0.7028251105354982, "learning_rate": 2.7026070613435814e-06, "loss": 0.016635915637016295, "step": 58215 }, { "epoch": 0.5479529411764706, "grad_norm": 0.8016615420119988, "learning_rate": 2.702491005369991e-06, "loss": 0.014447981119155883, "step": 58220 }, { "epoch": 0.548, "grad_norm": 0.6267629775853404, "learning_rate": 2.7023749643462243e-06, "loss": 0.018368685245513917, "step": 58225 }, { "epoch": 0.5480470588235294, "grad_norm": 0.646410844256113, "learning_rate": 2.7022589382690705e-06, "loss": 0.019843044877052306, "step": 58230 }, { "epoch": 0.5480941176470588, "grad_norm": 0.4206831670037173, "learning_rate": 2.7021429271353223e-06, "loss": 0.017647755146026612, "step": 58235 }, { "epoch": 0.5481411764705882, "grad_norm": 0.600320487863607, "learning_rate": 2.7020269309417712e-06, "loss": 0.01735737770795822, "step": 58240 }, { "epoch": 0.5481882352941176, "grad_norm": 0.5291240006882766, "learning_rate": 2.701910949685212e-06, "loss": 0.01845899671316147, "step": 58245 }, { "epoch": 0.548235294117647, "grad_norm": 0.34833417125277955, "learning_rate": 2.7017949833624384e-06, "loss": 0.01843397170305252, "step": 58250 }, { "epoch": 0.5482823529411764, "grad_norm": 0.56489718688609, "learning_rate": 2.7016790319702468e-06, "loss": 0.017072805762290956, "step": 58255 }, { "epoch": 0.5483294117647058, "grad_norm": 0.8130525522248991, "learning_rate": 2.7015630955054324e-06, "loss": 0.01757957637310028, "step": 58260 }, { "epoch": 0.5483764705882352, "grad_norm": 0.48952997313270136, "learning_rate": 2.7014471739647934e-06, "loss": 0.024511174857616426, "step": 58265 }, { "epoch": 0.5484235294117648, "grad_norm": 0.40526389060241985, "learning_rate": 2.7013312673451277e-06, "loss": 0.014144963026046753, "step": 58270 }, { "epoch": 0.5484705882352942, "grad_norm": 0.4937645762733386, "learning_rate": 2.701215375643235e-06, "loss": 0.015394636988639831, "step": 58275 }, { "epoch": 0.5485176470588236, "grad_norm": 0.39388716306828386, "learning_rate": 2.7010994988559153e-06, "loss": 0.015397346019744873, "step": 58280 }, { "epoch": 0.548564705882353, "grad_norm": 0.6049594363998528, "learning_rate": 2.70098363697997e-06, "loss": 0.020595349371433258, "step": 58285 }, { "epoch": 0.5486117647058824, "grad_norm": 0.5972051327093035, "learning_rate": 2.7008677900122016e-06, "loss": 0.02079022079706192, "step": 58290 }, { "epoch": 0.5486588235294118, "grad_norm": 0.5126490956875134, "learning_rate": 2.700751957949413e-06, "loss": 0.01806877851486206, "step": 58295 }, { "epoch": 0.5487058823529412, "grad_norm": 0.4994885050073025, "learning_rate": 2.700636140788408e-06, "loss": 0.0229437991976738, "step": 58300 }, { "epoch": 0.5487529411764706, "grad_norm": 0.47660041842389134, "learning_rate": 2.7005203385259915e-06, "loss": 0.01768539845943451, "step": 58305 }, { "epoch": 0.5488, "grad_norm": 0.4880710966885175, "learning_rate": 2.7004045511589713e-06, "loss": 0.015939822793006896, "step": 58310 }, { "epoch": 0.5488470588235295, "grad_norm": 0.3735851508915793, "learning_rate": 2.7002887786841514e-06, "loss": 0.015627017617225646, "step": 58315 }, { "epoch": 0.5488941176470589, "grad_norm": 0.4227797249301344, "learning_rate": 2.700173021098343e-06, "loss": 0.0140347421169281, "step": 58320 }, { "epoch": 0.5489411764705883, "grad_norm": 0.4351673325283849, "learning_rate": 2.7000572783983524e-06, "loss": 0.01501278430223465, "step": 58325 }, { "epoch": 0.5489882352941177, "grad_norm": 0.6187783507988892, "learning_rate": 2.6999415505809913e-06, "loss": 0.02024504691362381, "step": 58330 }, { "epoch": 0.5490352941176471, "grad_norm": 0.5573071418002016, "learning_rate": 2.6998258376430695e-06, "loss": 0.019412735104560853, "step": 58335 }, { "epoch": 0.5490823529411765, "grad_norm": 0.4837229271047185, "learning_rate": 2.6997101395813994e-06, "loss": 0.013691799342632293, "step": 58340 }, { "epoch": 0.5491294117647059, "grad_norm": 0.6633976269006511, "learning_rate": 2.6995944563927935e-06, "loss": 0.017888829112052917, "step": 58345 }, { "epoch": 0.5491764705882353, "grad_norm": 0.730237721418422, "learning_rate": 2.699478788074065e-06, "loss": 0.015099403262138367, "step": 58350 }, { "epoch": 0.5492235294117647, "grad_norm": 0.47301066295365984, "learning_rate": 2.699363134622029e-06, "loss": 0.020670421421527863, "step": 58355 }, { "epoch": 0.5492705882352941, "grad_norm": 0.40683947359272205, "learning_rate": 2.6992474960335013e-06, "loss": 0.01680062711238861, "step": 58360 }, { "epoch": 0.5493176470588236, "grad_norm": 0.4355724828351166, "learning_rate": 2.6991318723052983e-06, "loss": 0.01639411449432373, "step": 58365 }, { "epoch": 0.549364705882353, "grad_norm": 0.5500059477609939, "learning_rate": 2.6990162634342375e-06, "loss": 0.020456013083457947, "step": 58370 }, { "epoch": 0.5494117647058824, "grad_norm": 0.6279465832709751, "learning_rate": 2.6989006694171378e-06, "loss": 0.019565775990486145, "step": 58375 }, { "epoch": 0.5494588235294118, "grad_norm": 0.504971107893772, "learning_rate": 2.6987850902508177e-06, "loss": 0.015666310489177705, "step": 58380 }, { "epoch": 0.5495058823529412, "grad_norm": 0.5106462609872643, "learning_rate": 2.698669525932098e-06, "loss": 0.01609531044960022, "step": 58385 }, { "epoch": 0.5495529411764706, "grad_norm": 0.5785185249174924, "learning_rate": 2.6985539764578008e-06, "loss": 0.01648617535829544, "step": 58390 }, { "epoch": 0.5496, "grad_norm": 0.5113220218046998, "learning_rate": 2.6984384418247474e-06, "loss": 0.0190652996301651, "step": 58395 }, { "epoch": 0.5496470588235294, "grad_norm": 0.558905058650892, "learning_rate": 2.698322922029761e-06, "loss": 0.013897398114204406, "step": 58400 }, { "epoch": 0.5496941176470588, "grad_norm": 0.45895388725338754, "learning_rate": 2.698207417069666e-06, "loss": 0.014603143930435181, "step": 58405 }, { "epoch": 0.5497411764705883, "grad_norm": 0.347522984195043, "learning_rate": 2.698091926941288e-06, "loss": 0.01563885509967804, "step": 58410 }, { "epoch": 0.5497882352941177, "grad_norm": 0.5396720110978952, "learning_rate": 2.697976451641453e-06, "loss": 0.0149262934923172, "step": 58415 }, { "epoch": 0.5498352941176471, "grad_norm": 0.7475438161887615, "learning_rate": 2.697860991166988e-06, "loss": 0.015987807512283327, "step": 58420 }, { "epoch": 0.5498823529411765, "grad_norm": 0.6592650154629552, "learning_rate": 2.6977455455147207e-06, "loss": 0.018461839854717256, "step": 58425 }, { "epoch": 0.5499294117647059, "grad_norm": 0.5315982344755322, "learning_rate": 2.69763011468148e-06, "loss": 0.016392895579338075, "step": 58430 }, { "epoch": 0.5499764705882353, "grad_norm": 0.3624011986636369, "learning_rate": 2.697514698664096e-06, "loss": 0.01748291850090027, "step": 58435 }, { "epoch": 0.5500235294117647, "grad_norm": 0.5620017051410353, "learning_rate": 2.6973992974593994e-06, "loss": 0.01774614453315735, "step": 58440 }, { "epoch": 0.5500705882352941, "grad_norm": 0.3968257927132113, "learning_rate": 2.697283911064223e-06, "loss": 0.014778301119804382, "step": 58445 }, { "epoch": 0.5501176470588235, "grad_norm": 0.4915591260966097, "learning_rate": 2.6971685394753978e-06, "loss": 0.016734832525253297, "step": 58450 }, { "epoch": 0.5501647058823529, "grad_norm": 0.4241120222902696, "learning_rate": 2.697053182689758e-06, "loss": 0.01718320548534393, "step": 58455 }, { "epoch": 0.5502117647058824, "grad_norm": 0.6488136431058491, "learning_rate": 2.6969378407041398e-06, "loss": 0.013824498653411866, "step": 58460 }, { "epoch": 0.5502588235294118, "grad_norm": 0.6338467016889031, "learning_rate": 2.6968225135153768e-06, "loss": 0.015638861060142516, "step": 58465 }, { "epoch": 0.5503058823529412, "grad_norm": 0.6384391911149544, "learning_rate": 2.6967072011203068e-06, "loss": 0.017289043962955476, "step": 58470 }, { "epoch": 0.5503529411764706, "grad_norm": 0.4638264406270029, "learning_rate": 2.696591903515766e-06, "loss": 0.01860431730747223, "step": 58475 }, { "epoch": 0.5504, "grad_norm": 0.45166581905523473, "learning_rate": 2.6964766206985945e-06, "loss": 0.014218424260616303, "step": 58480 }, { "epoch": 0.5504470588235294, "grad_norm": 0.4571446098918405, "learning_rate": 2.69636135266563e-06, "loss": 0.01746383011341095, "step": 58485 }, { "epoch": 0.5504941176470588, "grad_norm": 0.5694255825280783, "learning_rate": 2.6962460994137148e-06, "loss": 0.0182550311088562, "step": 58490 }, { "epoch": 0.5505411764705882, "grad_norm": 0.5613022111688148, "learning_rate": 2.696130860939688e-06, "loss": 0.022666826844215393, "step": 58495 }, { "epoch": 0.5505882352941176, "grad_norm": 0.5935476705368045, "learning_rate": 2.6960156372403934e-06, "loss": 0.018597951531410216, "step": 58500 }, { "epoch": 0.5506352941176471, "grad_norm": 0.4862550847555619, "learning_rate": 2.6959004283126735e-06, "loss": 0.014873659610748291, "step": 58505 }, { "epoch": 0.5506823529411765, "grad_norm": 0.3901301727707562, "learning_rate": 2.695785234153372e-06, "loss": 0.01940300762653351, "step": 58510 }, { "epoch": 0.5507294117647059, "grad_norm": 0.6327185080872, "learning_rate": 2.6956700547593344e-06, "loss": 0.014566163718700408, "step": 58515 }, { "epoch": 0.5507764705882353, "grad_norm": 0.4036066471714918, "learning_rate": 2.695554890127407e-06, "loss": 0.014803999662399292, "step": 58520 }, { "epoch": 0.5508235294117647, "grad_norm": 0.5294180016164721, "learning_rate": 2.6954397402544362e-06, "loss": 0.017545923590660095, "step": 58525 }, { "epoch": 0.5508705882352941, "grad_norm": 0.5726924012367645, "learning_rate": 2.6953246051372706e-06, "loss": 0.017600944638252257, "step": 58530 }, { "epoch": 0.5509176470588235, "grad_norm": 0.7291231185924267, "learning_rate": 2.6952094847727576e-06, "loss": 0.018264222145080566, "step": 58535 }, { "epoch": 0.5509647058823529, "grad_norm": 0.6137466887422878, "learning_rate": 2.6950943791577487e-06, "loss": 0.018941935896873475, "step": 58540 }, { "epoch": 0.5510117647058823, "grad_norm": 0.8670366006627372, "learning_rate": 2.6949792882890935e-06, "loss": 0.01673731058835983, "step": 58545 }, { "epoch": 0.5510588235294117, "grad_norm": 0.4839150212785534, "learning_rate": 2.694864212163644e-06, "loss": 0.02001097798347473, "step": 58550 }, { "epoch": 0.5511058823529412, "grad_norm": 0.5264708013977836, "learning_rate": 2.6947491507782526e-06, "loss": 0.02400903105735779, "step": 58555 }, { "epoch": 0.5511529411764706, "grad_norm": 0.6297324613469362, "learning_rate": 2.6946341041297723e-06, "loss": 0.018748313188552856, "step": 58560 }, { "epoch": 0.5512, "grad_norm": 0.437639859249244, "learning_rate": 2.6945190722150586e-06, "loss": 0.018037045001983644, "step": 58565 }, { "epoch": 0.5512470588235294, "grad_norm": 0.6207135752401115, "learning_rate": 2.694404055030967e-06, "loss": 0.019662919640541076, "step": 58570 }, { "epoch": 0.5512941176470588, "grad_norm": 0.4554651260571529, "learning_rate": 2.694289052574353e-06, "loss": 0.014345233142375947, "step": 58575 }, { "epoch": 0.5513411764705882, "grad_norm": 0.47921793977966887, "learning_rate": 2.694174064842074e-06, "loss": 0.017659586668014527, "step": 58580 }, { "epoch": 0.5513882352941176, "grad_norm": 0.6122223092241756, "learning_rate": 2.694059091830988e-06, "loss": 0.016180548071861266, "step": 58585 }, { "epoch": 0.551435294117647, "grad_norm": 0.684581182983284, "learning_rate": 2.6939441335379553e-06, "loss": 0.017556507885456086, "step": 58590 }, { "epoch": 0.5514823529411764, "grad_norm": 0.5097003236312325, "learning_rate": 2.693829189959835e-06, "loss": 0.01684485375881195, "step": 58595 }, { "epoch": 0.5515294117647059, "grad_norm": 0.517292723232773, "learning_rate": 2.6937142610934884e-06, "loss": 0.016329415142536163, "step": 58600 }, { "epoch": 0.5515764705882353, "grad_norm": 0.29740556368526705, "learning_rate": 2.6935993469357773e-06, "loss": 0.014192202687263488, "step": 58605 }, { "epoch": 0.5516235294117647, "grad_norm": 0.7733046197184532, "learning_rate": 2.6934844474835653e-06, "loss": 0.02208489179611206, "step": 58610 }, { "epoch": 0.5516705882352941, "grad_norm": 0.6205350114092756, "learning_rate": 2.6933695627337154e-06, "loss": 0.016569234430789948, "step": 58615 }, { "epoch": 0.5517176470588235, "grad_norm": 0.5008609512099017, "learning_rate": 2.6932546926830923e-06, "loss": 0.01774653196334839, "step": 58620 }, { "epoch": 0.5517647058823529, "grad_norm": 0.45587544448310374, "learning_rate": 2.693139837328562e-06, "loss": 0.019995129108428954, "step": 58625 }, { "epoch": 0.5518117647058823, "grad_norm": 0.6107002916808818, "learning_rate": 2.6930249966669918e-06, "loss": 0.01940786838531494, "step": 58630 }, { "epoch": 0.5518588235294117, "grad_norm": 0.4665470714820105, "learning_rate": 2.692910170695249e-06, "loss": 0.02300065606832504, "step": 58635 }, { "epoch": 0.5519058823529411, "grad_norm": 0.41552041680910384, "learning_rate": 2.6927953594102008e-06, "loss": 0.017780414223670958, "step": 58640 }, { "epoch": 0.5519529411764705, "grad_norm": 0.5168845123990957, "learning_rate": 2.6926805628087183e-06, "loss": 0.019722127914428712, "step": 58645 }, { "epoch": 0.552, "grad_norm": 0.4582180614408377, "learning_rate": 2.6925657808876717e-06, "loss": 0.016847936809062956, "step": 58650 }, { "epoch": 0.5520470588235294, "grad_norm": 0.249423073789949, "learning_rate": 2.6924510136439315e-06, "loss": 0.013451072573661804, "step": 58655 }, { "epoch": 0.5520941176470588, "grad_norm": 0.564022488805453, "learning_rate": 2.692336261074371e-06, "loss": 0.016916742920875548, "step": 58660 }, { "epoch": 0.5521411764705882, "grad_norm": 0.5704570775467643, "learning_rate": 2.692221523175862e-06, "loss": 0.016358092427253723, "step": 58665 }, { "epoch": 0.5521882352941176, "grad_norm": 0.28095030009429006, "learning_rate": 2.69210679994528e-06, "loss": 0.017133039236068726, "step": 58670 }, { "epoch": 0.552235294117647, "grad_norm": 0.4729409392921954, "learning_rate": 2.6919920913794987e-06, "loss": 0.01850852519273758, "step": 58675 }, { "epoch": 0.5522823529411764, "grad_norm": 0.5332819048307516, "learning_rate": 2.691877397475395e-06, "loss": 0.015067344903945923, "step": 58680 }, { "epoch": 0.5523294117647058, "grad_norm": 0.46389833928118274, "learning_rate": 2.6917627182298465e-06, "loss": 0.011504241824150085, "step": 58685 }, { "epoch": 0.5523764705882352, "grad_norm": 0.3345910037084917, "learning_rate": 2.69164805363973e-06, "loss": 0.015578356385231019, "step": 58690 }, { "epoch": 0.5524235294117648, "grad_norm": 0.4918200838566386, "learning_rate": 2.6915334037019236e-06, "loss": 0.019410496950149535, "step": 58695 }, { "epoch": 0.5524705882352942, "grad_norm": 0.5909938106462913, "learning_rate": 2.6914187684133085e-06, "loss": 0.016707789897918702, "step": 58700 }, { "epoch": 0.5525176470588236, "grad_norm": 2.152335501109919, "learning_rate": 2.691304147770765e-06, "loss": 0.020153725147247316, "step": 58705 }, { "epoch": 0.552564705882353, "grad_norm": 0.5338772404619425, "learning_rate": 2.691189541771174e-06, "loss": 0.018900777399539947, "step": 58710 }, { "epoch": 0.5526117647058824, "grad_norm": 0.42391645112056536, "learning_rate": 2.6910749504114187e-06, "loss": 0.014899697899818421, "step": 58715 }, { "epoch": 0.5526588235294118, "grad_norm": 0.6780324439418847, "learning_rate": 2.6909603736883823e-06, "loss": 0.020721520483493804, "step": 58720 }, { "epoch": 0.5527058823529412, "grad_norm": 0.5563725983944096, "learning_rate": 2.690845811598949e-06, "loss": 0.015163218975067139, "step": 58725 }, { "epoch": 0.5527529411764706, "grad_norm": 1.3791939169507628, "learning_rate": 2.690731264140004e-06, "loss": 0.015731793642044068, "step": 58730 }, { "epoch": 0.5528, "grad_norm": 0.5462407896501456, "learning_rate": 2.6906167313084343e-06, "loss": 0.0135793074965477, "step": 58735 }, { "epoch": 0.5528470588235294, "grad_norm": 0.8333033086203707, "learning_rate": 2.690502213101126e-06, "loss": 0.018729478120803833, "step": 58740 }, { "epoch": 0.5528941176470589, "grad_norm": 0.43301679437003704, "learning_rate": 2.6903877095149675e-06, "loss": 0.016647769510746, "step": 58745 }, { "epoch": 0.5529411764705883, "grad_norm": 1.5935014194455384, "learning_rate": 2.690273220546848e-06, "loss": 0.017465832829475402, "step": 58750 }, { "epoch": 0.5529882352941177, "grad_norm": 0.5765671348373936, "learning_rate": 2.6901587461936577e-06, "loss": 0.014964593946933747, "step": 58755 }, { "epoch": 0.5530352941176471, "grad_norm": 0.41549058972750147, "learning_rate": 2.690044286452287e-06, "loss": 0.013636365532875061, "step": 58760 }, { "epoch": 0.5530823529411765, "grad_norm": 0.6174036994749489, "learning_rate": 2.689929841319628e-06, "loss": 0.019350709021091463, "step": 58765 }, { "epoch": 0.5531294117647059, "grad_norm": 0.5930300019576316, "learning_rate": 2.689815410792573e-06, "loss": 0.01623956859111786, "step": 58770 }, { "epoch": 0.5531764705882353, "grad_norm": 0.557379407095611, "learning_rate": 2.689700994868015e-06, "loss": 0.015380178391933442, "step": 58775 }, { "epoch": 0.5532235294117647, "grad_norm": 0.48905691042463006, "learning_rate": 2.6895865935428507e-06, "loss": 0.015049973130226135, "step": 58780 }, { "epoch": 0.5532705882352941, "grad_norm": 0.5791252456984854, "learning_rate": 2.6894722068139733e-06, "loss": 0.016089698672294615, "step": 58785 }, { "epoch": 0.5533176470588236, "grad_norm": 0.6790750276575038, "learning_rate": 2.6893578346782804e-06, "loss": 0.015968680381774902, "step": 58790 }, { "epoch": 0.553364705882353, "grad_norm": 0.6261295398415858, "learning_rate": 2.689243477132669e-06, "loss": 0.018567875027656555, "step": 58795 }, { "epoch": 0.5534117647058824, "grad_norm": 0.7023827329585127, "learning_rate": 2.6891291341740384e-06, "loss": 0.016244684159755707, "step": 58800 }, { "epoch": 0.5534588235294118, "grad_norm": 0.49068164707790385, "learning_rate": 2.6890148057992855e-06, "loss": 0.017569583654403687, "step": 58805 }, { "epoch": 0.5535058823529412, "grad_norm": 1.1498729976808932, "learning_rate": 2.688900492005312e-06, "loss": 0.01660442352294922, "step": 58810 }, { "epoch": 0.5535529411764706, "grad_norm": 0.3872600096396726, "learning_rate": 2.688786192789019e-06, "loss": 0.018542966246604918, "step": 58815 }, { "epoch": 0.5536, "grad_norm": 0.43320730221917186, "learning_rate": 2.688671908147308e-06, "loss": 0.01399267017841339, "step": 58820 }, { "epoch": 0.5536470588235294, "grad_norm": 0.5232644478503127, "learning_rate": 2.6885576380770815e-06, "loss": 0.019811293482780455, "step": 58825 }, { "epoch": 0.5536941176470588, "grad_norm": 0.480781715586855, "learning_rate": 2.6884433825752438e-06, "loss": 0.01982171684503555, "step": 58830 }, { "epoch": 0.5537411764705882, "grad_norm": 0.40171091927998576, "learning_rate": 2.6883291416386996e-06, "loss": 0.0163064643740654, "step": 58835 }, { "epoch": 0.5537882352941177, "grad_norm": 0.4697700721562218, "learning_rate": 2.6882149152643543e-06, "loss": 0.021121712028980257, "step": 58840 }, { "epoch": 0.5538352941176471, "grad_norm": 0.35428649742720475, "learning_rate": 2.6881007034491145e-06, "loss": 0.012807567417621613, "step": 58845 }, { "epoch": 0.5538823529411765, "grad_norm": 0.6644838128852216, "learning_rate": 2.687986506189888e-06, "loss": 0.0192914143204689, "step": 58850 }, { "epoch": 0.5539294117647059, "grad_norm": 0.5963547274763681, "learning_rate": 2.6878723234835828e-06, "loss": 0.015735283493995667, "step": 58855 }, { "epoch": 0.5539764705882353, "grad_norm": 0.39950995303828374, "learning_rate": 2.6877581553271087e-06, "loss": 0.01660539209842682, "step": 58860 }, { "epoch": 0.5540235294117647, "grad_norm": 0.42511268901676963, "learning_rate": 2.687644001717375e-06, "loss": 0.012238596379756928, "step": 58865 }, { "epoch": 0.5540705882352941, "grad_norm": 0.3497666708219979, "learning_rate": 2.687529862651293e-06, "loss": 0.01572139859199524, "step": 58870 }, { "epoch": 0.5541176470588235, "grad_norm": 0.5335322377781133, "learning_rate": 2.687415738125776e-06, "loss": 0.01953994631767273, "step": 58875 }, { "epoch": 0.5541647058823529, "grad_norm": 1.2082532528475038, "learning_rate": 2.6873016281377352e-06, "loss": 0.01743361949920654, "step": 58880 }, { "epoch": 0.5542117647058824, "grad_norm": 0.478745859182833, "learning_rate": 2.6871875326840862e-06, "loss": 0.016886287927627565, "step": 58885 }, { "epoch": 0.5542588235294118, "grad_norm": 0.49990803161924335, "learning_rate": 2.6870734517617427e-06, "loss": 0.015431584417819976, "step": 58890 }, { "epoch": 0.5543058823529412, "grad_norm": 0.41128721237240096, "learning_rate": 2.68695938536762e-06, "loss": 0.019715364277362823, "step": 58895 }, { "epoch": 0.5543529411764706, "grad_norm": 0.7886326263126436, "learning_rate": 2.6868453334986367e-06, "loss": 0.027162396907806398, "step": 58900 }, { "epoch": 0.5544, "grad_norm": 0.4459700489188173, "learning_rate": 2.6867312961517083e-06, "loss": 0.015358181297779083, "step": 58905 }, { "epoch": 0.5544470588235294, "grad_norm": 0.6835187463040147, "learning_rate": 2.6866172733237543e-06, "loss": 0.015782031416893005, "step": 58910 }, { "epoch": 0.5544941176470588, "grad_norm": 0.4675802604334161, "learning_rate": 2.686503265011694e-06, "loss": 0.01434611678123474, "step": 58915 }, { "epoch": 0.5545411764705882, "grad_norm": 0.45709922116619134, "learning_rate": 2.6863892712124473e-06, "loss": 0.013569715619087219, "step": 58920 }, { "epoch": 0.5545882352941176, "grad_norm": 0.7305304978170138, "learning_rate": 2.6862752919229363e-06, "loss": 0.014712497591972351, "step": 58925 }, { "epoch": 0.554635294117647, "grad_norm": 0.4939359783071649, "learning_rate": 2.686161327140082e-06, "loss": 0.016708925366401672, "step": 58930 }, { "epoch": 0.5546823529411765, "grad_norm": 0.4586499157011502, "learning_rate": 2.6860473768608088e-06, "loss": 0.016760490834712982, "step": 58935 }, { "epoch": 0.5547294117647059, "grad_norm": 0.4359205512155616, "learning_rate": 2.6859334410820394e-06, "loss": 0.021377724409103394, "step": 58940 }, { "epoch": 0.5547764705882353, "grad_norm": 0.45070731982292794, "learning_rate": 2.6858195198006985e-06, "loss": 0.012178445607423783, "step": 58945 }, { "epoch": 0.5548235294117647, "grad_norm": 0.33877513305268375, "learning_rate": 2.6857056130137134e-06, "loss": 0.014907029271125794, "step": 58950 }, { "epoch": 0.5548705882352941, "grad_norm": 0.49832614882721066, "learning_rate": 2.6855917207180093e-06, "loss": 0.018584299087524413, "step": 58955 }, { "epoch": 0.5549176470588235, "grad_norm": 0.5610537985311866, "learning_rate": 2.685477842910515e-06, "loss": 0.019368845224380492, "step": 58960 }, { "epoch": 0.5549647058823529, "grad_norm": 0.44177328762489093, "learning_rate": 2.6853639795881577e-06, "loss": 0.016957125067710875, "step": 58965 }, { "epoch": 0.5550117647058823, "grad_norm": 0.3975415944835874, "learning_rate": 2.6852501307478683e-06, "loss": 0.01798146069049835, "step": 58970 }, { "epoch": 0.5550588235294117, "grad_norm": 0.5613945159360001, "learning_rate": 2.6851362963865764e-06, "loss": 0.018685847520828247, "step": 58975 }, { "epoch": 0.5551058823529412, "grad_norm": 0.736103643704713, "learning_rate": 2.6850224765012135e-06, "loss": 0.021814680099487303, "step": 58980 }, { "epoch": 0.5551529411764706, "grad_norm": 0.4493075429861351, "learning_rate": 2.6849086710887107e-06, "loss": 0.01821206510066986, "step": 58985 }, { "epoch": 0.5552, "grad_norm": 0.31128834091729735, "learning_rate": 2.684794880146003e-06, "loss": 0.011489403992891311, "step": 58990 }, { "epoch": 0.5552470588235294, "grad_norm": 0.5961676655517915, "learning_rate": 2.6846811036700226e-06, "loss": 0.012872633337974549, "step": 58995 }, { "epoch": 0.5552941176470588, "grad_norm": 0.5405309216073646, "learning_rate": 2.684567341657705e-06, "loss": 0.01828121542930603, "step": 59000 }, { "epoch": 0.5553411764705882, "grad_norm": 0.5264129354743676, "learning_rate": 2.684453594105987e-06, "loss": 0.014879858493804932, "step": 59005 }, { "epoch": 0.5553882352941176, "grad_norm": 0.5513837721561862, "learning_rate": 2.6843398610118038e-06, "loss": 0.017848461866378784, "step": 59010 }, { "epoch": 0.555435294117647, "grad_norm": 0.42472732922461276, "learning_rate": 2.6842261423720938e-06, "loss": 0.014115269482135772, "step": 59015 }, { "epoch": 0.5554823529411764, "grad_norm": 0.48679851126007806, "learning_rate": 2.684112438183796e-06, "loss": 0.015450379252433777, "step": 59020 }, { "epoch": 0.5555294117647058, "grad_norm": 0.5625022694943113, "learning_rate": 2.6839987484438484e-06, "loss": 0.0173139363527298, "step": 59025 }, { "epoch": 0.5555764705882353, "grad_norm": 0.38884710841916975, "learning_rate": 2.6838850731491924e-06, "loss": 0.014153403043746949, "step": 59030 }, { "epoch": 0.5556235294117647, "grad_norm": 0.6337409740532617, "learning_rate": 2.6837714122967695e-06, "loss": 0.019027292728424072, "step": 59035 }, { "epoch": 0.5556705882352941, "grad_norm": 0.418060583109862, "learning_rate": 2.6836577658835213e-06, "loss": 0.01753770112991333, "step": 59040 }, { "epoch": 0.5557176470588235, "grad_norm": 0.6332879226932011, "learning_rate": 2.6835441339063906e-06, "loss": 0.018513138592243194, "step": 59045 }, { "epoch": 0.5557647058823529, "grad_norm": 0.6566430556824082, "learning_rate": 2.683430516362322e-06, "loss": 0.017249520123004913, "step": 59050 }, { "epoch": 0.5558117647058823, "grad_norm": 0.6502578667084938, "learning_rate": 2.68331691324826e-06, "loss": 0.01959011107683182, "step": 59055 }, { "epoch": 0.5558588235294117, "grad_norm": 0.5544834748989086, "learning_rate": 2.683203324561151e-06, "loss": 0.01738142818212509, "step": 59060 }, { "epoch": 0.5559058823529411, "grad_norm": 0.41907180780364334, "learning_rate": 2.6830897502979414e-06, "loss": 0.011838121712207795, "step": 59065 }, { "epoch": 0.5559529411764705, "grad_norm": 0.4556262985990183, "learning_rate": 2.6829761904555777e-06, "loss": 0.016439458727836607, "step": 59070 }, { "epoch": 0.556, "grad_norm": 0.49432727164598933, "learning_rate": 2.6828626450310104e-06, "loss": 0.017837592959403993, "step": 59075 }, { "epoch": 0.5560470588235294, "grad_norm": 0.4680751434571309, "learning_rate": 2.682749114021187e-06, "loss": 0.025121772289276124, "step": 59080 }, { "epoch": 0.5560941176470588, "grad_norm": 0.48128500179106787, "learning_rate": 2.682635597423059e-06, "loss": 0.017415809631347656, "step": 59085 }, { "epoch": 0.5561411764705882, "grad_norm": 0.45652037255562283, "learning_rate": 2.682522095233577e-06, "loss": 0.015515780448913575, "step": 59090 }, { "epoch": 0.5561882352941176, "grad_norm": 0.5524533659829381, "learning_rate": 2.6824086074496935e-06, "loss": 0.014640228450298309, "step": 59095 }, { "epoch": 0.556235294117647, "grad_norm": 0.5835272398557764, "learning_rate": 2.682295134068361e-06, "loss": 0.017469939589500428, "step": 59100 }, { "epoch": 0.5562823529411765, "grad_norm": 0.4554283840338234, "learning_rate": 2.682181675086534e-06, "loss": 0.01534973680973053, "step": 59105 }, { "epoch": 0.5563294117647059, "grad_norm": 0.44525517131344, "learning_rate": 2.6820682305011677e-06, "loss": 0.012076690793037415, "step": 59110 }, { "epoch": 0.5563764705882353, "grad_norm": 0.3695562148133259, "learning_rate": 2.6819548003092164e-06, "loss": 0.017440949380397797, "step": 59115 }, { "epoch": 0.5564235294117648, "grad_norm": 0.38057835509808424, "learning_rate": 2.681841384507638e-06, "loss": 0.014043748378753662, "step": 59120 }, { "epoch": 0.5564705882352942, "grad_norm": 0.42228908937201187, "learning_rate": 2.681727983093389e-06, "loss": 0.016580349206924437, "step": 59125 }, { "epoch": 0.5565176470588236, "grad_norm": 0.6784164573428463, "learning_rate": 2.6816145960634287e-06, "loss": 0.02119912952184677, "step": 59130 }, { "epoch": 0.556564705882353, "grad_norm": 0.32338725226573667, "learning_rate": 2.6815012234147166e-06, "loss": 0.020862588286399843, "step": 59135 }, { "epoch": 0.5566117647058824, "grad_norm": 0.3261157920538525, "learning_rate": 2.6813878651442122e-06, "loss": 0.019574740529060365, "step": 59140 }, { "epoch": 0.5566588235294118, "grad_norm": 0.5343236277362388, "learning_rate": 2.681274521248876e-06, "loss": 0.01986283212900162, "step": 59145 }, { "epoch": 0.5567058823529412, "grad_norm": 0.5930373507492933, "learning_rate": 2.681161191725672e-06, "loss": 0.014776740968227387, "step": 59150 }, { "epoch": 0.5567529411764706, "grad_norm": 0.7960813149251758, "learning_rate": 2.6810478765715616e-06, "loss": 0.018522700667381285, "step": 59155 }, { "epoch": 0.5568, "grad_norm": 0.632886991468537, "learning_rate": 2.680934575783509e-06, "loss": 0.01536692976951599, "step": 59160 }, { "epoch": 0.5568470588235294, "grad_norm": 0.5059570565120465, "learning_rate": 2.6808212893584786e-06, "loss": 0.017116962373256682, "step": 59165 }, { "epoch": 0.5568941176470589, "grad_norm": 0.36563943614937094, "learning_rate": 2.680708017293437e-06, "loss": 0.014011967182159423, "step": 59170 }, { "epoch": 0.5569411764705883, "grad_norm": 0.5179626834289182, "learning_rate": 2.6805947595853493e-06, "loss": 0.018271219730377198, "step": 59175 }, { "epoch": 0.5569882352941177, "grad_norm": 0.682178202589105, "learning_rate": 2.6804815162311844e-06, "loss": 0.019068615138530733, "step": 59180 }, { "epoch": 0.5570352941176471, "grad_norm": 0.4494071317043679, "learning_rate": 2.680368287227909e-06, "loss": 0.014381393790245056, "step": 59185 }, { "epoch": 0.5570823529411765, "grad_norm": 0.636017839056943, "learning_rate": 2.680255072572494e-06, "loss": 0.016193252801895142, "step": 59190 }, { "epoch": 0.5571294117647059, "grad_norm": 0.4852080363693106, "learning_rate": 2.6801418722619083e-06, "loss": 0.017985981702804566, "step": 59195 }, { "epoch": 0.5571764705882353, "grad_norm": 0.8609021593648137, "learning_rate": 2.6800286862931236e-06, "loss": 0.019832824170589448, "step": 59200 }, { "epoch": 0.5572235294117647, "grad_norm": 0.3884274632894219, "learning_rate": 2.6799155146631104e-06, "loss": 0.021114689111709595, "step": 59205 }, { "epoch": 0.5572705882352941, "grad_norm": 0.560909397728374, "learning_rate": 2.6798023573688436e-06, "loss": 0.017721782624721526, "step": 59210 }, { "epoch": 0.5573176470588236, "grad_norm": 0.5601749642830536, "learning_rate": 2.6796892144072954e-06, "loss": 0.014627261459827423, "step": 59215 }, { "epoch": 0.557364705882353, "grad_norm": 0.5586446668773979, "learning_rate": 2.679576085775441e-06, "loss": 0.016104495525360106, "step": 59220 }, { "epoch": 0.5574117647058824, "grad_norm": 0.5469846388370757, "learning_rate": 2.6794629714702557e-06, "loss": 0.014406540989875793, "step": 59225 }, { "epoch": 0.5574588235294118, "grad_norm": 0.36697045618915963, "learning_rate": 2.6793498714887156e-06, "loss": 0.015097931027412415, "step": 59230 }, { "epoch": 0.5575058823529412, "grad_norm": 0.48240345476324753, "learning_rate": 2.6792367858277977e-06, "loss": 0.011912393569946288, "step": 59235 }, { "epoch": 0.5575529411764706, "grad_norm": 0.4872877015784206, "learning_rate": 2.679123714484481e-06, "loss": 0.03345527052879334, "step": 59240 }, { "epoch": 0.5576, "grad_norm": 0.49319612374274424, "learning_rate": 2.679010657455745e-06, "loss": 0.01921279579401016, "step": 59245 }, { "epoch": 0.5576470588235294, "grad_norm": 0.3610328758283627, "learning_rate": 2.6788976147385677e-06, "loss": 0.014789365231990814, "step": 59250 }, { "epoch": 0.5576941176470588, "grad_norm": 0.4442999005851692, "learning_rate": 2.6787845863299315e-06, "loss": 0.012560200691223145, "step": 59255 }, { "epoch": 0.5577411764705882, "grad_norm": 0.37942147815292476, "learning_rate": 2.6786715722268177e-06, "loss": 0.017054137587547303, "step": 59260 }, { "epoch": 0.5577882352941177, "grad_norm": 0.5920235456021568, "learning_rate": 2.6785585724262085e-06, "loss": 0.021572160720825195, "step": 59265 }, { "epoch": 0.5578352941176471, "grad_norm": 0.6559186398497522, "learning_rate": 2.6784455869250875e-06, "loss": 0.017895984649658202, "step": 59270 }, { "epoch": 0.5578823529411765, "grad_norm": 0.44516747539624296, "learning_rate": 2.6783326157204397e-06, "loss": 0.0171695813536644, "step": 59275 }, { "epoch": 0.5579294117647059, "grad_norm": 0.43720978481311573, "learning_rate": 2.67821965880925e-06, "loss": 0.013713529706001282, "step": 59280 }, { "epoch": 0.5579764705882353, "grad_norm": 0.5183457006909008, "learning_rate": 2.678106716188505e-06, "loss": 0.01696496903896332, "step": 59285 }, { "epoch": 0.5580235294117647, "grad_norm": 0.3927208581082812, "learning_rate": 2.6779937878551903e-06, "loss": 0.017247849702835084, "step": 59290 }, { "epoch": 0.5580705882352941, "grad_norm": 0.2748578788812968, "learning_rate": 2.6778808738062955e-06, "loss": 0.014915019273757935, "step": 59295 }, { "epoch": 0.5581176470588235, "grad_norm": 0.4288288003368964, "learning_rate": 2.6777679740388086e-06, "loss": 0.012705782055854797, "step": 59300 }, { "epoch": 0.5581647058823529, "grad_norm": 0.6582067007712241, "learning_rate": 2.67765508854972e-06, "loss": 0.016220539808273315, "step": 59305 }, { "epoch": 0.5582117647058824, "grad_norm": 0.38949188544335966, "learning_rate": 2.6775422173360196e-06, "loss": 0.0171809658408165, "step": 59310 }, { "epoch": 0.5582588235294118, "grad_norm": 0.35210418437943913, "learning_rate": 2.677429360394699e-06, "loss": 0.012773558497428894, "step": 59315 }, { "epoch": 0.5583058823529412, "grad_norm": 0.5337878090076407, "learning_rate": 2.6773165177227506e-06, "loss": 0.018179279565811158, "step": 59320 }, { "epoch": 0.5583529411764706, "grad_norm": 0.6967835456469312, "learning_rate": 2.6772036893171678e-06, "loss": 0.02050766348838806, "step": 59325 }, { "epoch": 0.5584, "grad_norm": 0.7505821023763236, "learning_rate": 2.6770908751749447e-06, "loss": 0.0158338725566864, "step": 59330 }, { "epoch": 0.5584470588235294, "grad_norm": 0.7343446985827626, "learning_rate": 2.676978075293077e-06, "loss": 0.01920686662197113, "step": 59335 }, { "epoch": 0.5584941176470588, "grad_norm": 0.5492531324220179, "learning_rate": 2.67686528966856e-06, "loss": 0.01905375123023987, "step": 59340 }, { "epoch": 0.5585411764705882, "grad_norm": 0.4410352142775337, "learning_rate": 2.6767525182983897e-06, "loss": 0.014847582578659058, "step": 59345 }, { "epoch": 0.5585882352941176, "grad_norm": 0.42333144155739694, "learning_rate": 2.676639761179565e-06, "loss": 0.016939295828342436, "step": 59350 }, { "epoch": 0.558635294117647, "grad_norm": 0.5342754494557075, "learning_rate": 2.6765270183090845e-06, "loss": 0.01649666130542755, "step": 59355 }, { "epoch": 0.5586823529411765, "grad_norm": 0.4698693531378856, "learning_rate": 2.6764142896839473e-06, "loss": 0.01777500510215759, "step": 59360 }, { "epoch": 0.5587294117647059, "grad_norm": 0.4910939744809395, "learning_rate": 2.676301575301154e-06, "loss": 0.01695140600204468, "step": 59365 }, { "epoch": 0.5587764705882353, "grad_norm": 0.7140004539691275, "learning_rate": 2.676188875157705e-06, "loss": 0.018240904808044432, "step": 59370 }, { "epoch": 0.5588235294117647, "grad_norm": 0.5669104173796692, "learning_rate": 2.6760761892506036e-06, "loss": 0.014992481470108033, "step": 59375 }, { "epoch": 0.5588705882352941, "grad_norm": 0.46248672968170823, "learning_rate": 2.6759635175768523e-06, "loss": 0.016113391518592833, "step": 59380 }, { "epoch": 0.5589176470588235, "grad_norm": 0.5269104653506321, "learning_rate": 2.6758508601334552e-06, "loss": 0.014959627389907837, "step": 59385 }, { "epoch": 0.5589647058823529, "grad_norm": 0.6255487310304615, "learning_rate": 2.675738216917416e-06, "loss": 0.01625564694404602, "step": 59390 }, { "epoch": 0.5590117647058823, "grad_norm": 1.0178205406003094, "learning_rate": 2.675625587925742e-06, "loss": 0.015708258748054503, "step": 59395 }, { "epoch": 0.5590588235294117, "grad_norm": 0.47637951250185545, "learning_rate": 2.675512973155439e-06, "loss": 0.014850017428398133, "step": 59400 }, { "epoch": 0.5591058823529412, "grad_norm": 0.4156898761662883, "learning_rate": 2.6754003726035143e-06, "loss": 0.016393306851387023, "step": 59405 }, { "epoch": 0.5591529411764706, "grad_norm": 0.6574800649936053, "learning_rate": 2.6752877862669763e-06, "loss": 0.01591191291809082, "step": 59410 }, { "epoch": 0.5592, "grad_norm": 0.46470455017301365, "learning_rate": 2.675175214142834e-06, "loss": 0.015876542031764983, "step": 59415 }, { "epoch": 0.5592470588235294, "grad_norm": 0.3777435105143576, "learning_rate": 2.675062656228098e-06, "loss": 0.014728973805904388, "step": 59420 }, { "epoch": 0.5592941176470588, "grad_norm": 0.46790374627684034, "learning_rate": 2.6749501125197785e-06, "loss": 0.018532678484916687, "step": 59425 }, { "epoch": 0.5593411764705882, "grad_norm": 0.5729516179206272, "learning_rate": 2.674837583014888e-06, "loss": 0.01705528497695923, "step": 59430 }, { "epoch": 0.5593882352941176, "grad_norm": 0.3441583474665884, "learning_rate": 2.674725067710439e-06, "loss": 0.013191607594490052, "step": 59435 }, { "epoch": 0.559435294117647, "grad_norm": 0.6836163939003904, "learning_rate": 2.674612566603445e-06, "loss": 0.014880892634391785, "step": 59440 }, { "epoch": 0.5594823529411764, "grad_norm": 0.5457769412835329, "learning_rate": 2.6745000796909205e-06, "loss": 0.01740436851978302, "step": 59445 }, { "epoch": 0.5595294117647058, "grad_norm": 0.612230722384691, "learning_rate": 2.6743876069698806e-06, "loss": 0.020873197913169862, "step": 59450 }, { "epoch": 0.5595764705882353, "grad_norm": 0.4193086168650341, "learning_rate": 2.6742751484373418e-06, "loss": 0.022082673013210298, "step": 59455 }, { "epoch": 0.5596235294117647, "grad_norm": 0.561465338161661, "learning_rate": 2.6741627040903216e-06, "loss": 0.0175686314702034, "step": 59460 }, { "epoch": 0.5596705882352941, "grad_norm": 0.562567136625339, "learning_rate": 2.674050273925838e-06, "loss": 0.020532524585723876, "step": 59465 }, { "epoch": 0.5597176470588235, "grad_norm": 0.5050188779565998, "learning_rate": 2.6739378579409087e-06, "loss": 0.01454012244939804, "step": 59470 }, { "epoch": 0.5597647058823529, "grad_norm": 0.543800151342883, "learning_rate": 2.6738254561325545e-06, "loss": 0.018465332686901093, "step": 59475 }, { "epoch": 0.5598117647058823, "grad_norm": 0.6872872265385827, "learning_rate": 2.6737130684977952e-06, "loss": 0.017298349738121034, "step": 59480 }, { "epoch": 0.5598588235294117, "grad_norm": 0.5615863130737186, "learning_rate": 2.6736006950336537e-06, "loss": 0.024034403264522552, "step": 59485 }, { "epoch": 0.5599058823529411, "grad_norm": 0.43682533521139844, "learning_rate": 2.6734883357371504e-06, "loss": 0.02231943905353546, "step": 59490 }, { "epoch": 0.5599529411764705, "grad_norm": 0.660961772019225, "learning_rate": 2.67337599060531e-06, "loss": 0.01599096953868866, "step": 59495 }, { "epoch": 0.56, "grad_norm": 0.4871327147112932, "learning_rate": 2.6732636596351564e-06, "loss": 0.019001424312591553, "step": 59500 }, { "epoch": 0.5600470588235295, "grad_norm": 0.47290052285820117, "learning_rate": 2.6731513428237145e-06, "loss": 0.01831587255001068, "step": 59505 }, { "epoch": 0.5600941176470589, "grad_norm": 0.5952763622006452, "learning_rate": 2.6730390401680096e-06, "loss": 0.019663991034030916, "step": 59510 }, { "epoch": 0.5601411764705883, "grad_norm": 0.2983919606176461, "learning_rate": 2.672926751665069e-06, "loss": 0.019278141856193542, "step": 59515 }, { "epoch": 0.5601882352941177, "grad_norm": 0.5998113797863369, "learning_rate": 2.6728144773119207e-06, "loss": 0.019679220020771028, "step": 59520 }, { "epoch": 0.560235294117647, "grad_norm": 0.37802879065809697, "learning_rate": 2.672702217105592e-06, "loss": 0.017074741423130035, "step": 59525 }, { "epoch": 0.5602823529411765, "grad_norm": 0.47804988730067327, "learning_rate": 2.6725899710431137e-06, "loss": 0.017637911438941955, "step": 59530 }, { "epoch": 0.5603294117647059, "grad_norm": 0.18877882449159583, "learning_rate": 2.672477739121515e-06, "loss": 0.015971392393112183, "step": 59535 }, { "epoch": 0.5603764705882353, "grad_norm": 0.606359171470516, "learning_rate": 2.6723655213378274e-06, "loss": 0.01829906404018402, "step": 59540 }, { "epoch": 0.5604235294117647, "grad_norm": 0.5085005931989105, "learning_rate": 2.672253317689083e-06, "loss": 0.017202970385551453, "step": 59545 }, { "epoch": 0.5604705882352942, "grad_norm": 0.4859029970845174, "learning_rate": 2.6721411281723144e-06, "loss": 0.018207404017448425, "step": 59550 }, { "epoch": 0.5605176470588236, "grad_norm": 0.43123193340132476, "learning_rate": 2.672028952784555e-06, "loss": 0.015282276272773742, "step": 59555 }, { "epoch": 0.560564705882353, "grad_norm": 0.9577438756955384, "learning_rate": 2.67191679152284e-06, "loss": 0.018381582200527193, "step": 59560 }, { "epoch": 0.5606117647058824, "grad_norm": 0.4785109218402348, "learning_rate": 2.671804644384205e-06, "loss": 0.01725928783416748, "step": 59565 }, { "epoch": 0.5606588235294118, "grad_norm": 0.6524128585610497, "learning_rate": 2.6716925113656855e-06, "loss": 0.019248850643634796, "step": 59570 }, { "epoch": 0.5607058823529412, "grad_norm": 0.5092885303788963, "learning_rate": 2.6715803924643197e-06, "loss": 0.018885189294815065, "step": 59575 }, { "epoch": 0.5607529411764706, "grad_norm": 0.34423233927462454, "learning_rate": 2.671468287677145e-06, "loss": 0.013458842039108276, "step": 59580 }, { "epoch": 0.5608, "grad_norm": 0.3770816508410056, "learning_rate": 2.6713561970012004e-06, "loss": 0.01701962500810623, "step": 59585 }, { "epoch": 0.5608470588235294, "grad_norm": 0.4360972526824523, "learning_rate": 2.6712441204335253e-06, "loss": 0.013707832992076873, "step": 59590 }, { "epoch": 0.5608941176470589, "grad_norm": 0.52016629140797, "learning_rate": 2.671132057971162e-06, "loss": 0.018009886145591736, "step": 59595 }, { "epoch": 0.5609411764705883, "grad_norm": 0.4376486124882092, "learning_rate": 2.6710200096111503e-06, "loss": 0.01826324313879013, "step": 59600 }, { "epoch": 0.5609882352941177, "grad_norm": 0.45618192353512177, "learning_rate": 2.670907975350534e-06, "loss": 0.016606247425079344, "step": 59605 }, { "epoch": 0.5610352941176471, "grad_norm": 0.4896364657314748, "learning_rate": 2.6707959551863555e-06, "loss": 0.016611289978027344, "step": 59610 }, { "epoch": 0.5610823529411765, "grad_norm": 0.6558384533727135, "learning_rate": 2.6706839491156588e-06, "loss": 0.017214277386665346, "step": 59615 }, { "epoch": 0.5611294117647059, "grad_norm": 0.48026784239152415, "learning_rate": 2.67057195713549e-06, "loss": 0.01530669629573822, "step": 59620 }, { "epoch": 0.5611764705882353, "grad_norm": 0.6833018731633007, "learning_rate": 2.670459979242894e-06, "loss": 0.01409280002117157, "step": 59625 }, { "epoch": 0.5612235294117647, "grad_norm": 0.45749022884527496, "learning_rate": 2.6703480154349183e-06, "loss": 0.0168299064040184, "step": 59630 }, { "epoch": 0.5612705882352941, "grad_norm": 0.3873594054654158, "learning_rate": 2.6702360657086097e-06, "loss": 0.014979889988899231, "step": 59635 }, { "epoch": 0.5613176470588235, "grad_norm": 0.6973054618219819, "learning_rate": 2.6701241300610174e-06, "loss": 0.015843117237091066, "step": 59640 }, { "epoch": 0.561364705882353, "grad_norm": 0.581949069552785, "learning_rate": 2.6700122084891904e-06, "loss": 0.018491186201572418, "step": 59645 }, { "epoch": 0.5614117647058824, "grad_norm": 0.47738260660491805, "learning_rate": 2.6699003009901793e-06, "loss": 0.023747724294662476, "step": 59650 }, { "epoch": 0.5614588235294118, "grad_norm": 0.4905303544730398, "learning_rate": 2.669788407561034e-06, "loss": 0.019579222798347472, "step": 59655 }, { "epoch": 0.5615058823529412, "grad_norm": 0.5468584738021086, "learning_rate": 2.6696765281988084e-06, "loss": 0.017772220075130463, "step": 59660 }, { "epoch": 0.5615529411764706, "grad_norm": 0.6324132200585846, "learning_rate": 2.6695646629005537e-06, "loss": 0.01887476593255997, "step": 59665 }, { "epoch": 0.5616, "grad_norm": 0.6076932374093258, "learning_rate": 2.669452811663325e-06, "loss": 0.015363684296607972, "step": 59670 }, { "epoch": 0.5616470588235294, "grad_norm": 0.4492143280131855, "learning_rate": 2.6693409744841752e-06, "loss": 0.016557277739048006, "step": 59675 }, { "epoch": 0.5616941176470588, "grad_norm": 0.622802786750868, "learning_rate": 2.6692291513601616e-06, "loss": 0.020423063635826112, "step": 59680 }, { "epoch": 0.5617411764705882, "grad_norm": 0.470485837148771, "learning_rate": 2.669117342288339e-06, "loss": 0.015482541918754578, "step": 59685 }, { "epoch": 0.5617882352941177, "grad_norm": 0.3696245560383809, "learning_rate": 2.6690055472657643e-06, "loss": 0.016024231910705566, "step": 59690 }, { "epoch": 0.5618352941176471, "grad_norm": 0.540916654584138, "learning_rate": 2.668893766289497e-06, "loss": 0.019392818212509155, "step": 59695 }, { "epoch": 0.5618823529411765, "grad_norm": 0.5504651136708508, "learning_rate": 2.668781999356595e-06, "loss": 0.01870044767856598, "step": 59700 }, { "epoch": 0.5619294117647059, "grad_norm": 0.5240922078218685, "learning_rate": 2.6686702464641184e-06, "loss": 0.014977160096168517, "step": 59705 }, { "epoch": 0.5619764705882353, "grad_norm": 0.5451115272571708, "learning_rate": 2.6685585076091273e-06, "loss": 0.015100324153900146, "step": 59710 }, { "epoch": 0.5620235294117647, "grad_norm": 0.5430736767886909, "learning_rate": 2.668446782788684e-06, "loss": 0.017841818928718566, "step": 59715 }, { "epoch": 0.5620705882352941, "grad_norm": 0.44911180290621616, "learning_rate": 2.6683350719998498e-06, "loss": 0.016593366861343384, "step": 59720 }, { "epoch": 0.5621176470588235, "grad_norm": 0.7275590469915496, "learning_rate": 2.6682233752396886e-06, "loss": 0.017573149502277376, "step": 59725 }, { "epoch": 0.5621647058823529, "grad_norm": 0.48331056609228507, "learning_rate": 2.668111692505264e-06, "loss": 0.017411205172538757, "step": 59730 }, { "epoch": 0.5622117647058823, "grad_norm": 0.35708645372490827, "learning_rate": 2.6680000237936414e-06, "loss": 0.019383758306503296, "step": 59735 }, { "epoch": 0.5622588235294118, "grad_norm": 0.43835427806182575, "learning_rate": 2.6678883691018857e-06, "loss": 0.016465234756469726, "step": 59740 }, { "epoch": 0.5623058823529412, "grad_norm": 0.574784919805186, "learning_rate": 2.6677767284270647e-06, "loss": 0.01863192617893219, "step": 59745 }, { "epoch": 0.5623529411764706, "grad_norm": 0.8178900967236841, "learning_rate": 2.667665101766245e-06, "loss": 0.019474484026432037, "step": 59750 }, { "epoch": 0.5624, "grad_norm": 0.6310837352792439, "learning_rate": 2.6675534891164957e-06, "loss": 0.01572784185409546, "step": 59755 }, { "epoch": 0.5624470588235294, "grad_norm": 0.5104065915000013, "learning_rate": 2.6674418904748846e-06, "loss": 0.02121894359588623, "step": 59760 }, { "epoch": 0.5624941176470588, "grad_norm": 0.6069631405309394, "learning_rate": 2.667330305838483e-06, "loss": 0.017042356729507446, "step": 59765 }, { "epoch": 0.5625411764705882, "grad_norm": 0.44891387833465324, "learning_rate": 2.6672187352043615e-06, "loss": 0.01712784469127655, "step": 59770 }, { "epoch": 0.5625882352941176, "grad_norm": 0.44853895317214526, "learning_rate": 2.667107178569592e-06, "loss": 0.015920725464820863, "step": 59775 }, { "epoch": 0.562635294117647, "grad_norm": 0.37773577934282904, "learning_rate": 2.666995635931247e-06, "loss": 0.020136302709579466, "step": 59780 }, { "epoch": 0.5626823529411765, "grad_norm": 0.6560857001819046, "learning_rate": 2.6668841072863995e-06, "loss": 0.016209053993225097, "step": 59785 }, { "epoch": 0.5627294117647059, "grad_norm": 0.404533064642076, "learning_rate": 2.6667725926321247e-06, "loss": 0.014987143874168395, "step": 59790 }, { "epoch": 0.5627764705882353, "grad_norm": 0.29428395286962794, "learning_rate": 2.6666610919654967e-06, "loss": 0.014835289120674134, "step": 59795 }, { "epoch": 0.5628235294117647, "grad_norm": 0.5232704217826616, "learning_rate": 2.6665496052835926e-06, "loss": 0.015049073100090026, "step": 59800 }, { "epoch": 0.5628705882352941, "grad_norm": 0.5258050577550116, "learning_rate": 2.666438132583489e-06, "loss": 0.017002497613430024, "step": 59805 }, { "epoch": 0.5629176470588235, "grad_norm": 0.644468087989373, "learning_rate": 2.666326673862264e-06, "loss": 0.017381614446640013, "step": 59810 }, { "epoch": 0.5629647058823529, "grad_norm": 0.7453600675262548, "learning_rate": 2.6662152291169952e-06, "loss": 0.016880229115486145, "step": 59815 }, { "epoch": 0.5630117647058823, "grad_norm": 0.5342466298574325, "learning_rate": 2.666103798344763e-06, "loss": 0.020974749326705934, "step": 59820 }, { "epoch": 0.5630588235294117, "grad_norm": 0.6810928012723404, "learning_rate": 2.6659923815426468e-06, "loss": 0.017152474820613862, "step": 59825 }, { "epoch": 0.5631058823529411, "grad_norm": 0.35271518863896445, "learning_rate": 2.665880978707729e-06, "loss": 0.013779902458190918, "step": 59830 }, { "epoch": 0.5631529411764706, "grad_norm": 0.5523801122358426, "learning_rate": 2.6657695898370905e-06, "loss": 0.013892517983913421, "step": 59835 }, { "epoch": 0.5632, "grad_norm": 0.5751567533549352, "learning_rate": 2.6656582149278153e-06, "loss": 0.01745443344116211, "step": 59840 }, { "epoch": 0.5632470588235294, "grad_norm": 0.586050541513835, "learning_rate": 2.665546853976986e-06, "loss": 0.017718034982681274, "step": 59845 }, { "epoch": 0.5632941176470588, "grad_norm": 4.141716164767144, "learning_rate": 2.6654355069816885e-06, "loss": 0.01661895364522934, "step": 59850 }, { "epoch": 0.5633411764705882, "grad_norm": 0.6462352535474354, "learning_rate": 2.665324173939007e-06, "loss": 0.01727319657802582, "step": 59855 }, { "epoch": 0.5633882352941176, "grad_norm": 0.6305115239064527, "learning_rate": 2.6652128548460277e-06, "loss": 0.014052258431911468, "step": 59860 }, { "epoch": 0.563435294117647, "grad_norm": 0.6097225728225528, "learning_rate": 2.665101549699839e-06, "loss": 0.017324565351009368, "step": 59865 }, { "epoch": 0.5634823529411764, "grad_norm": 0.6249006361327526, "learning_rate": 2.664990258497528e-06, "loss": 0.021225756406784056, "step": 59870 }, { "epoch": 0.5635294117647058, "grad_norm": 0.36293586223503926, "learning_rate": 2.6648789812361837e-06, "loss": 0.011894336342811585, "step": 59875 }, { "epoch": 0.5635764705882353, "grad_norm": 0.6219857588121922, "learning_rate": 2.6647677179128957e-06, "loss": 0.021277807652950287, "step": 59880 }, { "epoch": 0.5636235294117647, "grad_norm": 0.5661806132140195, "learning_rate": 2.664656468524755e-06, "loss": 0.017893826961517333, "step": 59885 }, { "epoch": 0.5636705882352941, "grad_norm": 0.581266863641341, "learning_rate": 2.6645452330688525e-06, "loss": 0.017309150099754332, "step": 59890 }, { "epoch": 0.5637176470588235, "grad_norm": 0.6436970247153266, "learning_rate": 2.664434011542281e-06, "loss": 0.017779326438903807, "step": 59895 }, { "epoch": 0.563764705882353, "grad_norm": 0.5451401431424194, "learning_rate": 2.664322803942133e-06, "loss": 0.013350430130958556, "step": 59900 }, { "epoch": 0.5638117647058823, "grad_norm": 0.6141508860354191, "learning_rate": 2.664211610265503e-06, "loss": 0.017389927804470063, "step": 59905 }, { "epoch": 0.5638588235294117, "grad_norm": 0.4643812958465255, "learning_rate": 2.6641004305094854e-06, "loss": 0.016502076387405397, "step": 59910 }, { "epoch": 0.5639058823529411, "grad_norm": 0.6432451525113547, "learning_rate": 2.663989264671176e-06, "loss": 0.019760258495807648, "step": 59915 }, { "epoch": 0.5639529411764705, "grad_norm": 0.4455508531844286, "learning_rate": 2.6638781127476716e-06, "loss": 0.014786161482334137, "step": 59920 }, { "epoch": 0.564, "grad_norm": 0.5031428395745174, "learning_rate": 2.6637669747360687e-06, "loss": 0.018694655597209932, "step": 59925 }, { "epoch": 0.5640470588235295, "grad_norm": 0.650855131846096, "learning_rate": 2.663655850633466e-06, "loss": 0.020232720673084258, "step": 59930 }, { "epoch": 0.5640941176470589, "grad_norm": 0.4769948012762895, "learning_rate": 2.6635447404369628e-06, "loss": 0.01611630469560623, "step": 59935 }, { "epoch": 0.5641411764705883, "grad_norm": 0.616881821835899, "learning_rate": 2.6634336441436588e-06, "loss": 0.021684961020946504, "step": 59940 }, { "epoch": 0.5641882352941177, "grad_norm": 0.4084845619383238, "learning_rate": 2.6633225617506545e-06, "loss": 0.0203846275806427, "step": 59945 }, { "epoch": 0.5642352941176471, "grad_norm": 0.6446181011695739, "learning_rate": 2.6632114932550516e-06, "loss": 0.019245429337024687, "step": 59950 }, { "epoch": 0.5642823529411765, "grad_norm": 0.46977080437567087, "learning_rate": 2.6631004386539527e-06, "loss": 0.013910037279129029, "step": 59955 }, { "epoch": 0.5643294117647059, "grad_norm": 0.48770162091127334, "learning_rate": 2.662989397944461e-06, "loss": 0.013385316729545594, "step": 59960 }, { "epoch": 0.5643764705882353, "grad_norm": 0.4733529825187759, "learning_rate": 2.6628783711236807e-06, "loss": 0.016903644800186156, "step": 59965 }, { "epoch": 0.5644235294117647, "grad_norm": 0.3438063873795261, "learning_rate": 2.6627673581887165e-06, "loss": 0.013714832067489625, "step": 59970 }, { "epoch": 0.5644705882352942, "grad_norm": 0.4604508889988434, "learning_rate": 2.6626563591366744e-06, "loss": 0.014611187577247619, "step": 59975 }, { "epoch": 0.5645176470588236, "grad_norm": 0.7022944499496382, "learning_rate": 2.6625453739646605e-06, "loss": 0.016983675956726074, "step": 59980 }, { "epoch": 0.564564705882353, "grad_norm": 0.4611061343137782, "learning_rate": 2.6624344026697834e-06, "loss": 0.01908402144908905, "step": 59985 }, { "epoch": 0.5646117647058824, "grad_norm": 0.5819889638764139, "learning_rate": 2.662323445249151e-06, "loss": 0.016297286748886107, "step": 59990 }, { "epoch": 0.5646588235294118, "grad_norm": 0.45014735909604814, "learning_rate": 2.662212501699871e-06, "loss": 0.011786584556102753, "step": 59995 }, { "epoch": 0.5647058823529412, "grad_norm": 0.5870492236990206, "learning_rate": 2.662101572019056e-06, "loss": 0.02280796617269516, "step": 60000 }, { "epoch": 0.5647058823529412, "eval_loss": 0.01719297282397747, "eval_runtime": 613.6602, "eval_samples_per_second": 110.811, "eval_steps_per_second": 6.926, "step": 60000 }, { "epoch": 0.5647529411764706, "grad_norm": 0.5942264494878563, "learning_rate": 2.661990656203815e-06, "loss": 0.017308029532432555, "step": 60005 }, { "epoch": 0.5648, "grad_norm": 0.6615228537993395, "learning_rate": 2.6618797542512616e-06, "loss": 0.01766143590211868, "step": 60010 }, { "epoch": 0.5648470588235294, "grad_norm": 0.4227870870911931, "learning_rate": 2.6617688661585057e-06, "loss": 0.014528056979179383, "step": 60015 }, { "epoch": 0.5648941176470588, "grad_norm": 0.3182428332901492, "learning_rate": 2.661657991922662e-06, "loss": 0.014715394377708435, "step": 60020 }, { "epoch": 0.5649411764705883, "grad_norm": 0.8593179666865028, "learning_rate": 2.661547131540845e-06, "loss": 0.01888422667980194, "step": 60025 }, { "epoch": 0.5649882352941177, "grad_norm": 0.3874589381291124, "learning_rate": 2.6614362850101706e-06, "loss": 0.01888868659734726, "step": 60030 }, { "epoch": 0.5650352941176471, "grad_norm": 0.6177577638926809, "learning_rate": 2.6613254523277525e-06, "loss": 0.02137419432401657, "step": 60035 }, { "epoch": 0.5650823529411765, "grad_norm": 0.4136278253498535, "learning_rate": 2.661214633490709e-06, "loss": 0.012773768603801727, "step": 60040 }, { "epoch": 0.5651294117647059, "grad_norm": 0.48862841058649487, "learning_rate": 2.6611038284961577e-06, "loss": 0.015304262936115264, "step": 60045 }, { "epoch": 0.5651764705882353, "grad_norm": 0.42666540408489423, "learning_rate": 2.6609930373412163e-06, "loss": 0.015510153770446778, "step": 60050 }, { "epoch": 0.5652235294117647, "grad_norm": 0.5272241284978051, "learning_rate": 2.660882260023005e-06, "loss": 0.017834669351577757, "step": 60055 }, { "epoch": 0.5652705882352941, "grad_norm": 0.7312826158473327, "learning_rate": 2.6607714965386426e-06, "loss": 0.023920914530754088, "step": 60060 }, { "epoch": 0.5653176470588235, "grad_norm": 0.8210309812038982, "learning_rate": 2.6606607468852514e-06, "loss": 0.01858239471912384, "step": 60065 }, { "epoch": 0.565364705882353, "grad_norm": 0.7131733660304213, "learning_rate": 2.6605500110599526e-06, "loss": 0.01877724826335907, "step": 60070 }, { "epoch": 0.5654117647058824, "grad_norm": 0.49406611350468643, "learning_rate": 2.6604392890598692e-06, "loss": 0.015367609262466431, "step": 60075 }, { "epoch": 0.5654588235294118, "grad_norm": 0.38245356082196813, "learning_rate": 2.660328580882124e-06, "loss": 0.016048403084278108, "step": 60080 }, { "epoch": 0.5655058823529412, "grad_norm": 0.4907012324941868, "learning_rate": 2.6602178865238414e-06, "loss": 0.015667010843753815, "step": 60085 }, { "epoch": 0.5655529411764706, "grad_norm": 0.46995083411212685, "learning_rate": 2.6601072059821478e-06, "loss": 0.021469065546989442, "step": 60090 }, { "epoch": 0.5656, "grad_norm": 0.7670742067236725, "learning_rate": 2.6599965392541673e-06, "loss": 0.019013914465904235, "step": 60095 }, { "epoch": 0.5656470588235294, "grad_norm": 0.43688031163609947, "learning_rate": 2.6598858863370276e-06, "loss": 0.013029266893863679, "step": 60100 }, { "epoch": 0.5656941176470588, "grad_norm": 0.6064051280353976, "learning_rate": 2.659775247227857e-06, "loss": 0.01948670744895935, "step": 60105 }, { "epoch": 0.5657411764705882, "grad_norm": 0.5897560737592793, "learning_rate": 2.6596646219237826e-06, "loss": 0.01909000426530838, "step": 60110 }, { "epoch": 0.5657882352941176, "grad_norm": 0.4203798115265332, "learning_rate": 2.6595540104219352e-06, "loss": 0.012341418862342834, "step": 60115 }, { "epoch": 0.5658352941176471, "grad_norm": 0.5380219063783018, "learning_rate": 2.6594434127194446e-06, "loss": 0.019232898950576782, "step": 60120 }, { "epoch": 0.5658823529411765, "grad_norm": 0.49625878765790227, "learning_rate": 2.6593328288134406e-06, "loss": 0.01564682424068451, "step": 60125 }, { "epoch": 0.5659294117647059, "grad_norm": 0.46588192775916054, "learning_rate": 2.659222258701057e-06, "loss": 0.019140443205833434, "step": 60130 }, { "epoch": 0.5659764705882353, "grad_norm": 0.5892675724057902, "learning_rate": 2.6591117023794246e-06, "loss": 0.017017093300819398, "step": 60135 }, { "epoch": 0.5660235294117647, "grad_norm": 0.4176271382958082, "learning_rate": 2.6590011598456785e-06, "loss": 0.01284928321838379, "step": 60140 }, { "epoch": 0.5660705882352941, "grad_norm": 0.6425423920356291, "learning_rate": 2.6588906310969515e-06, "loss": 0.014843842387199402, "step": 60145 }, { "epoch": 0.5661176470588235, "grad_norm": 0.6124569022123454, "learning_rate": 2.6587801161303793e-06, "loss": 0.01332949846982956, "step": 60150 }, { "epoch": 0.5661647058823529, "grad_norm": 0.2935984359909093, "learning_rate": 2.658669614943099e-06, "loss": 0.01746837645769119, "step": 60155 }, { "epoch": 0.5662117647058823, "grad_norm": 0.7736356228728446, "learning_rate": 2.6585591275322458e-06, "loss": 0.01878385543823242, "step": 60160 }, { "epoch": 0.5662588235294118, "grad_norm": 0.6492231935983047, "learning_rate": 2.658448653894959e-06, "loss": 0.020457811653614044, "step": 60165 }, { "epoch": 0.5663058823529412, "grad_norm": 0.4997205441400338, "learning_rate": 2.658338194028376e-06, "loss": 0.02160695493221283, "step": 60170 }, { "epoch": 0.5663529411764706, "grad_norm": 0.45717779162880834, "learning_rate": 2.658227747929636e-06, "loss": 0.01536412388086319, "step": 60175 }, { "epoch": 0.5664, "grad_norm": 0.5419633589709291, "learning_rate": 2.65811731559588e-06, "loss": 0.016594129800796508, "step": 60180 }, { "epoch": 0.5664470588235294, "grad_norm": 0.5658025369048566, "learning_rate": 2.6580068970242486e-06, "loss": 0.016729941964149474, "step": 60185 }, { "epoch": 0.5664941176470588, "grad_norm": 0.5839749688831688, "learning_rate": 2.657896492211883e-06, "loss": 0.016575917601585388, "step": 60190 }, { "epoch": 0.5665411764705882, "grad_norm": 0.500941610880607, "learning_rate": 2.6577861011559274e-06, "loss": 0.014974157512187957, "step": 60195 }, { "epoch": 0.5665882352941176, "grad_norm": 0.5018326987595986, "learning_rate": 2.657675723853524e-06, "loss": 0.015832582116127016, "step": 60200 }, { "epoch": 0.566635294117647, "grad_norm": 0.6835019651654937, "learning_rate": 2.6575653603018176e-06, "loss": 0.020693874359130858, "step": 60205 }, { "epoch": 0.5666823529411765, "grad_norm": 0.36323715376352006, "learning_rate": 2.657455010497953e-06, "loss": 0.015110711753368377, "step": 60210 }, { "epoch": 0.5667294117647059, "grad_norm": 0.500384298890544, "learning_rate": 2.657344674439077e-06, "loss": 0.017146436870098113, "step": 60215 }, { "epoch": 0.5667764705882353, "grad_norm": 0.4247603196207354, "learning_rate": 2.6572343521223355e-06, "loss": 0.014138796925544738, "step": 60220 }, { "epoch": 0.5668235294117647, "grad_norm": 0.7436626485964346, "learning_rate": 2.6571240435448764e-06, "loss": 0.01866365671157837, "step": 60225 }, { "epoch": 0.5668705882352941, "grad_norm": 0.46764250128149637, "learning_rate": 2.6570137487038484e-06, "loss": 0.015433010458946229, "step": 60230 }, { "epoch": 0.5669176470588235, "grad_norm": 0.319216920964918, "learning_rate": 2.6569034675964005e-06, "loss": 0.014544409513473511, "step": 60235 }, { "epoch": 0.5669647058823529, "grad_norm": 0.4925487273288191, "learning_rate": 2.6567932002196835e-06, "loss": 0.017021121084690095, "step": 60240 }, { "epoch": 0.5670117647058823, "grad_norm": 0.5092012822892308, "learning_rate": 2.6566829465708478e-06, "loss": 0.019803762435913086, "step": 60245 }, { "epoch": 0.5670588235294117, "grad_norm": 0.8209973833971023, "learning_rate": 2.6565727066470442e-06, "loss": 0.021233020722866057, "step": 60250 }, { "epoch": 0.5671058823529411, "grad_norm": 0.6382259734252386, "learning_rate": 2.6564624804454277e-06, "loss": 0.014591601490974427, "step": 60255 }, { "epoch": 0.5671529411764706, "grad_norm": 0.32032365559336706, "learning_rate": 2.65635226796315e-06, "loss": 0.012020836025476456, "step": 60260 }, { "epoch": 0.5672, "grad_norm": 0.4812762344541075, "learning_rate": 2.6562420691973656e-06, "loss": 0.01648305058479309, "step": 60265 }, { "epoch": 0.5672470588235294, "grad_norm": 0.42985379221431624, "learning_rate": 2.65613188414523e-06, "loss": 0.01567614674568176, "step": 60270 }, { "epoch": 0.5672941176470588, "grad_norm": 0.9485400480338793, "learning_rate": 2.6560217128038983e-06, "loss": 0.015849287807941436, "step": 60275 }, { "epoch": 0.5673411764705882, "grad_norm": 0.5747772405482402, "learning_rate": 2.655911555170529e-06, "loss": 0.018437290191650392, "step": 60280 }, { "epoch": 0.5673882352941176, "grad_norm": 0.5048064390080951, "learning_rate": 2.6558014112422775e-06, "loss": 0.019776102900505067, "step": 60285 }, { "epoch": 0.567435294117647, "grad_norm": 0.9273037397361341, "learning_rate": 2.655691281016304e-06, "loss": 0.01742573082447052, "step": 60290 }, { "epoch": 0.5674823529411764, "grad_norm": 0.5683707992471861, "learning_rate": 2.655581164489766e-06, "loss": 0.014411130547523498, "step": 60295 }, { "epoch": 0.5675294117647058, "grad_norm": 0.7283527553805381, "learning_rate": 2.6554710616598257e-06, "loss": 0.022442431747913362, "step": 60300 }, { "epoch": 0.5675764705882353, "grad_norm": 0.7081510338350511, "learning_rate": 2.6553609725236416e-06, "loss": 0.015104132890701293, "step": 60305 }, { "epoch": 0.5676235294117647, "grad_norm": 0.43872025527688385, "learning_rate": 2.655250897078377e-06, "loss": 0.016015064716339112, "step": 60310 }, { "epoch": 0.5676705882352941, "grad_norm": 0.5948130009415613, "learning_rate": 2.655140835321194e-06, "loss": 0.02474585473537445, "step": 60315 }, { "epoch": 0.5677176470588235, "grad_norm": 0.6029612073015962, "learning_rate": 2.655030787249256e-06, "loss": 0.020592352747917174, "step": 60320 }, { "epoch": 0.567764705882353, "grad_norm": 0.6917637384794194, "learning_rate": 2.654920752859726e-06, "loss": 0.019725415110588073, "step": 60325 }, { "epoch": 0.5678117647058823, "grad_norm": 0.6568709015833079, "learning_rate": 2.6548107321497715e-06, "loss": 0.01964232325553894, "step": 60330 }, { "epoch": 0.5678588235294117, "grad_norm": 0.4389783387326838, "learning_rate": 2.6547007251165557e-06, "loss": 0.017590636014938356, "step": 60335 }, { "epoch": 0.5679058823529411, "grad_norm": 0.61117656836197, "learning_rate": 2.6545907317572465e-06, "loss": 0.016211847960948943, "step": 60340 }, { "epoch": 0.5679529411764705, "grad_norm": 0.23060297731576918, "learning_rate": 2.654480752069012e-06, "loss": 0.011449643969535827, "step": 60345 }, { "epoch": 0.568, "grad_norm": 0.5593409124000279, "learning_rate": 2.6543707860490187e-06, "loss": 0.019326938688755034, "step": 60350 }, { "epoch": 0.5680470588235295, "grad_norm": 0.592378477615731, "learning_rate": 2.6542608336944377e-06, "loss": 0.015256024897098541, "step": 60355 }, { "epoch": 0.5680941176470589, "grad_norm": 0.4897715109888125, "learning_rate": 2.654150895002437e-06, "loss": 0.019158542156219482, "step": 60360 }, { "epoch": 0.5681411764705883, "grad_norm": 0.6299111356219567, "learning_rate": 2.654040969970189e-06, "loss": 0.017487253248691558, "step": 60365 }, { "epoch": 0.5681882352941177, "grad_norm": 0.286814709919816, "learning_rate": 2.6539310585948635e-06, "loss": 0.01574913561344147, "step": 60370 }, { "epoch": 0.5682352941176471, "grad_norm": 0.6457957119335695, "learning_rate": 2.653821160873635e-06, "loss": 0.016945484280586242, "step": 60375 }, { "epoch": 0.5682823529411765, "grad_norm": 0.5349386996498687, "learning_rate": 2.653711276803675e-06, "loss": 0.015960460901260375, "step": 60380 }, { "epoch": 0.5683294117647059, "grad_norm": 0.5900530884452782, "learning_rate": 2.653601406382158e-06, "loss": 0.016328756511211396, "step": 60385 }, { "epoch": 0.5683764705882353, "grad_norm": 0.5764617909257491, "learning_rate": 2.6534915496062586e-06, "loss": 0.015937022864818573, "step": 60390 }, { "epoch": 0.5684235294117647, "grad_norm": 0.49120455267595586, "learning_rate": 2.6533817064731532e-06, "loss": 0.015575309097766877, "step": 60395 }, { "epoch": 0.5684705882352942, "grad_norm": 0.42782602343860554, "learning_rate": 2.6532718769800177e-06, "loss": 0.016106459498405456, "step": 60400 }, { "epoch": 0.5685176470588236, "grad_norm": 0.5983940534891726, "learning_rate": 2.6531620611240295e-06, "loss": 0.019730935990810394, "step": 60405 }, { "epoch": 0.568564705882353, "grad_norm": 0.4967910621191572, "learning_rate": 2.6530522589023665e-06, "loss": 0.016952598094940187, "step": 60410 }, { "epoch": 0.5686117647058824, "grad_norm": 0.5020214982825907, "learning_rate": 2.6529424703122085e-06, "loss": 0.014364387094974517, "step": 60415 }, { "epoch": 0.5686588235294118, "grad_norm": 0.36335231375553195, "learning_rate": 2.652832695350734e-06, "loss": 0.015550321340560913, "step": 60420 }, { "epoch": 0.5687058823529412, "grad_norm": 0.3893637917290299, "learning_rate": 2.6527229340151238e-06, "loss": 0.015874746441841125, "step": 60425 }, { "epoch": 0.5687529411764706, "grad_norm": 0.6865256639889958, "learning_rate": 2.6526131863025606e-06, "loss": 0.020853956043720246, "step": 60430 }, { "epoch": 0.5688, "grad_norm": 0.5602981269863467, "learning_rate": 2.652503452210225e-06, "loss": 0.015917034447193147, "step": 60435 }, { "epoch": 0.5688470588235294, "grad_norm": 0.4737271298482241, "learning_rate": 2.6523937317353004e-06, "loss": 0.016891974210739135, "step": 60440 }, { "epoch": 0.5688941176470588, "grad_norm": 0.49416866068320164, "learning_rate": 2.652284024874971e-06, "loss": 0.017538008093833924, "step": 60445 }, { "epoch": 0.5689411764705883, "grad_norm": 0.7575688357278898, "learning_rate": 2.6521743316264215e-06, "loss": 0.020741355419158936, "step": 60450 }, { "epoch": 0.5689882352941177, "grad_norm": 0.3702502289811366, "learning_rate": 2.6520646519868363e-06, "loss": 0.01719689667224884, "step": 60455 }, { "epoch": 0.5690352941176471, "grad_norm": 0.7512322509208423, "learning_rate": 2.651954985953403e-06, "loss": 0.018601781129837035, "step": 60460 }, { "epoch": 0.5690823529411765, "grad_norm": 0.5760779327237815, "learning_rate": 2.6518453335233086e-06, "loss": 0.016730500757694243, "step": 60465 }, { "epoch": 0.5691294117647059, "grad_norm": 0.5463654618897351, "learning_rate": 2.65173569469374e-06, "loss": 0.017052420973777772, "step": 60470 }, { "epoch": 0.5691764705882353, "grad_norm": 0.5702957471850052, "learning_rate": 2.651626069461887e-06, "loss": 0.02239612638950348, "step": 60475 }, { "epoch": 0.5692235294117647, "grad_norm": 0.49505729347591504, "learning_rate": 2.6515164578249375e-06, "loss": 0.01741306632757187, "step": 60480 }, { "epoch": 0.5692705882352941, "grad_norm": 0.6525575437295533, "learning_rate": 2.6514068597800836e-06, "loss": 0.017919336259365082, "step": 60485 }, { "epoch": 0.5693176470588235, "grad_norm": 0.44089694926745887, "learning_rate": 2.651297275324516e-06, "loss": 0.018858519196510316, "step": 60490 }, { "epoch": 0.569364705882353, "grad_norm": 0.5099330808386573, "learning_rate": 2.651187704455426e-06, "loss": 0.01471133977174759, "step": 60495 }, { "epoch": 0.5694117647058824, "grad_norm": 0.480120272277191, "learning_rate": 2.651078147170007e-06, "loss": 0.015246975421905517, "step": 60500 }, { "epoch": 0.5694588235294118, "grad_norm": 0.6231076806957999, "learning_rate": 2.650968603465452e-06, "loss": 0.018063348531723023, "step": 60505 }, { "epoch": 0.5695058823529412, "grad_norm": 0.4686064649891393, "learning_rate": 2.6508590733389555e-06, "loss": 0.014456284046173096, "step": 60510 }, { "epoch": 0.5695529411764706, "grad_norm": 0.4930161416613579, "learning_rate": 2.650749556787713e-06, "loss": 0.016990604996681213, "step": 60515 }, { "epoch": 0.5696, "grad_norm": 0.4842108693440284, "learning_rate": 2.6506400538089216e-06, "loss": 0.017540755867958068, "step": 60520 }, { "epoch": 0.5696470588235294, "grad_norm": 0.3964049456048851, "learning_rate": 2.650530564399776e-06, "loss": 0.016774851083755492, "step": 60525 }, { "epoch": 0.5696941176470588, "grad_norm": 0.49401916544912444, "learning_rate": 2.6504210885574755e-06, "loss": 0.0165309876203537, "step": 60530 }, { "epoch": 0.5697411764705882, "grad_norm": 0.5402576240658529, "learning_rate": 2.650311626279217e-06, "loss": 0.014871153235435485, "step": 60535 }, { "epoch": 0.5697882352941176, "grad_norm": 0.46545798662509325, "learning_rate": 2.650202177562202e-06, "loss": 0.015551352500915527, "step": 60540 }, { "epoch": 0.5698352941176471, "grad_norm": 0.44941438179453813, "learning_rate": 2.650092742403628e-06, "loss": 0.01905568391084671, "step": 60545 }, { "epoch": 0.5698823529411765, "grad_norm": 0.984023393994105, "learning_rate": 2.6499833208006974e-06, "loss": 0.023909184336662292, "step": 60550 }, { "epoch": 0.5699294117647059, "grad_norm": 0.6455085645779556, "learning_rate": 2.649873912750612e-06, "loss": 0.02030341327190399, "step": 60555 }, { "epoch": 0.5699764705882353, "grad_norm": 0.41503556217601056, "learning_rate": 2.649764518250574e-06, "loss": 0.017851704359054567, "step": 60560 }, { "epoch": 0.5700235294117647, "grad_norm": 0.5433579909331306, "learning_rate": 2.649655137297786e-06, "loss": 0.015563446283340453, "step": 60565 }, { "epoch": 0.5700705882352941, "grad_norm": 0.42053095589595174, "learning_rate": 2.649545769889453e-06, "loss": 0.014928141236305236, "step": 60570 }, { "epoch": 0.5701176470588235, "grad_norm": 0.49804619131688327, "learning_rate": 2.6494364160227792e-06, "loss": 0.01527797281742096, "step": 60575 }, { "epoch": 0.5701647058823529, "grad_norm": 0.6298356352585828, "learning_rate": 2.6493270756949713e-06, "loss": 0.01593015342950821, "step": 60580 }, { "epoch": 0.5702117647058823, "grad_norm": 0.49577836389579427, "learning_rate": 2.649217748903235e-06, "loss": 0.01874244660139084, "step": 60585 }, { "epoch": 0.5702588235294118, "grad_norm": 0.5296872983847032, "learning_rate": 2.6491084356447775e-06, "loss": 0.01702723503112793, "step": 60590 }, { "epoch": 0.5703058823529412, "grad_norm": 0.6647059647045162, "learning_rate": 2.648999135916808e-06, "loss": 0.015011388063430785, "step": 60595 }, { "epoch": 0.5703529411764706, "grad_norm": 1.2637467643687972, "learning_rate": 2.648889849716535e-06, "loss": 0.01639791429042816, "step": 60600 }, { "epoch": 0.5704, "grad_norm": 0.425310723852179, "learning_rate": 2.648780577041168e-06, "loss": 0.016288954019546508, "step": 60605 }, { "epoch": 0.5704470588235294, "grad_norm": 0.4616297851717294, "learning_rate": 2.648671317887917e-06, "loss": 0.019026324152946472, "step": 60610 }, { "epoch": 0.5704941176470588, "grad_norm": 0.5000458931535495, "learning_rate": 2.6485620722539947e-06, "loss": 0.013629132509231567, "step": 60615 }, { "epoch": 0.5705411764705882, "grad_norm": 0.7090722614489597, "learning_rate": 2.6484528401366126e-06, "loss": 0.016559234261512755, "step": 60620 }, { "epoch": 0.5705882352941176, "grad_norm": 0.4884490174144122, "learning_rate": 2.6483436215329835e-06, "loss": 0.01909712851047516, "step": 60625 }, { "epoch": 0.570635294117647, "grad_norm": 0.5414835303308451, "learning_rate": 2.648234416440321e-06, "loss": 0.023421651124954222, "step": 60630 }, { "epoch": 0.5706823529411764, "grad_norm": 0.38074446372977605, "learning_rate": 2.648125224855841e-06, "loss": 0.017871315777301788, "step": 60635 }, { "epoch": 0.5707294117647059, "grad_norm": 0.6092687626614115, "learning_rate": 2.648016046776757e-06, "loss": 0.015488731861114501, "step": 60640 }, { "epoch": 0.5707764705882353, "grad_norm": 0.5047031168062962, "learning_rate": 2.6479068822002867e-06, "loss": 0.02057269513607025, "step": 60645 }, { "epoch": 0.5708235294117647, "grad_norm": 0.3575273639938899, "learning_rate": 2.6477977311236463e-06, "loss": 0.021041598916053773, "step": 60650 }, { "epoch": 0.5708705882352941, "grad_norm": 0.6994152467814853, "learning_rate": 2.647688593544054e-06, "loss": 0.01665170192718506, "step": 60655 }, { "epoch": 0.5709176470588235, "grad_norm": 0.5646850878270369, "learning_rate": 2.6475794694587283e-06, "loss": 0.017713524401187897, "step": 60660 }, { "epoch": 0.5709647058823529, "grad_norm": 0.38916359342489165, "learning_rate": 2.6474703588648883e-06, "loss": 0.01449568271636963, "step": 60665 }, { "epoch": 0.5710117647058823, "grad_norm": 0.6747011448146675, "learning_rate": 2.647361261759755e-06, "loss": 0.014587496221065522, "step": 60670 }, { "epoch": 0.5710588235294117, "grad_norm": 0.6027763697568507, "learning_rate": 2.647252178140548e-06, "loss": 0.016440358757972718, "step": 60675 }, { "epoch": 0.5711058823529411, "grad_norm": 0.569686384312841, "learning_rate": 2.6471431080044907e-06, "loss": 0.015447801351547242, "step": 60680 }, { "epoch": 0.5711529411764706, "grad_norm": 0.5992747604887316, "learning_rate": 2.6470340513488046e-06, "loss": 0.015733282268047332, "step": 60685 }, { "epoch": 0.5712, "grad_norm": 0.5351760136420798, "learning_rate": 2.6469250081707137e-06, "loss": 0.01865345984697342, "step": 60690 }, { "epoch": 0.5712470588235294, "grad_norm": 0.49943976630324377, "learning_rate": 2.6468159784674423e-06, "loss": 0.022217999398708343, "step": 60695 }, { "epoch": 0.5712941176470588, "grad_norm": 0.5593340399464134, "learning_rate": 2.646706962236215e-06, "loss": 0.018469178676605226, "step": 60700 }, { "epoch": 0.5713411764705882, "grad_norm": 0.6128573359880826, "learning_rate": 2.646597959474258e-06, "loss": 0.021602186560630798, "step": 60705 }, { "epoch": 0.5713882352941176, "grad_norm": 0.6013497747296102, "learning_rate": 2.6464889701787976e-06, "loss": 0.01831279695034027, "step": 60710 }, { "epoch": 0.571435294117647, "grad_norm": 0.5295237661416692, "learning_rate": 2.6463799943470614e-06, "loss": 0.017729344964027404, "step": 60715 }, { "epoch": 0.5714823529411764, "grad_norm": 0.4715345095426066, "learning_rate": 2.646271031976277e-06, "loss": 0.021540552377700806, "step": 60720 }, { "epoch": 0.5715294117647058, "grad_norm": 0.49785592702969306, "learning_rate": 2.646162083063675e-06, "loss": 0.018960697948932646, "step": 60725 }, { "epoch": 0.5715764705882352, "grad_norm": 0.4147682133998821, "learning_rate": 2.6460531476064827e-06, "loss": 0.015582871437072755, "step": 60730 }, { "epoch": 0.5716235294117648, "grad_norm": 0.372555492691112, "learning_rate": 2.645944225601933e-06, "loss": 0.01621614396572113, "step": 60735 }, { "epoch": 0.5716705882352942, "grad_norm": 0.31313110730870847, "learning_rate": 2.6458353170472566e-06, "loss": 0.011930607259273529, "step": 60740 }, { "epoch": 0.5717176470588236, "grad_norm": 0.6457808436536863, "learning_rate": 2.6457264219396856e-06, "loss": 0.019057834148406984, "step": 60745 }, { "epoch": 0.571764705882353, "grad_norm": 0.5471296164621615, "learning_rate": 2.6456175402764527e-06, "loss": 0.01671047955751419, "step": 60750 }, { "epoch": 0.5718117647058824, "grad_norm": 0.480775343992932, "learning_rate": 2.6455086720547925e-06, "loss": 0.016825750470161438, "step": 60755 }, { "epoch": 0.5718588235294118, "grad_norm": 0.41770993042776255, "learning_rate": 2.645399817271938e-06, "loss": 0.014989304542541503, "step": 60760 }, { "epoch": 0.5719058823529412, "grad_norm": 0.5263039284202502, "learning_rate": 2.6452909759251267e-06, "loss": 0.015221357345581055, "step": 60765 }, { "epoch": 0.5719529411764706, "grad_norm": 0.43792460405448125, "learning_rate": 2.6451821480115935e-06, "loss": 0.018572813272476195, "step": 60770 }, { "epoch": 0.572, "grad_norm": 0.48445722378293027, "learning_rate": 2.6450733335285757e-06, "loss": 0.013508163392543793, "step": 60775 }, { "epoch": 0.5720470588235295, "grad_norm": 0.6987990466792886, "learning_rate": 2.6449645324733108e-06, "loss": 0.016723097860813142, "step": 60780 }, { "epoch": 0.5720941176470589, "grad_norm": 0.6044284887160474, "learning_rate": 2.6448557448430373e-06, "loss": 0.01770144999027252, "step": 60785 }, { "epoch": 0.5721411764705883, "grad_norm": 0.39361471224979117, "learning_rate": 2.6447469706349954e-06, "loss": 0.010652062296867371, "step": 60790 }, { "epoch": 0.5721882352941177, "grad_norm": 0.5436470762584283, "learning_rate": 2.6446382098464253e-06, "loss": 0.020462694764137267, "step": 60795 }, { "epoch": 0.5722352941176471, "grad_norm": 1.891307258448272, "learning_rate": 2.6445294624745665e-06, "loss": 0.021100744605064392, "step": 60800 }, { "epoch": 0.5722823529411765, "grad_norm": 0.5905149888106215, "learning_rate": 2.6444207285166616e-06, "loss": 0.014679089188575745, "step": 60805 }, { "epoch": 0.5723294117647059, "grad_norm": 0.6249049801599995, "learning_rate": 2.6443120079699534e-06, "loss": 0.015100950002670288, "step": 60810 }, { "epoch": 0.5723764705882353, "grad_norm": 0.5747179715822307, "learning_rate": 2.6442033008316857e-06, "loss": 0.019214586913585664, "step": 60815 }, { "epoch": 0.5724235294117647, "grad_norm": 0.5414246921479913, "learning_rate": 2.6440946070991014e-06, "loss": 0.017906609177589416, "step": 60820 }, { "epoch": 0.5724705882352941, "grad_norm": 0.5970778246653938, "learning_rate": 2.6439859267694464e-06, "loss": 0.020221617817878724, "step": 60825 }, { "epoch": 0.5725176470588236, "grad_norm": 0.7090965459728037, "learning_rate": 2.6438772598399657e-06, "loss": 0.02059832513332367, "step": 60830 }, { "epoch": 0.572564705882353, "grad_norm": 0.5529495769248239, "learning_rate": 2.643768606307906e-06, "loss": 0.020268279314041137, "step": 60835 }, { "epoch": 0.5726117647058824, "grad_norm": 0.29141609920463696, "learning_rate": 2.643659966170515e-06, "loss": 0.016774082183837892, "step": 60840 }, { "epoch": 0.5726588235294118, "grad_norm": 0.574273433298266, "learning_rate": 2.6435513394250405e-06, "loss": 0.016528716683387755, "step": 60845 }, { "epoch": 0.5727058823529412, "grad_norm": 0.375430508517794, "learning_rate": 2.6434427260687314e-06, "loss": 0.01395919919013977, "step": 60850 }, { "epoch": 0.5727529411764706, "grad_norm": 0.5799989621356402, "learning_rate": 2.643334126098838e-06, "loss": 0.014745205640792847, "step": 60855 }, { "epoch": 0.5728, "grad_norm": 0.702174885010862, "learning_rate": 2.6432255395126095e-06, "loss": 0.018112070858478546, "step": 60860 }, { "epoch": 0.5728470588235294, "grad_norm": 1.9114081466966306, "learning_rate": 2.643116966307298e-06, "loss": 0.020644763112068178, "step": 60865 }, { "epoch": 0.5728941176470588, "grad_norm": 0.6497473778856626, "learning_rate": 2.643008406480155e-06, "loss": 0.020643675327301027, "step": 60870 }, { "epoch": 0.5729411764705883, "grad_norm": 0.5169156156365877, "learning_rate": 2.642899860028435e-06, "loss": 0.01581232249736786, "step": 60875 }, { "epoch": 0.5729882352941177, "grad_norm": 0.6060873368355797, "learning_rate": 2.6427913269493893e-06, "loss": 0.01587219536304474, "step": 60880 }, { "epoch": 0.5730352941176471, "grad_norm": 0.5590429440195969, "learning_rate": 2.6426828072402737e-06, "loss": 0.01867508888244629, "step": 60885 }, { "epoch": 0.5730823529411765, "grad_norm": 0.5774711168080597, "learning_rate": 2.642574300898343e-06, "loss": 0.02508549690246582, "step": 60890 }, { "epoch": 0.5731294117647059, "grad_norm": 0.34095496380697854, "learning_rate": 2.642465807920854e-06, "loss": 0.013050885498523712, "step": 60895 }, { "epoch": 0.5731764705882353, "grad_norm": 0.5224691194273688, "learning_rate": 2.642357328305061e-06, "loss": 0.023742836713790894, "step": 60900 }, { "epoch": 0.5732235294117647, "grad_norm": 0.6834101048182939, "learning_rate": 2.642248862048225e-06, "loss": 0.01694242060184479, "step": 60905 }, { "epoch": 0.5732705882352941, "grad_norm": 0.4457110237775505, "learning_rate": 2.6421404091476023e-06, "loss": 0.0149541437625885, "step": 60910 }, { "epoch": 0.5733176470588235, "grad_norm": 0.45787011218874535, "learning_rate": 2.6420319696004524e-06, "loss": 0.02105156481266022, "step": 60915 }, { "epoch": 0.5733647058823529, "grad_norm": 0.38541528971687267, "learning_rate": 2.6419235434040356e-06, "loss": 0.014633195102214813, "step": 60920 }, { "epoch": 0.5734117647058824, "grad_norm": 0.6636075600332306, "learning_rate": 2.6418151305556124e-06, "loss": 0.016297681629657744, "step": 60925 }, { "epoch": 0.5734588235294118, "grad_norm": 0.4449217784553049, "learning_rate": 2.6417067310524436e-06, "loss": 0.016041994094848633, "step": 60930 }, { "epoch": 0.5735058823529412, "grad_norm": 0.5671369629938333, "learning_rate": 2.6415983448917925e-06, "loss": 0.015696072578430177, "step": 60935 }, { "epoch": 0.5735529411764706, "grad_norm": 0.35488217497334235, "learning_rate": 2.641489972070922e-06, "loss": 0.01459847092628479, "step": 60940 }, { "epoch": 0.5736, "grad_norm": 0.3121240751772449, "learning_rate": 2.6413816125870955e-06, "loss": 0.01670767068862915, "step": 60945 }, { "epoch": 0.5736470588235294, "grad_norm": 0.5469038888492825, "learning_rate": 2.6412732664375783e-06, "loss": 0.017533227801322937, "step": 60950 }, { "epoch": 0.5736941176470588, "grad_norm": 0.4257436925642369, "learning_rate": 2.6411649336196355e-06, "loss": 0.012528136372566223, "step": 60955 }, { "epoch": 0.5737411764705882, "grad_norm": 0.5508833226133086, "learning_rate": 2.6410566141305332e-06, "loss": 0.014426806569099426, "step": 60960 }, { "epoch": 0.5737882352941176, "grad_norm": 0.6128916050006461, "learning_rate": 2.6409483079675386e-06, "loss": 0.021880245208740233, "step": 60965 }, { "epoch": 0.5738352941176471, "grad_norm": 0.546671222058678, "learning_rate": 2.6408400151279195e-06, "loss": 0.014709611237049103, "step": 60970 }, { "epoch": 0.5738823529411765, "grad_norm": 0.5454044741723675, "learning_rate": 2.640731735608944e-06, "loss": 0.015893122553825377, "step": 60975 }, { "epoch": 0.5739294117647059, "grad_norm": 0.41853254976990784, "learning_rate": 2.6406234694078824e-06, "loss": 0.018721354007720948, "step": 60980 }, { "epoch": 0.5739764705882353, "grad_norm": 0.6260377325033882, "learning_rate": 2.6405152165220036e-06, "loss": 0.015872150659561157, "step": 60985 }, { "epoch": 0.5740235294117647, "grad_norm": 0.4025916905988253, "learning_rate": 2.64040697694858e-06, "loss": 0.018345220386981963, "step": 60990 }, { "epoch": 0.5740705882352941, "grad_norm": 0.5008492410985481, "learning_rate": 2.6402987506848827e-06, "loss": 0.01842656433582306, "step": 60995 }, { "epoch": 0.5741176470588235, "grad_norm": 0.4748558288597575, "learning_rate": 2.640190537728184e-06, "loss": 0.019197434186935425, "step": 61000 }, { "epoch": 0.5741647058823529, "grad_norm": 0.8957950405938903, "learning_rate": 2.640082338075757e-06, "loss": 0.015124453604221344, "step": 61005 }, { "epoch": 0.5742117647058823, "grad_norm": 0.3455239318892615, "learning_rate": 2.6399741517248756e-06, "loss": 0.01193142831325531, "step": 61010 }, { "epoch": 0.5742588235294117, "grad_norm": 0.4816903131578513, "learning_rate": 2.639865978672815e-06, "loss": 0.016535376012325288, "step": 61015 }, { "epoch": 0.5743058823529412, "grad_norm": 0.3917137823143494, "learning_rate": 2.6397578189168515e-06, "loss": 0.01527264267206192, "step": 61020 }, { "epoch": 0.5743529411764706, "grad_norm": 0.4331840658141117, "learning_rate": 2.6396496724542607e-06, "loss": 0.016664758324623108, "step": 61025 }, { "epoch": 0.5744, "grad_norm": 0.4237661633944376, "learning_rate": 2.6395415392823202e-06, "loss": 0.015349563956260682, "step": 61030 }, { "epoch": 0.5744470588235294, "grad_norm": 0.7457385203262947, "learning_rate": 2.639433419398308e-06, "loss": 0.01559184044599533, "step": 61035 }, { "epoch": 0.5744941176470588, "grad_norm": 0.5210521421428582, "learning_rate": 2.6393253127995015e-06, "loss": 0.018974843621253967, "step": 61040 }, { "epoch": 0.5745411764705882, "grad_norm": 0.521876712806327, "learning_rate": 2.6392172194831823e-06, "loss": 0.016382841765880583, "step": 61045 }, { "epoch": 0.5745882352941176, "grad_norm": 0.553751209624299, "learning_rate": 2.6391091394466295e-06, "loss": 0.015789711475372316, "step": 61050 }, { "epoch": 0.574635294117647, "grad_norm": 0.36845112594982854, "learning_rate": 2.6390010726871247e-06, "loss": 0.016777832806110383, "step": 61055 }, { "epoch": 0.5746823529411764, "grad_norm": 0.4308850946078198, "learning_rate": 2.6388930192019484e-06, "loss": 0.01731317937374115, "step": 61060 }, { "epoch": 0.5747294117647059, "grad_norm": 0.5735547827542948, "learning_rate": 2.6387849789883854e-06, "loss": 0.020222195982933046, "step": 61065 }, { "epoch": 0.5747764705882353, "grad_norm": 0.37887416685780045, "learning_rate": 2.638676952043717e-06, "loss": 0.01890830248594284, "step": 61070 }, { "epoch": 0.5748235294117647, "grad_norm": 0.4807505302698495, "learning_rate": 2.63856893836523e-06, "loss": 0.017572493851184846, "step": 61075 }, { "epoch": 0.5748705882352941, "grad_norm": 0.5064171821195776, "learning_rate": 2.638460937950206e-06, "loss": 0.0185547411441803, "step": 61080 }, { "epoch": 0.5749176470588235, "grad_norm": 0.5617931887104751, "learning_rate": 2.638352950795934e-06, "loss": 0.01604672372341156, "step": 61085 }, { "epoch": 0.5749647058823529, "grad_norm": 0.6261059535210663, "learning_rate": 2.6382449768996986e-06, "loss": 0.01407395899295807, "step": 61090 }, { "epoch": 0.5750117647058823, "grad_norm": 0.5433275534235539, "learning_rate": 2.638137016258788e-06, "loss": 0.018248939514160158, "step": 61095 }, { "epoch": 0.5750588235294117, "grad_norm": 0.5552220826787833, "learning_rate": 2.638029068870489e-06, "loss": 0.01892240345478058, "step": 61100 }, { "epoch": 0.5751058823529411, "grad_norm": 0.4013521646914141, "learning_rate": 2.6379211347320923e-06, "loss": 0.015818409621715546, "step": 61105 }, { "epoch": 0.5751529411764705, "grad_norm": 0.8041795877657341, "learning_rate": 2.6378132138408864e-06, "loss": 0.01692927032709122, "step": 61110 }, { "epoch": 0.5752, "grad_norm": 0.6398325774457382, "learning_rate": 2.6377053061941617e-06, "loss": 0.022228136658668518, "step": 61115 }, { "epoch": 0.5752470588235294, "grad_norm": 0.5195905692146698, "learning_rate": 2.6375974117892102e-06, "loss": 0.01545228660106659, "step": 61120 }, { "epoch": 0.5752941176470588, "grad_norm": 0.3653836653571167, "learning_rate": 2.6374895306233227e-06, "loss": 0.011747747659683228, "step": 61125 }, { "epoch": 0.5753411764705882, "grad_norm": 0.4892344665646547, "learning_rate": 2.637381662693793e-06, "loss": 0.01317930519580841, "step": 61130 }, { "epoch": 0.5753882352941176, "grad_norm": 0.560153254468278, "learning_rate": 2.637273807997914e-06, "loss": 0.01614242047071457, "step": 61135 }, { "epoch": 0.575435294117647, "grad_norm": 0.7082616429966091, "learning_rate": 2.63716596653298e-06, "loss": 0.018824896216392516, "step": 61140 }, { "epoch": 0.5754823529411764, "grad_norm": 0.68016069073914, "learning_rate": 2.6370581382962867e-06, "loss": 0.014343979954719543, "step": 61145 }, { "epoch": 0.5755294117647058, "grad_norm": 0.7061531174167455, "learning_rate": 2.6369503232851297e-06, "loss": 0.013958168029785157, "step": 61150 }, { "epoch": 0.5755764705882352, "grad_norm": 0.5072966832031479, "learning_rate": 2.6368425214968046e-06, "loss": 0.01721411645412445, "step": 61155 }, { "epoch": 0.5756235294117648, "grad_norm": 0.6954200863981842, "learning_rate": 2.636734732928611e-06, "loss": 0.01990426778793335, "step": 61160 }, { "epoch": 0.5756705882352942, "grad_norm": 0.6678530581056262, "learning_rate": 2.6366269575778443e-06, "loss": 0.021541975438594818, "step": 61165 }, { "epoch": 0.5757176470588236, "grad_norm": 0.42714999758270455, "learning_rate": 2.636519195441805e-06, "loss": 0.015350493788719177, "step": 61170 }, { "epoch": 0.575764705882353, "grad_norm": 0.6739552247005712, "learning_rate": 2.636411446517793e-06, "loss": 0.018313363194465637, "step": 61175 }, { "epoch": 0.5758117647058824, "grad_norm": 0.5382660694491094, "learning_rate": 2.6363037108031085e-06, "loss": 0.015796647965908052, "step": 61180 }, { "epoch": 0.5758588235294118, "grad_norm": 0.5054168246446398, "learning_rate": 2.6361959882950526e-06, "loss": 0.01762554943561554, "step": 61185 }, { "epoch": 0.5759058823529412, "grad_norm": 0.6000073048153611, "learning_rate": 2.6360882789909274e-06, "loss": 0.014658543467521667, "step": 61190 }, { "epoch": 0.5759529411764706, "grad_norm": 0.41319364676220005, "learning_rate": 2.635980582888036e-06, "loss": 0.0155903160572052, "step": 61195 }, { "epoch": 0.576, "grad_norm": 0.3485832990379571, "learning_rate": 2.6358728999836806e-06, "loss": 0.01895093321800232, "step": 61200 }, { "epoch": 0.5760470588235294, "grad_norm": 0.652936845489211, "learning_rate": 2.635765230275168e-06, "loss": 0.019728586077690125, "step": 61205 }, { "epoch": 0.5760941176470589, "grad_norm": 0.44286027085808477, "learning_rate": 2.635657573759801e-06, "loss": 0.01625853180885315, "step": 61210 }, { "epoch": 0.5761411764705883, "grad_norm": 0.4474507169752895, "learning_rate": 2.635549930434887e-06, "loss": 0.016413141787052155, "step": 61215 }, { "epoch": 0.5761882352941177, "grad_norm": 0.33014825540609993, "learning_rate": 2.635442300297732e-06, "loss": 0.011380937695503236, "step": 61220 }, { "epoch": 0.5762352941176471, "grad_norm": 0.4281685017082177, "learning_rate": 2.6353346833456434e-06, "loss": 0.014718246459960938, "step": 61225 }, { "epoch": 0.5762823529411765, "grad_norm": 0.5174800308776086, "learning_rate": 2.635227079575929e-06, "loss": 0.015309298038482666, "step": 61230 }, { "epoch": 0.5763294117647059, "grad_norm": 0.6703090199587548, "learning_rate": 2.635119488985899e-06, "loss": 0.016445261240005494, "step": 61235 }, { "epoch": 0.5763764705882353, "grad_norm": 0.4432288156466579, "learning_rate": 2.6350119115728614e-06, "loss": 0.01474866271018982, "step": 61240 }, { "epoch": 0.5764235294117647, "grad_norm": 0.4236060749851148, "learning_rate": 2.6349043473341284e-06, "loss": 0.01787394881248474, "step": 61245 }, { "epoch": 0.5764705882352941, "grad_norm": 0.5281915698674425, "learning_rate": 2.63479679626701e-06, "loss": 0.019050900638103486, "step": 61250 }, { "epoch": 0.5765176470588236, "grad_norm": 0.6090867745624159, "learning_rate": 2.6346892583688195e-06, "loss": 0.013586637377738953, "step": 61255 }, { "epoch": 0.576564705882353, "grad_norm": 0.5950973118283546, "learning_rate": 2.6345817336368683e-06, "loss": 0.01935060918331146, "step": 61260 }, { "epoch": 0.5766117647058824, "grad_norm": 0.6079018564884955, "learning_rate": 2.634474222068471e-06, "loss": 0.02131510674953461, "step": 61265 }, { "epoch": 0.5766588235294118, "grad_norm": 0.3963623284642553, "learning_rate": 2.6343667236609417e-06, "loss": 0.01836049258708954, "step": 61270 }, { "epoch": 0.5767058823529412, "grad_norm": 0.5583600162095279, "learning_rate": 2.634259238411595e-06, "loss": 0.017036020755767822, "step": 61275 }, { "epoch": 0.5767529411764706, "grad_norm": 0.3603885075887726, "learning_rate": 2.6341517663177477e-06, "loss": 0.017519840598106386, "step": 61280 }, { "epoch": 0.5768, "grad_norm": 0.5973264392138233, "learning_rate": 2.6340443073767158e-06, "loss": 0.020954805612564086, "step": 61285 }, { "epoch": 0.5768470588235294, "grad_norm": 0.5991545262482352, "learning_rate": 2.633936861585816e-06, "loss": 0.018208611011505126, "step": 61290 }, { "epoch": 0.5768941176470588, "grad_norm": 0.45209006268414037, "learning_rate": 2.6338294289423683e-06, "loss": 0.018450886011123657, "step": 61295 }, { "epoch": 0.5769411764705883, "grad_norm": 0.44033167711366494, "learning_rate": 2.6337220094436907e-06, "loss": 0.015290135145187378, "step": 61300 }, { "epoch": 0.5769882352941177, "grad_norm": 0.5896807044706188, "learning_rate": 2.633614603087102e-06, "loss": 0.015880271792411804, "step": 61305 }, { "epoch": 0.5770352941176471, "grad_norm": 0.5816274219120636, "learning_rate": 2.6335072098699237e-06, "loss": 0.015631125867366792, "step": 61310 }, { "epoch": 0.5770823529411765, "grad_norm": 0.40060949055381123, "learning_rate": 2.633399829789477e-06, "loss": 0.013173645734786988, "step": 61315 }, { "epoch": 0.5771294117647059, "grad_norm": 0.4735887373931181, "learning_rate": 2.6332924628430835e-06, "loss": 0.019363605976104738, "step": 61320 }, { "epoch": 0.5771764705882353, "grad_norm": 0.4276986890324653, "learning_rate": 2.633185109028066e-06, "loss": 0.018548299372196198, "step": 61325 }, { "epoch": 0.5772235294117647, "grad_norm": 0.44930740948591286, "learning_rate": 2.633077768341749e-06, "loss": 0.013047698140144347, "step": 61330 }, { "epoch": 0.5772705882352941, "grad_norm": 0.518317177972649, "learning_rate": 2.6329704407814556e-06, "loss": 0.016985908150672913, "step": 61335 }, { "epoch": 0.5773176470588235, "grad_norm": 0.47237306739315693, "learning_rate": 2.632863126344511e-06, "loss": 0.015179164707660675, "step": 61340 }, { "epoch": 0.5773647058823529, "grad_norm": 0.5163408998477291, "learning_rate": 2.6327558250282415e-06, "loss": 0.017899540066719056, "step": 61345 }, { "epoch": 0.5774117647058824, "grad_norm": 0.37928388417670034, "learning_rate": 2.6326485368299736e-06, "loss": 0.02092098891735077, "step": 61350 }, { "epoch": 0.5774588235294118, "grad_norm": 0.4956085187531058, "learning_rate": 2.632541261747034e-06, "loss": 0.014827948808670045, "step": 61355 }, { "epoch": 0.5775058823529412, "grad_norm": 0.5998409683044352, "learning_rate": 2.632433999776752e-06, "loss": 0.01594987213611603, "step": 61360 }, { "epoch": 0.5775529411764706, "grad_norm": 0.564549992889416, "learning_rate": 2.6323267509164554e-06, "loss": 0.019471648335456847, "step": 61365 }, { "epoch": 0.5776, "grad_norm": 0.50799592621014, "learning_rate": 2.6322195151634737e-06, "loss": 0.020613199472427367, "step": 61370 }, { "epoch": 0.5776470588235294, "grad_norm": 0.4568416491148954, "learning_rate": 2.6321122925151378e-06, "loss": 0.011361242830753326, "step": 61375 }, { "epoch": 0.5776941176470588, "grad_norm": 0.5553698963268912, "learning_rate": 2.6320050829687795e-06, "loss": 0.0172274112701416, "step": 61380 }, { "epoch": 0.5777411764705882, "grad_norm": 0.5394515204328229, "learning_rate": 2.6318978865217294e-06, "loss": 0.017194192111492156, "step": 61385 }, { "epoch": 0.5777882352941176, "grad_norm": 0.5718247916402626, "learning_rate": 2.6317907031713207e-06, "loss": 0.018772217631340026, "step": 61390 }, { "epoch": 0.5778352941176471, "grad_norm": 0.34575142326222924, "learning_rate": 2.6316835329148876e-06, "loss": 0.013291166722774505, "step": 61395 }, { "epoch": 0.5778823529411765, "grad_norm": 0.423841264740216, "learning_rate": 2.631576375749763e-06, "loss": 0.016103702783584594, "step": 61400 }, { "epoch": 0.5779294117647059, "grad_norm": 0.6015426833994848, "learning_rate": 2.631469231673283e-06, "loss": 0.019194343686103822, "step": 61405 }, { "epoch": 0.5779764705882353, "grad_norm": 0.42420127010340564, "learning_rate": 2.6313621006827823e-06, "loss": 0.019230866432189943, "step": 61410 }, { "epoch": 0.5780235294117647, "grad_norm": 0.6829388927211202, "learning_rate": 2.631254982775598e-06, "loss": 0.019831378757953644, "step": 61415 }, { "epoch": 0.5780705882352941, "grad_norm": 0.629566619297375, "learning_rate": 2.6311478779490674e-06, "loss": 0.021780280768871306, "step": 61420 }, { "epoch": 0.5781176470588235, "grad_norm": 0.4517422555625738, "learning_rate": 2.6310407862005277e-06, "loss": 0.01634240448474884, "step": 61425 }, { "epoch": 0.5781647058823529, "grad_norm": 0.44047260017475065, "learning_rate": 2.6309337075273187e-06, "loss": 0.01805929243564606, "step": 61430 }, { "epoch": 0.5782117647058823, "grad_norm": 0.4023045379259356, "learning_rate": 2.6308266419267792e-06, "loss": 0.015693068504333496, "step": 61435 }, { "epoch": 0.5782588235294117, "grad_norm": 0.4065033896665217, "learning_rate": 2.6307195893962495e-06, "loss": 0.017131665349006654, "step": 61440 }, { "epoch": 0.5783058823529412, "grad_norm": 0.45073237060909477, "learning_rate": 2.630612549933071e-06, "loss": 0.013273926079273224, "step": 61445 }, { "epoch": 0.5783529411764706, "grad_norm": 0.6621347359957568, "learning_rate": 2.630505523534585e-06, "loss": 0.015362969040870667, "step": 61450 }, { "epoch": 0.5784, "grad_norm": 0.65143467322392, "learning_rate": 2.630398510198135e-06, "loss": 0.017877762019634248, "step": 61455 }, { "epoch": 0.5784470588235294, "grad_norm": 0.49741364325341986, "learning_rate": 2.630291509921063e-06, "loss": 0.013491418957710267, "step": 61460 }, { "epoch": 0.5784941176470588, "grad_norm": 0.7113827916783391, "learning_rate": 2.6301845227007133e-06, "loss": 0.02013865262269974, "step": 61465 }, { "epoch": 0.5785411764705882, "grad_norm": 0.46175651217977964, "learning_rate": 2.630077548534432e-06, "loss": 0.014484062790870667, "step": 61470 }, { "epoch": 0.5785882352941176, "grad_norm": 0.39564129731601827, "learning_rate": 2.629970587419563e-06, "loss": 0.013819938898086548, "step": 61475 }, { "epoch": 0.578635294117647, "grad_norm": 0.27234559908703104, "learning_rate": 2.6298636393534537e-06, "loss": 0.01942528635263443, "step": 61480 }, { "epoch": 0.5786823529411764, "grad_norm": 0.46729349422578204, "learning_rate": 2.62975670433345e-06, "loss": 0.018234674632549287, "step": 61485 }, { "epoch": 0.5787294117647059, "grad_norm": 0.5893985515326218, "learning_rate": 2.6296497823569013e-06, "loss": 0.021762388944625854, "step": 61490 }, { "epoch": 0.5787764705882353, "grad_norm": 0.4226325737610404, "learning_rate": 2.629542873421155e-06, "loss": 0.011521910130977631, "step": 61495 }, { "epoch": 0.5788235294117647, "grad_norm": 0.4240820290162518, "learning_rate": 2.6294359775235605e-06, "loss": 0.020612657070159912, "step": 61500 }, { "epoch": 0.5788705882352941, "grad_norm": 0.6541837158038016, "learning_rate": 2.629329094661469e-06, "loss": 0.019246366620063782, "step": 61505 }, { "epoch": 0.5789176470588235, "grad_norm": 0.530775402371585, "learning_rate": 2.6292222248322297e-06, "loss": 0.017657288908958436, "step": 61510 }, { "epoch": 0.5789647058823529, "grad_norm": 0.41710427844762077, "learning_rate": 2.629115368033196e-06, "loss": 0.014439348876476289, "step": 61515 }, { "epoch": 0.5790117647058823, "grad_norm": 0.47087217219168614, "learning_rate": 2.6290085242617185e-06, "loss": 0.02259867638349533, "step": 61520 }, { "epoch": 0.5790588235294117, "grad_norm": 0.837947928747958, "learning_rate": 2.628901693515151e-06, "loss": 0.021142578125, "step": 61525 }, { "epoch": 0.5791058823529411, "grad_norm": 0.45605002328307004, "learning_rate": 2.628794875790848e-06, "loss": 0.016379478573799133, "step": 61530 }, { "epoch": 0.5791529411764705, "grad_norm": 0.5779891814264636, "learning_rate": 2.6286880710861635e-06, "loss": 0.017325937747955322, "step": 61535 }, { "epoch": 0.5792, "grad_norm": 0.6016226312160393, "learning_rate": 2.628581279398452e-06, "loss": 0.018373697996139526, "step": 61540 }, { "epoch": 0.5792470588235294, "grad_norm": 0.48950388195399364, "learning_rate": 2.628474500725072e-06, "loss": 0.01586277186870575, "step": 61545 }, { "epoch": 0.5792941176470588, "grad_norm": 0.3436558482611121, "learning_rate": 2.628367735063377e-06, "loss": 0.01784059703350067, "step": 61550 }, { "epoch": 0.5793411764705882, "grad_norm": 0.568739480409249, "learning_rate": 2.6282609824107284e-06, "loss": 0.018502850830554963, "step": 61555 }, { "epoch": 0.5793882352941176, "grad_norm": 0.4456782498590945, "learning_rate": 2.628154242764482e-06, "loss": 0.0160180926322937, "step": 61560 }, { "epoch": 0.579435294117647, "grad_norm": 0.5919466717052423, "learning_rate": 2.628047516121997e-06, "loss": 0.022165167331695556, "step": 61565 }, { "epoch": 0.5794823529411764, "grad_norm": 0.48374486113990456, "learning_rate": 2.6279408024806343e-06, "loss": 0.02055642902851105, "step": 61570 }, { "epoch": 0.5795294117647058, "grad_norm": 0.5968762127033592, "learning_rate": 2.627834101837754e-06, "loss": 0.01908942461013794, "step": 61575 }, { "epoch": 0.5795764705882352, "grad_norm": 0.6124201916519099, "learning_rate": 2.627727414190718e-06, "loss": 0.014667361974716187, "step": 61580 }, { "epoch": 0.5796235294117648, "grad_norm": 0.4910313500504982, "learning_rate": 2.6276207395368868e-06, "loss": 0.019538679718971254, "step": 61585 }, { "epoch": 0.5796705882352942, "grad_norm": 0.425357020348683, "learning_rate": 2.627514077873625e-06, "loss": 0.01803102046251297, "step": 61590 }, { "epoch": 0.5797176470588236, "grad_norm": 0.4392070499540625, "learning_rate": 2.627407429198296e-06, "loss": 0.016934344172477724, "step": 61595 }, { "epoch": 0.579764705882353, "grad_norm": 0.4024643905884801, "learning_rate": 2.6273007935082634e-06, "loss": 0.01651638150215149, "step": 61600 }, { "epoch": 0.5798117647058824, "grad_norm": 0.76917150174668, "learning_rate": 2.6271941708008924e-06, "loss": 0.020161442458629608, "step": 61605 }, { "epoch": 0.5798588235294118, "grad_norm": 0.5947990482559165, "learning_rate": 2.62708756107355e-06, "loss": 0.014821664988994598, "step": 61610 }, { "epoch": 0.5799058823529412, "grad_norm": 0.3623725195155539, "learning_rate": 2.6269809643236017e-06, "loss": 0.01420479416847229, "step": 61615 }, { "epoch": 0.5799529411764706, "grad_norm": 0.4441868132502984, "learning_rate": 2.626874380548415e-06, "loss": 0.01413799673318863, "step": 61620 }, { "epoch": 0.58, "grad_norm": 0.7551463781302927, "learning_rate": 2.6267678097453577e-06, "loss": 0.022269690036773683, "step": 61625 }, { "epoch": 0.5800470588235294, "grad_norm": 0.4290337926585068, "learning_rate": 2.6266612519117995e-06, "loss": 0.0138186976313591, "step": 61630 }, { "epoch": 0.5800941176470589, "grad_norm": 0.5326928621491058, "learning_rate": 2.6265547070451097e-06, "loss": 0.014515919983386994, "step": 61635 }, { "epoch": 0.5801411764705883, "grad_norm": 0.32297778572767966, "learning_rate": 2.6264481751426585e-06, "loss": 0.015157821774482726, "step": 61640 }, { "epoch": 0.5801882352941177, "grad_norm": 1.2284902759278102, "learning_rate": 2.6263416562018167e-06, "loss": 0.018036291003227234, "step": 61645 }, { "epoch": 0.5802352941176471, "grad_norm": 0.38281053787224756, "learning_rate": 2.626235150219957e-06, "loss": 0.016015855967998503, "step": 61650 }, { "epoch": 0.5802823529411765, "grad_norm": 0.6996841149250234, "learning_rate": 2.626128657194451e-06, "loss": 0.016596853733062744, "step": 61655 }, { "epoch": 0.5803294117647059, "grad_norm": 0.43630383302008763, "learning_rate": 2.626022177122673e-06, "loss": 0.01654333621263504, "step": 61660 }, { "epoch": 0.5803764705882353, "grad_norm": 0.8532039831800183, "learning_rate": 2.6259157100019966e-06, "loss": 0.016137295961380006, "step": 61665 }, { "epoch": 0.5804235294117647, "grad_norm": 0.8981288631110462, "learning_rate": 2.625809255829796e-06, "loss": 0.016713225841522218, "step": 61670 }, { "epoch": 0.5804705882352941, "grad_norm": 0.4446870583157058, "learning_rate": 2.625702814603448e-06, "loss": 0.016046217083930968, "step": 61675 }, { "epoch": 0.5805176470588236, "grad_norm": 0.34543674604724584, "learning_rate": 2.6255963863203277e-06, "loss": 0.011802671849727631, "step": 61680 }, { "epoch": 0.580564705882353, "grad_norm": 0.46777019156850097, "learning_rate": 2.625489970977813e-06, "loss": 0.01967293322086334, "step": 61685 }, { "epoch": 0.5806117647058824, "grad_norm": 0.4655483347666518, "learning_rate": 2.625383568573282e-06, "loss": 0.02395203560590744, "step": 61690 }, { "epoch": 0.5806588235294118, "grad_norm": 0.41478620704984964, "learning_rate": 2.625277179104112e-06, "loss": 0.01451086550951004, "step": 61695 }, { "epoch": 0.5807058823529412, "grad_norm": 0.3995014255787932, "learning_rate": 2.6251708025676832e-06, "loss": 0.017139627039432524, "step": 61700 }, { "epoch": 0.5807529411764706, "grad_norm": 0.6409095211008463, "learning_rate": 2.625064438961375e-06, "loss": 0.017004646360874176, "step": 61705 }, { "epoch": 0.5808, "grad_norm": 0.3550314027155615, "learning_rate": 2.624958088282569e-06, "loss": 0.01509324014186859, "step": 61710 }, { "epoch": 0.5808470588235294, "grad_norm": 0.4485044881917624, "learning_rate": 2.6248517505286465e-06, "loss": 0.01687575876712799, "step": 61715 }, { "epoch": 0.5808941176470588, "grad_norm": 0.49322331714967127, "learning_rate": 2.624745425696989e-06, "loss": 0.015750154852867126, "step": 61720 }, { "epoch": 0.5809411764705882, "grad_norm": 0.7587107715106944, "learning_rate": 2.6246391137849803e-06, "loss": 0.01691909283399582, "step": 61725 }, { "epoch": 0.5809882352941177, "grad_norm": 0.4928547747018869, "learning_rate": 2.624532814790004e-06, "loss": 0.01714244782924652, "step": 61730 }, { "epoch": 0.5810352941176471, "grad_norm": 0.49438438662789075, "learning_rate": 2.6244265287094446e-06, "loss": 0.01936875581741333, "step": 61735 }, { "epoch": 0.5810823529411765, "grad_norm": 0.6074055706731674, "learning_rate": 2.6243202555406873e-06, "loss": 0.02121374011039734, "step": 61740 }, { "epoch": 0.5811294117647059, "grad_norm": 0.4532735514365461, "learning_rate": 2.624213995281117e-06, "loss": 0.013436511158943176, "step": 61745 }, { "epoch": 0.5811764705882353, "grad_norm": 0.4789372172535546, "learning_rate": 2.624107747928122e-06, "loss": 0.016889098286628722, "step": 61750 }, { "epoch": 0.5812235294117647, "grad_norm": 0.3949648578538877, "learning_rate": 2.624001513479089e-06, "loss": 0.013224031031131744, "step": 61755 }, { "epoch": 0.5812705882352941, "grad_norm": 0.6110917351931056, "learning_rate": 2.623895291931407e-06, "loss": 0.01451798677444458, "step": 61760 }, { "epoch": 0.5813176470588235, "grad_norm": 0.38086589773402796, "learning_rate": 2.623789083282464e-06, "loss": 0.02093268036842346, "step": 61765 }, { "epoch": 0.5813647058823529, "grad_norm": 0.4928046042148826, "learning_rate": 2.6236828875296495e-06, "loss": 0.016849622130393982, "step": 61770 }, { "epoch": 0.5814117647058824, "grad_norm": 0.4567199275641934, "learning_rate": 2.623576704670355e-06, "loss": 0.01640784293413162, "step": 61775 }, { "epoch": 0.5814588235294118, "grad_norm": 0.637684182919338, "learning_rate": 2.62347053470197e-06, "loss": 0.018720841407775878, "step": 61780 }, { "epoch": 0.5815058823529412, "grad_norm": 0.4317025647297824, "learning_rate": 2.623364377621888e-06, "loss": 0.01673818826675415, "step": 61785 }, { "epoch": 0.5815529411764706, "grad_norm": 0.49585760602328677, "learning_rate": 2.6232582334275013e-06, "loss": 0.014468628168106078, "step": 61790 }, { "epoch": 0.5816, "grad_norm": 0.42813992580938603, "learning_rate": 2.623152102116202e-06, "loss": 0.01670306921005249, "step": 61795 }, { "epoch": 0.5816470588235294, "grad_norm": 0.8956742083123341, "learning_rate": 2.6230459836853866e-06, "loss": 0.01463107466697693, "step": 61800 }, { "epoch": 0.5816941176470588, "grad_norm": 0.4999892885556354, "learning_rate": 2.622939878132447e-06, "loss": 0.017919987440109253, "step": 61805 }, { "epoch": 0.5817411764705882, "grad_norm": 0.5293859066252808, "learning_rate": 2.6228337854547815e-06, "loss": 0.017026714980602264, "step": 61810 }, { "epoch": 0.5817882352941176, "grad_norm": 0.4160833430290249, "learning_rate": 2.622727705649784e-06, "loss": 0.014016175270080566, "step": 61815 }, { "epoch": 0.581835294117647, "grad_norm": 0.3601232254662074, "learning_rate": 2.6226216387148535e-06, "loss": 0.013589249551296234, "step": 61820 }, { "epoch": 0.5818823529411765, "grad_norm": 0.3903913800406992, "learning_rate": 2.622515584647387e-06, "loss": 0.017076218128204347, "step": 61825 }, { "epoch": 0.5819294117647059, "grad_norm": 0.7451442019407801, "learning_rate": 2.6224095434447834e-06, "loss": 0.017956756055355072, "step": 61830 }, { "epoch": 0.5819764705882353, "grad_norm": 0.4991096259578867, "learning_rate": 2.6223035151044413e-06, "loss": 0.020335482060909273, "step": 61835 }, { "epoch": 0.5820235294117647, "grad_norm": 0.46961636988060595, "learning_rate": 2.622197499623761e-06, "loss": 0.019260674715042114, "step": 61840 }, { "epoch": 0.5820705882352941, "grad_norm": 0.5047618430537, "learning_rate": 2.6220914970001434e-06, "loss": 0.015136541426181793, "step": 61845 }, { "epoch": 0.5821176470588235, "grad_norm": 0.42029323733157026, "learning_rate": 2.62198550723099e-06, "loss": 0.016486437618732454, "step": 61850 }, { "epoch": 0.5821647058823529, "grad_norm": 0.531435151290333, "learning_rate": 2.621879530313702e-06, "loss": 0.01704365909099579, "step": 61855 }, { "epoch": 0.5822117647058823, "grad_norm": 0.5227599738977068, "learning_rate": 2.6217735662456833e-06, "loss": 0.014649608731269836, "step": 61860 }, { "epoch": 0.5822588235294117, "grad_norm": 0.37923786503932366, "learning_rate": 2.621667615024338e-06, "loss": 0.013319283723831177, "step": 61865 }, { "epoch": 0.5823058823529412, "grad_norm": 0.5044194180940518, "learning_rate": 2.6215616766470696e-06, "loss": 0.016238264739513397, "step": 61870 }, { "epoch": 0.5823529411764706, "grad_norm": 0.5556756397779978, "learning_rate": 2.6214557511112844e-06, "loss": 0.019843477010726928, "step": 61875 }, { "epoch": 0.5824, "grad_norm": 0.7033393037999893, "learning_rate": 2.6213498384143867e-06, "loss": 0.01805383265018463, "step": 61880 }, { "epoch": 0.5824470588235294, "grad_norm": 0.5393778648838097, "learning_rate": 2.621243938553784e-06, "loss": 0.015001189708709717, "step": 61885 }, { "epoch": 0.5824941176470588, "grad_norm": 0.5755624020143111, "learning_rate": 2.621138051526883e-06, "loss": 0.017533883452415466, "step": 61890 }, { "epoch": 0.5825411764705882, "grad_norm": 0.4928611310512956, "learning_rate": 2.621032177331093e-06, "loss": 0.01782531142234802, "step": 61895 }, { "epoch": 0.5825882352941176, "grad_norm": 0.560710813106816, "learning_rate": 2.620926315963822e-06, "loss": 0.01365470588207245, "step": 61900 }, { "epoch": 0.582635294117647, "grad_norm": 0.4639861250325286, "learning_rate": 2.6208204674224795e-06, "loss": 0.017973430454730988, "step": 61905 }, { "epoch": 0.5826823529411764, "grad_norm": 0.42100629610036017, "learning_rate": 2.6207146317044764e-06, "loss": 0.013499711453914643, "step": 61910 }, { "epoch": 0.5827294117647058, "grad_norm": 0.6579559462001348, "learning_rate": 2.620608808807223e-06, "loss": 0.017636677622795104, "step": 61915 }, { "epoch": 0.5827764705882353, "grad_norm": 0.5401478323416957, "learning_rate": 2.620502998728131e-06, "loss": 0.018670609593391417, "step": 61920 }, { "epoch": 0.5828235294117647, "grad_norm": 0.6069663539049435, "learning_rate": 2.6203972014646127e-06, "loss": 0.01636718064546585, "step": 61925 }, { "epoch": 0.5828705882352941, "grad_norm": 0.5512372866072637, "learning_rate": 2.620291417014083e-06, "loss": 0.0181319922208786, "step": 61930 }, { "epoch": 0.5829176470588235, "grad_norm": 0.41231512326843206, "learning_rate": 2.620185645373954e-06, "loss": 0.013678735494613648, "step": 61935 }, { "epoch": 0.5829647058823529, "grad_norm": 0.30480383782765846, "learning_rate": 2.6200798865416405e-06, "loss": 0.014103685319423676, "step": 61940 }, { "epoch": 0.5830117647058823, "grad_norm": 0.5646336616130287, "learning_rate": 2.619974140514559e-06, "loss": 0.015827439725399017, "step": 61945 }, { "epoch": 0.5830588235294117, "grad_norm": 0.4383703209124712, "learning_rate": 2.619868407290125e-06, "loss": 0.019431427121162415, "step": 61950 }, { "epoch": 0.5831058823529411, "grad_norm": 0.43842705716036073, "learning_rate": 2.6197626868657543e-06, "loss": 0.015217530727386474, "step": 61955 }, { "epoch": 0.5831529411764705, "grad_norm": 1.0266635595788873, "learning_rate": 2.6196569792388667e-06, "loss": 0.016348031163215638, "step": 61960 }, { "epoch": 0.5832, "grad_norm": 0.30082163968966025, "learning_rate": 2.6195512844068784e-06, "loss": 0.010896679759025574, "step": 61965 }, { "epoch": 0.5832470588235295, "grad_norm": 0.6216314122168806, "learning_rate": 2.61944560236721e-06, "loss": 0.018284094333648682, "step": 61970 }, { "epoch": 0.5832941176470589, "grad_norm": 0.43998342603025364, "learning_rate": 2.61933993311728e-06, "loss": 0.015078604221343994, "step": 61975 }, { "epoch": 0.5833411764705883, "grad_norm": 0.5407710829826717, "learning_rate": 2.6192342766545096e-06, "loss": 0.014541724324226379, "step": 61980 }, { "epoch": 0.5833882352941177, "grad_norm": 0.5773602065002199, "learning_rate": 2.6191286329763204e-06, "loss": 0.013082735240459442, "step": 61985 }, { "epoch": 0.583435294117647, "grad_norm": 0.4787490233841318, "learning_rate": 2.619023002080133e-06, "loss": 0.015032017230987548, "step": 61990 }, { "epoch": 0.5834823529411765, "grad_norm": 0.44551068826321716, "learning_rate": 2.6189173839633717e-06, "loss": 0.01669885665178299, "step": 61995 }, { "epoch": 0.5835294117647059, "grad_norm": 0.6413623147673487, "learning_rate": 2.6188117786234594e-06, "loss": 0.01637158393859863, "step": 62000 }, { "epoch": 0.5835764705882353, "grad_norm": 0.324863557645973, "learning_rate": 2.6187061860578195e-06, "loss": 0.016150885820388795, "step": 62005 }, { "epoch": 0.5836235294117647, "grad_norm": 0.39649846170104996, "learning_rate": 2.6186006062638775e-06, "loss": 0.018460354208946227, "step": 62010 }, { "epoch": 0.5836705882352942, "grad_norm": 0.4884735402323867, "learning_rate": 2.618495039239059e-06, "loss": 0.01291106641292572, "step": 62015 }, { "epoch": 0.5837176470588236, "grad_norm": 0.4939893803267159, "learning_rate": 2.6183894849807902e-06, "loss": 0.013630713522434234, "step": 62020 }, { "epoch": 0.583764705882353, "grad_norm": 0.4463079262685841, "learning_rate": 2.6182839434864983e-06, "loss": 0.01664503514766693, "step": 62025 }, { "epoch": 0.5838117647058824, "grad_norm": 0.7399974012404772, "learning_rate": 2.6181784147536103e-06, "loss": 0.015824854373931885, "step": 62030 }, { "epoch": 0.5838588235294118, "grad_norm": 0.5809907540884266, "learning_rate": 2.6180728987795555e-06, "loss": 0.013716913759708405, "step": 62035 }, { "epoch": 0.5839058823529412, "grad_norm": 0.4204179504186986, "learning_rate": 2.617967395561763e-06, "loss": 0.013619421422481537, "step": 62040 }, { "epoch": 0.5839529411764706, "grad_norm": 0.3991615863189914, "learning_rate": 2.6178619050976626e-06, "loss": 0.019039657711982728, "step": 62045 }, { "epoch": 0.584, "grad_norm": 0.48877614764463784, "learning_rate": 2.617756427384685e-06, "loss": 0.019170214235782624, "step": 62050 }, { "epoch": 0.5840470588235294, "grad_norm": 0.7258452676078085, "learning_rate": 2.617650962420262e-06, "loss": 0.01590014100074768, "step": 62055 }, { "epoch": 0.5840941176470589, "grad_norm": 0.33715716814846874, "learning_rate": 2.6175455102018247e-06, "loss": 0.02264217734336853, "step": 62060 }, { "epoch": 0.5841411764705883, "grad_norm": 0.5664010642270842, "learning_rate": 2.6174400707268072e-06, "loss": 0.015173178911209107, "step": 62065 }, { "epoch": 0.5841882352941177, "grad_norm": 0.5715338843765166, "learning_rate": 2.617334643992642e-06, "loss": 0.018573236465454102, "step": 62070 }, { "epoch": 0.5842352941176471, "grad_norm": 0.5043054623586888, "learning_rate": 2.617229229996764e-06, "loss": 0.02305525988340378, "step": 62075 }, { "epoch": 0.5842823529411765, "grad_norm": 0.5048029381357586, "learning_rate": 2.617123828736608e-06, "loss": 0.01705201864242554, "step": 62080 }, { "epoch": 0.5843294117647059, "grad_norm": 0.3369268812875224, "learning_rate": 2.61701844020961e-06, "loss": 0.016897717118263246, "step": 62085 }, { "epoch": 0.5843764705882353, "grad_norm": 0.4809050305921399, "learning_rate": 2.6169130644132062e-06, "loss": 0.01634106636047363, "step": 62090 }, { "epoch": 0.5844235294117647, "grad_norm": 0.895346031516667, "learning_rate": 2.6168077013448334e-06, "loss": 0.015864770114421844, "step": 62095 }, { "epoch": 0.5844705882352941, "grad_norm": 0.5685711947182462, "learning_rate": 2.616702351001931e-06, "loss": 0.016719433665275573, "step": 62100 }, { "epoch": 0.5845176470588235, "grad_norm": 0.6344853947281428, "learning_rate": 2.6165970133819353e-06, "loss": 0.017396198213100435, "step": 62105 }, { "epoch": 0.584564705882353, "grad_norm": 0.3540530359873238, "learning_rate": 2.6164916884822876e-06, "loss": 0.008751549571752549, "step": 62110 }, { "epoch": 0.5846117647058824, "grad_norm": 0.668236344880074, "learning_rate": 2.616386376300427e-06, "loss": 0.018568095564842225, "step": 62115 }, { "epoch": 0.5846588235294118, "grad_norm": 0.34051719802817704, "learning_rate": 2.616281076833795e-06, "loss": 0.013983941078186036, "step": 62120 }, { "epoch": 0.5847058823529412, "grad_norm": 0.5263903201292939, "learning_rate": 2.6161757900798317e-06, "loss": 0.015953271090984343, "step": 62125 }, { "epoch": 0.5847529411764706, "grad_norm": 0.5129331846858424, "learning_rate": 2.616070516035981e-06, "loss": 0.016566070914268493, "step": 62130 }, { "epoch": 0.5848, "grad_norm": 0.5840732127118571, "learning_rate": 2.6159652546996856e-06, "loss": 0.016008360683917998, "step": 62135 }, { "epoch": 0.5848470588235294, "grad_norm": 0.5018534590068053, "learning_rate": 2.615860006068388e-06, "loss": 0.016982948780059813, "step": 62140 }, { "epoch": 0.5848941176470588, "grad_norm": 0.5054899601259801, "learning_rate": 2.6157547701395335e-06, "loss": 0.016147106885910034, "step": 62145 }, { "epoch": 0.5849411764705882, "grad_norm": 0.49317887165521795, "learning_rate": 2.615649546910567e-06, "loss": 0.017102758586406707, "step": 62150 }, { "epoch": 0.5849882352941177, "grad_norm": 0.5862299412700325, "learning_rate": 2.615544336378935e-06, "loss": 0.018630106747150422, "step": 62155 }, { "epoch": 0.5850352941176471, "grad_norm": 0.3917418796931049, "learning_rate": 2.615439138542083e-06, "loss": 0.01348472535610199, "step": 62160 }, { "epoch": 0.5850823529411765, "grad_norm": 0.6753710873752011, "learning_rate": 2.6153339533974588e-06, "loss": 0.01701476126909256, "step": 62165 }, { "epoch": 0.5851294117647059, "grad_norm": 0.40988973415795754, "learning_rate": 2.61522878094251e-06, "loss": 0.017070378363132476, "step": 62170 }, { "epoch": 0.5851764705882353, "grad_norm": 0.5751089482806743, "learning_rate": 2.6151236211746855e-06, "loss": 0.019988039135932924, "step": 62175 }, { "epoch": 0.5852235294117647, "grad_norm": 0.4745799667742494, "learning_rate": 2.615018474091435e-06, "loss": 0.0129814013838768, "step": 62180 }, { "epoch": 0.5852705882352941, "grad_norm": 0.5395783452724923, "learning_rate": 2.614913339690208e-06, "loss": 0.016897979378700256, "step": 62185 }, { "epoch": 0.5853176470588235, "grad_norm": 0.3966127302604232, "learning_rate": 2.6148082179684564e-06, "loss": 0.014325633645057678, "step": 62190 }, { "epoch": 0.5853647058823529, "grad_norm": 0.43858986424092106, "learning_rate": 2.6147031089236314e-06, "loss": 0.01712231934070587, "step": 62195 }, { "epoch": 0.5854117647058823, "grad_norm": 0.5623874054167306, "learning_rate": 2.6145980125531844e-06, "loss": 0.019475984573364257, "step": 62200 }, { "epoch": 0.5854588235294118, "grad_norm": 0.5392746897904179, "learning_rate": 2.6144929288545693e-06, "loss": 0.016416144371032716, "step": 62205 }, { "epoch": 0.5855058823529412, "grad_norm": 0.4350061756344078, "learning_rate": 2.6143878578252403e-06, "loss": 0.013140881061553955, "step": 62210 }, { "epoch": 0.5855529411764706, "grad_norm": 0.5274820004686872, "learning_rate": 2.6142827994626503e-06, "loss": 0.017381951212882996, "step": 62215 }, { "epoch": 0.5856, "grad_norm": 0.42354921872954715, "learning_rate": 2.6141777537642554e-06, "loss": 0.012893494963645936, "step": 62220 }, { "epoch": 0.5856470588235294, "grad_norm": 0.8955043256518449, "learning_rate": 2.6140727207275118e-06, "loss": 0.019429221749305725, "step": 62225 }, { "epoch": 0.5856941176470588, "grad_norm": 0.5067646813115217, "learning_rate": 2.613967700349876e-06, "loss": 0.01708022356033325, "step": 62230 }, { "epoch": 0.5857411764705882, "grad_norm": 0.5052144952637035, "learning_rate": 2.613862692628804e-06, "loss": 0.013777738809585572, "step": 62235 }, { "epoch": 0.5857882352941176, "grad_norm": 0.44890497181300215, "learning_rate": 2.6137576975617557e-06, "loss": 0.01733761131763458, "step": 62240 }, { "epoch": 0.585835294117647, "grad_norm": 0.49953913079786316, "learning_rate": 2.6136527151461887e-06, "loss": 0.014216670393943786, "step": 62245 }, { "epoch": 0.5858823529411765, "grad_norm": 1.0376529580642073, "learning_rate": 2.6135477453795626e-06, "loss": 0.019328801333904265, "step": 62250 }, { "epoch": 0.5859294117647059, "grad_norm": 0.539532836062038, "learning_rate": 2.613442788259337e-06, "loss": 0.016655600070953368, "step": 62255 }, { "epoch": 0.5859764705882353, "grad_norm": 0.5254730787887073, "learning_rate": 2.613337843782975e-06, "loss": 0.015145348012447357, "step": 62260 }, { "epoch": 0.5860235294117647, "grad_norm": 0.5131140000894852, "learning_rate": 2.6132329119479348e-06, "loss": 0.014743469655513763, "step": 62265 }, { "epoch": 0.5860705882352941, "grad_norm": 0.4416926811335087, "learning_rate": 2.6131279927516815e-06, "loss": 0.018765610456466675, "step": 62270 }, { "epoch": 0.5861176470588235, "grad_norm": 0.3163452543513624, "learning_rate": 2.613023086191678e-06, "loss": 0.01701599359512329, "step": 62275 }, { "epoch": 0.5861647058823529, "grad_norm": 0.6145435113472378, "learning_rate": 2.6129181922653855e-06, "loss": 0.0197923481464386, "step": 62280 }, { "epoch": 0.5862117647058823, "grad_norm": 0.3067587151932503, "learning_rate": 2.612813310970271e-06, "loss": 0.01485418975353241, "step": 62285 }, { "epoch": 0.5862588235294117, "grad_norm": 0.3465955509738638, "learning_rate": 2.612708442303799e-06, "loss": 0.013575536012649537, "step": 62290 }, { "epoch": 0.5863058823529411, "grad_norm": 0.5324860139021383, "learning_rate": 2.6126035862634347e-06, "loss": 0.016636991500854494, "step": 62295 }, { "epoch": 0.5863529411764706, "grad_norm": 0.5879805632486964, "learning_rate": 2.6124987428466457e-06, "loss": 0.01767691969871521, "step": 62300 }, { "epoch": 0.5864, "grad_norm": 0.666889674655702, "learning_rate": 2.6123939120508983e-06, "loss": 0.016441094875335693, "step": 62305 }, { "epoch": 0.5864470588235294, "grad_norm": 0.5258334426408299, "learning_rate": 2.6122890938736612e-06, "loss": 0.01788707673549652, "step": 62310 }, { "epoch": 0.5864941176470588, "grad_norm": 0.45110447024624845, "learning_rate": 2.6121842883124026e-06, "loss": 0.014891576766967774, "step": 62315 }, { "epoch": 0.5865411764705882, "grad_norm": 0.6265347224611759, "learning_rate": 2.6120794953645924e-06, "loss": 0.019509586691856384, "step": 62320 }, { "epoch": 0.5865882352941176, "grad_norm": 0.2767880543915297, "learning_rate": 2.6119747150277e-06, "loss": 0.013681440055370331, "step": 62325 }, { "epoch": 0.586635294117647, "grad_norm": 0.5607054848390329, "learning_rate": 2.611869947299197e-06, "loss": 0.016319841146469116, "step": 62330 }, { "epoch": 0.5866823529411764, "grad_norm": 0.44943079041063394, "learning_rate": 2.611765192176555e-06, "loss": 0.015649504959583282, "step": 62335 }, { "epoch": 0.5867294117647058, "grad_norm": 0.7368288162708563, "learning_rate": 2.6116604496572462e-06, "loss": 0.017894713580608367, "step": 62340 }, { "epoch": 0.5867764705882353, "grad_norm": 0.18694710395086114, "learning_rate": 2.6115557197387424e-06, "loss": 0.013329309225082398, "step": 62345 }, { "epoch": 0.5868235294117647, "grad_norm": 0.50385964770365, "learning_rate": 2.6114510024185192e-06, "loss": 0.020367945730686187, "step": 62350 }, { "epoch": 0.5868705882352941, "grad_norm": 0.481108377660643, "learning_rate": 2.6113462976940495e-06, "loss": 0.019926370680332185, "step": 62355 }, { "epoch": 0.5869176470588235, "grad_norm": 0.4304062556994926, "learning_rate": 2.611241605562809e-06, "loss": 0.016992132365703582, "step": 62360 }, { "epoch": 0.5869647058823529, "grad_norm": 0.7234403395980772, "learning_rate": 2.6111369260222734e-06, "loss": 0.015061074495315551, "step": 62365 }, { "epoch": 0.5870117647058823, "grad_norm": 0.2896037180560589, "learning_rate": 2.6110322590699195e-06, "loss": 0.016708189249038698, "step": 62370 }, { "epoch": 0.5870588235294117, "grad_norm": 0.616344634471462, "learning_rate": 2.6109276047032237e-06, "loss": 0.01692144274711609, "step": 62375 }, { "epoch": 0.5871058823529411, "grad_norm": 0.5972512475790523, "learning_rate": 2.6108229629196657e-06, "loss": 0.014789322018623352, "step": 62380 }, { "epoch": 0.5871529411764705, "grad_norm": 0.4018973236907447, "learning_rate": 2.6107183337167218e-06, "loss": 0.014278900623321534, "step": 62385 }, { "epoch": 0.5872, "grad_norm": 0.5561920594438028, "learning_rate": 2.610613717091873e-06, "loss": 0.017681792378425598, "step": 62390 }, { "epoch": 0.5872470588235295, "grad_norm": 0.5207935137302893, "learning_rate": 2.610509113042599e-06, "loss": 0.020433542132377625, "step": 62395 }, { "epoch": 0.5872941176470589, "grad_norm": 0.46487001876295764, "learning_rate": 2.6104045215663802e-06, "loss": 0.014709676802158355, "step": 62400 }, { "epoch": 0.5873411764705883, "grad_norm": 0.5654685827731, "learning_rate": 2.6102999426606982e-06, "loss": 0.017447857558727263, "step": 62405 }, { "epoch": 0.5873882352941177, "grad_norm": 0.598329214135329, "learning_rate": 2.6101953763230357e-06, "loss": 0.01771714389324188, "step": 62410 }, { "epoch": 0.587435294117647, "grad_norm": 0.4770593877096573, "learning_rate": 2.6100908225508752e-06, "loss": 0.01449180543422699, "step": 62415 }, { "epoch": 0.5874823529411765, "grad_norm": 0.6931630968677251, "learning_rate": 2.6099862813417e-06, "loss": 0.022151610255241393, "step": 62420 }, { "epoch": 0.5875294117647059, "grad_norm": 0.6479225170372586, "learning_rate": 2.609881752692995e-06, "loss": 0.016910701990127563, "step": 62425 }, { "epoch": 0.5875764705882353, "grad_norm": 0.7703193638573882, "learning_rate": 2.609777236602244e-06, "loss": 0.019685235619544984, "step": 62430 }, { "epoch": 0.5876235294117647, "grad_norm": 1.0901310867370422, "learning_rate": 2.609672733066934e-06, "loss": 0.017804530262947083, "step": 62435 }, { "epoch": 0.5876705882352942, "grad_norm": 0.5148145717179526, "learning_rate": 2.6095682420845514e-06, "loss": 0.02106863260269165, "step": 62440 }, { "epoch": 0.5877176470588236, "grad_norm": 0.6082974649747612, "learning_rate": 2.6094637636525827e-06, "loss": 0.016505321860313414, "step": 62445 }, { "epoch": 0.587764705882353, "grad_norm": 0.3558518005962765, "learning_rate": 2.6093592977685155e-06, "loss": 0.018731376528739928, "step": 62450 }, { "epoch": 0.5878117647058824, "grad_norm": 0.3654643900859617, "learning_rate": 2.609254844429839e-06, "loss": 0.012117937207221985, "step": 62455 }, { "epoch": 0.5878588235294118, "grad_norm": 0.4151606921684834, "learning_rate": 2.6091504036340416e-06, "loss": 0.020456963777542116, "step": 62460 }, { "epoch": 0.5879058823529412, "grad_norm": 0.35555819971771824, "learning_rate": 2.609045975378614e-06, "loss": 0.01476215273141861, "step": 62465 }, { "epoch": 0.5879529411764706, "grad_norm": 0.4444512189163428, "learning_rate": 2.608941559661047e-06, "loss": 0.01990317553281784, "step": 62470 }, { "epoch": 0.588, "grad_norm": 0.6751763470617806, "learning_rate": 2.6088371564788315e-06, "loss": 0.019416484236717223, "step": 62475 }, { "epoch": 0.5880470588235294, "grad_norm": 0.7882735931499246, "learning_rate": 2.6087327658294594e-06, "loss": 0.018572145700454713, "step": 62480 }, { "epoch": 0.5880941176470589, "grad_norm": 0.4392465054648316, "learning_rate": 2.6086283877104235e-06, "loss": 0.014722499251365661, "step": 62485 }, { "epoch": 0.5881411764705883, "grad_norm": 0.4521792176090712, "learning_rate": 2.6085240221192176e-06, "loss": 0.015902259945869447, "step": 62490 }, { "epoch": 0.5881882352941177, "grad_norm": 0.4495682519560523, "learning_rate": 2.6084196690533355e-06, "loss": 0.017584061622619628, "step": 62495 }, { "epoch": 0.5882352941176471, "grad_norm": 0.493202545091586, "learning_rate": 2.608315328510272e-06, "loss": 0.012937353551387787, "step": 62500 }, { "epoch": 0.5882823529411765, "grad_norm": 0.6046681631526722, "learning_rate": 2.6082110004875233e-06, "loss": 0.018931640684604643, "step": 62505 }, { "epoch": 0.5883294117647059, "grad_norm": 0.5139711323820988, "learning_rate": 2.6081066849825842e-06, "loss": 0.014275026321411134, "step": 62510 }, { "epoch": 0.5883764705882353, "grad_norm": 0.37956471500314287, "learning_rate": 2.6080023819929533e-06, "loss": 0.012615340948104858, "step": 62515 }, { "epoch": 0.5884235294117647, "grad_norm": 0.5614013080394676, "learning_rate": 2.6078980915161273e-06, "loss": 0.01773155778646469, "step": 62520 }, { "epoch": 0.5884705882352941, "grad_norm": 0.47787486121733125, "learning_rate": 2.6077938135496047e-06, "loss": 0.01705062687397003, "step": 62525 }, { "epoch": 0.5885176470588235, "grad_norm": 0.35979007364334065, "learning_rate": 2.6076895480908848e-06, "loss": 0.013410750031471252, "step": 62530 }, { "epoch": 0.588564705882353, "grad_norm": 0.5279448815736529, "learning_rate": 2.607585295137467e-06, "loss": 0.018703016638755798, "step": 62535 }, { "epoch": 0.5886117647058824, "grad_norm": 0.5012921802525477, "learning_rate": 2.607481054686852e-06, "loss": 0.014472214877605439, "step": 62540 }, { "epoch": 0.5886588235294118, "grad_norm": 0.5037649366873219, "learning_rate": 2.6073768267365407e-06, "loss": 0.019099040329456328, "step": 62545 }, { "epoch": 0.5887058823529412, "grad_norm": 0.4982095277817595, "learning_rate": 2.607272611284035e-06, "loss": 0.017020568251609802, "step": 62550 }, { "epoch": 0.5887529411764706, "grad_norm": 0.6912160630461323, "learning_rate": 2.6071684083268382e-06, "loss": 0.014985233545303345, "step": 62555 }, { "epoch": 0.5888, "grad_norm": 0.42258694412488984, "learning_rate": 2.6070642178624524e-06, "loss": 0.014227315783500671, "step": 62560 }, { "epoch": 0.5888470588235294, "grad_norm": 0.42703297998617135, "learning_rate": 2.6069600398883827e-06, "loss": 0.02204636186361313, "step": 62565 }, { "epoch": 0.5888941176470588, "grad_norm": 0.43478157376234605, "learning_rate": 2.606855874402133e-06, "loss": 0.017314976453781127, "step": 62570 }, { "epoch": 0.5889411764705882, "grad_norm": 0.6471105203487637, "learning_rate": 2.6067517214012076e-06, "loss": 0.018271908164024353, "step": 62575 }, { "epoch": 0.5889882352941177, "grad_norm": 0.5877458373448408, "learning_rate": 2.606647580883115e-06, "loss": 0.017156419157981873, "step": 62580 }, { "epoch": 0.5890352941176471, "grad_norm": 0.6059552636950171, "learning_rate": 2.6065434528453597e-06, "loss": 0.01882849931716919, "step": 62585 }, { "epoch": 0.5890823529411765, "grad_norm": 0.4296224359202265, "learning_rate": 2.60643933728545e-06, "loss": 0.016979381442070007, "step": 62590 }, { "epoch": 0.5891294117647059, "grad_norm": 0.46080227907386456, "learning_rate": 2.6063352342008947e-06, "loss": 0.017612800002098083, "step": 62595 }, { "epoch": 0.5891764705882353, "grad_norm": 0.5493439469924489, "learning_rate": 2.6062311435892018e-06, "loss": 0.01657266914844513, "step": 62600 }, { "epoch": 0.5892235294117647, "grad_norm": 0.48868959983164645, "learning_rate": 2.606127065447881e-06, "loss": 0.019002117216587067, "step": 62605 }, { "epoch": 0.5892705882352941, "grad_norm": 1.5276636550750438, "learning_rate": 2.606022999774443e-06, "loss": 0.016185978055000307, "step": 62610 }, { "epoch": 0.5893176470588235, "grad_norm": 0.4111337477744669, "learning_rate": 2.6059189465663976e-06, "loss": 0.016758865118026732, "step": 62615 }, { "epoch": 0.5893647058823529, "grad_norm": 0.4621644656106557, "learning_rate": 2.605814905821257e-06, "loss": 0.0171764075756073, "step": 62620 }, { "epoch": 0.5894117647058823, "grad_norm": 0.5449459176636989, "learning_rate": 2.6057108775365342e-06, "loss": 0.012778261303901672, "step": 62625 }, { "epoch": 0.5894588235294118, "grad_norm": 0.2890420390500667, "learning_rate": 2.6056068617097407e-06, "loss": 0.02064555585384369, "step": 62630 }, { "epoch": 0.5895058823529412, "grad_norm": 0.5577860354058161, "learning_rate": 2.6055028583383916e-06, "loss": 0.015983015298843384, "step": 62635 }, { "epoch": 0.5895529411764706, "grad_norm": 0.56526391662657, "learning_rate": 2.6053988674200007e-06, "loss": 0.022500720620155335, "step": 62640 }, { "epoch": 0.5896, "grad_norm": 0.47352614704770735, "learning_rate": 2.6052948889520836e-06, "loss": 0.014922916889190674, "step": 62645 }, { "epoch": 0.5896470588235294, "grad_norm": 0.533942260200803, "learning_rate": 2.6051909229321546e-06, "loss": 0.014336296916007995, "step": 62650 }, { "epoch": 0.5896941176470588, "grad_norm": 0.701335052068337, "learning_rate": 2.605086969357732e-06, "loss": 0.015593641996383667, "step": 62655 }, { "epoch": 0.5897411764705882, "grad_norm": 0.38721578038863663, "learning_rate": 2.6049830282263318e-06, "loss": 0.012085798382759094, "step": 62660 }, { "epoch": 0.5897882352941176, "grad_norm": 0.442102649377492, "learning_rate": 2.604879099535472e-06, "loss": 0.015611606836318969, "step": 62665 }, { "epoch": 0.589835294117647, "grad_norm": 0.39984478642822924, "learning_rate": 2.6047751832826717e-06, "loss": 0.015322282910346985, "step": 62670 }, { "epoch": 0.5898823529411765, "grad_norm": 0.45665919769390506, "learning_rate": 2.60467127946545e-06, "loss": 0.018828511238098145, "step": 62675 }, { "epoch": 0.5899294117647059, "grad_norm": 0.5020145680242875, "learning_rate": 2.6045673880813265e-06, "loss": 0.015291984379291534, "step": 62680 }, { "epoch": 0.5899764705882353, "grad_norm": 0.5694885764232139, "learning_rate": 2.6044635091278214e-06, "loss": 0.016863667964935304, "step": 62685 }, { "epoch": 0.5900235294117647, "grad_norm": 0.5702247553883276, "learning_rate": 2.6043596426024576e-06, "loss": 0.016853535175323488, "step": 62690 }, { "epoch": 0.5900705882352941, "grad_norm": 0.3775954693315775, "learning_rate": 2.6042557885027555e-06, "loss": 0.01612136363983154, "step": 62695 }, { "epoch": 0.5901176470588235, "grad_norm": 0.45307714608128496, "learning_rate": 2.6041519468262382e-06, "loss": 0.017379176616668702, "step": 62700 }, { "epoch": 0.5901647058823529, "grad_norm": 0.6627598291931514, "learning_rate": 2.6040481175704296e-06, "loss": 0.02143886238336563, "step": 62705 }, { "epoch": 0.5902117647058823, "grad_norm": 0.450132678450506, "learning_rate": 2.6039443007328537e-06, "loss": 0.01580643802881241, "step": 62710 }, { "epoch": 0.5902588235294117, "grad_norm": 0.6077011473928243, "learning_rate": 2.6038404963110346e-06, "loss": 0.013666702806949616, "step": 62715 }, { "epoch": 0.5903058823529411, "grad_norm": 0.5456394636783933, "learning_rate": 2.6037367043024987e-06, "loss": 0.017899802327156066, "step": 62720 }, { "epoch": 0.5903529411764706, "grad_norm": 0.41878933452273, "learning_rate": 2.6036329247047716e-06, "loss": 0.016888664662837984, "step": 62725 }, { "epoch": 0.5904, "grad_norm": 0.4680596129600322, "learning_rate": 2.6035291575153805e-06, "loss": 0.012213069200515746, "step": 62730 }, { "epoch": 0.5904470588235294, "grad_norm": 0.4306477757571984, "learning_rate": 2.603425402731852e-06, "loss": 0.01683250367641449, "step": 62735 }, { "epoch": 0.5904941176470588, "grad_norm": 0.5403869024201712, "learning_rate": 2.603321660351715e-06, "loss": 0.014651472866535186, "step": 62740 }, { "epoch": 0.5905411764705882, "grad_norm": 0.49894559370456926, "learning_rate": 2.603217930372499e-06, "loss": 0.014572431147098542, "step": 62745 }, { "epoch": 0.5905882352941176, "grad_norm": 0.5866509763902922, "learning_rate": 2.603114212791733e-06, "loss": 0.020083627104759215, "step": 62750 }, { "epoch": 0.590635294117647, "grad_norm": 0.7478866620902098, "learning_rate": 2.603010507606948e-06, "loss": 0.015301921963691711, "step": 62755 }, { "epoch": 0.5906823529411764, "grad_norm": 0.48852984494802587, "learning_rate": 2.6029068148156733e-06, "loss": 0.018336865305900573, "step": 62760 }, { "epoch": 0.5907294117647058, "grad_norm": 0.941783777198612, "learning_rate": 2.602803134415442e-06, "loss": 0.016512995958328246, "step": 62765 }, { "epoch": 0.5907764705882353, "grad_norm": 0.5279039838583318, "learning_rate": 2.6026994664037857e-06, "loss": 0.016493594646453856, "step": 62770 }, { "epoch": 0.5908235294117647, "grad_norm": 0.30508418851447205, "learning_rate": 2.6025958107782383e-06, "loss": 0.013504552841186523, "step": 62775 }, { "epoch": 0.5908705882352941, "grad_norm": 0.6581559133309446, "learning_rate": 2.602492167536333e-06, "loss": 0.018818606436252595, "step": 62780 }, { "epoch": 0.5909176470588235, "grad_norm": 0.48879721251731434, "learning_rate": 2.6023885366756043e-06, "loss": 0.013269123435020447, "step": 62785 }, { "epoch": 0.590964705882353, "grad_norm": 0.34073950114149015, "learning_rate": 2.602284918193587e-06, "loss": 0.02023090422153473, "step": 62790 }, { "epoch": 0.5910117647058823, "grad_norm": 0.5037422715799573, "learning_rate": 2.6021813120878176e-06, "loss": 0.011923063546419144, "step": 62795 }, { "epoch": 0.5910588235294117, "grad_norm": 0.5376886986762696, "learning_rate": 2.602077718355832e-06, "loss": 0.01700296700000763, "step": 62800 }, { "epoch": 0.5911058823529411, "grad_norm": 0.4965324489185611, "learning_rate": 2.601974136995168e-06, "loss": 0.016900849342346192, "step": 62805 }, { "epoch": 0.5911529411764705, "grad_norm": 0.5821010516571745, "learning_rate": 2.6018705680033623e-06, "loss": 0.01474650502204895, "step": 62810 }, { "epoch": 0.5912, "grad_norm": 0.4404964069462698, "learning_rate": 2.6017670113779542e-06, "loss": 0.013810637593269347, "step": 62815 }, { "epoch": 0.5912470588235295, "grad_norm": 0.41634508950553495, "learning_rate": 2.6016634671164833e-06, "loss": 0.015518066287040711, "step": 62820 }, { "epoch": 0.5912941176470589, "grad_norm": 0.9105881953090423, "learning_rate": 2.6015599352164887e-06, "loss": 0.01994614154100418, "step": 62825 }, { "epoch": 0.5913411764705883, "grad_norm": 0.7400543589091427, "learning_rate": 2.601456415675512e-06, "loss": 0.021820193529129027, "step": 62830 }, { "epoch": 0.5913882352941177, "grad_norm": 0.3949665762983352, "learning_rate": 2.6013529084910937e-06, "loss": 0.014652237296104431, "step": 62835 }, { "epoch": 0.5914352941176471, "grad_norm": 0.4754152406107825, "learning_rate": 2.6012494136607757e-06, "loss": 0.01853988766670227, "step": 62840 }, { "epoch": 0.5914823529411765, "grad_norm": 0.5183864550926701, "learning_rate": 2.6011459311821013e-06, "loss": 0.016602334380149842, "step": 62845 }, { "epoch": 0.5915294117647059, "grad_norm": 0.7203050586482312, "learning_rate": 2.601042461052614e-06, "loss": 0.015557242929935456, "step": 62850 }, { "epoch": 0.5915764705882353, "grad_norm": 0.5856084673155428, "learning_rate": 2.6009390032698563e-06, "loss": 0.0161910280585289, "step": 62855 }, { "epoch": 0.5916235294117647, "grad_norm": 0.48107879671406406, "learning_rate": 2.6008355578313737e-06, "loss": 0.01894257068634033, "step": 62860 }, { "epoch": 0.5916705882352942, "grad_norm": 0.5915678051100685, "learning_rate": 2.600732124734713e-06, "loss": 0.012407029420137406, "step": 62865 }, { "epoch": 0.5917176470588236, "grad_norm": 0.5229295097856509, "learning_rate": 2.600628703977418e-06, "loss": 0.01577956825494766, "step": 62870 }, { "epoch": 0.591764705882353, "grad_norm": 0.8216540460249532, "learning_rate": 2.600525295557037e-06, "loss": 0.01805148720741272, "step": 62875 }, { "epoch": 0.5918117647058824, "grad_norm": 0.39980911726376, "learning_rate": 2.600421899471117e-06, "loss": 0.01726790815591812, "step": 62880 }, { "epoch": 0.5918588235294118, "grad_norm": 0.4821075866383757, "learning_rate": 2.6003185157172055e-06, "loss": 0.01684626340866089, "step": 62885 }, { "epoch": 0.5919058823529412, "grad_norm": 0.5700273491065716, "learning_rate": 2.6002151442928523e-06, "loss": 0.014543834328651428, "step": 62890 }, { "epoch": 0.5919529411764706, "grad_norm": 0.4098647070402597, "learning_rate": 2.600111785195607e-06, "loss": 0.018523040413856506, "step": 62895 }, { "epoch": 0.592, "grad_norm": 0.36660666334475644, "learning_rate": 2.600008438423019e-06, "loss": 0.015465560555458068, "step": 62900 }, { "epoch": 0.5920470588235294, "grad_norm": 0.5939602750903749, "learning_rate": 2.5999051039726393e-06, "loss": 0.014561839401721954, "step": 62905 }, { "epoch": 0.5920941176470588, "grad_norm": 0.5368196011372565, "learning_rate": 2.5998017818420194e-06, "loss": 0.01917451322078705, "step": 62910 }, { "epoch": 0.5921411764705883, "grad_norm": 0.46194772430492514, "learning_rate": 2.5996984720287117e-06, "loss": 0.01618356108665466, "step": 62915 }, { "epoch": 0.5921882352941177, "grad_norm": 0.5255169974041605, "learning_rate": 2.5995951745302694e-06, "loss": 0.01248466521501541, "step": 62920 }, { "epoch": 0.5922352941176471, "grad_norm": 0.5424647122653364, "learning_rate": 2.599491889344245e-06, "loss": 0.015843656659126282, "step": 62925 }, { "epoch": 0.5922823529411765, "grad_norm": 0.5440032429412339, "learning_rate": 2.5993886164681943e-06, "loss": 0.01725804656744003, "step": 62930 }, { "epoch": 0.5923294117647059, "grad_norm": 1.2818465685312177, "learning_rate": 2.5992853558996707e-06, "loss": 0.015363946557044983, "step": 62935 }, { "epoch": 0.5923764705882353, "grad_norm": 0.6998559973396649, "learning_rate": 2.599182107636231e-06, "loss": 0.017074939608573914, "step": 62940 }, { "epoch": 0.5924235294117647, "grad_norm": 0.5250906947819953, "learning_rate": 2.5990788716754308e-06, "loss": 0.017933556437492372, "step": 62945 }, { "epoch": 0.5924705882352941, "grad_norm": 0.6404096007015998, "learning_rate": 2.598975648014827e-06, "loss": 0.013368898630142212, "step": 62950 }, { "epoch": 0.5925176470588235, "grad_norm": 0.5968304769574296, "learning_rate": 2.598872436651978e-06, "loss": 0.019931501150131224, "step": 62955 }, { "epoch": 0.592564705882353, "grad_norm": 0.547894623915192, "learning_rate": 2.5987692375844414e-06, "loss": 0.015283986926078796, "step": 62960 }, { "epoch": 0.5926117647058824, "grad_norm": 0.7500692630708827, "learning_rate": 2.598666050809777e-06, "loss": 0.018400120735168456, "step": 62965 }, { "epoch": 0.5926588235294118, "grad_norm": 0.8663841346748393, "learning_rate": 2.598562876325544e-06, "loss": 0.021079257130622864, "step": 62970 }, { "epoch": 0.5927058823529412, "grad_norm": 0.4695566196353064, "learning_rate": 2.598459714129302e-06, "loss": 0.014991125464439392, "step": 62975 }, { "epoch": 0.5927529411764706, "grad_norm": 0.49572520611471255, "learning_rate": 2.5983565642186132e-06, "loss": 0.014120028913021087, "step": 62980 }, { "epoch": 0.5928, "grad_norm": 0.5060589767015871, "learning_rate": 2.5982534265910393e-06, "loss": 0.018952101469039917, "step": 62985 }, { "epoch": 0.5928470588235294, "grad_norm": 0.5432131960819971, "learning_rate": 2.5981503012441416e-06, "loss": 0.018978303670883177, "step": 62990 }, { "epoch": 0.5928941176470588, "grad_norm": 0.4730407058595568, "learning_rate": 2.5980471881754845e-06, "loss": 0.017047275602817536, "step": 62995 }, { "epoch": 0.5929411764705882, "grad_norm": 0.44204745164960935, "learning_rate": 2.5979440873826307e-06, "loss": 0.013566213846206664, "step": 63000 }, { "epoch": 0.5929882352941176, "grad_norm": 0.765640965699746, "learning_rate": 2.5978409988631454e-06, "loss": 0.016746559739112855, "step": 63005 }, { "epoch": 0.5930352941176471, "grad_norm": 0.5610651416700961, "learning_rate": 2.5977379226145925e-06, "loss": 0.019490450620651245, "step": 63010 }, { "epoch": 0.5930823529411765, "grad_norm": 0.5275569222566235, "learning_rate": 2.5976348586345394e-06, "loss": 0.014266771078109742, "step": 63015 }, { "epoch": 0.5931294117647059, "grad_norm": 0.44409798299422537, "learning_rate": 2.5975318069205517e-06, "loss": 0.018874573707580566, "step": 63020 }, { "epoch": 0.5931764705882353, "grad_norm": 0.4708499683257639, "learning_rate": 2.5974287674701975e-06, "loss": 0.017780473828315733, "step": 63025 }, { "epoch": 0.5932235294117647, "grad_norm": 0.5894317710374151, "learning_rate": 2.5973257402810433e-06, "loss": 0.016087885200977325, "step": 63030 }, { "epoch": 0.5932705882352941, "grad_norm": 0.5474368113554823, "learning_rate": 2.5972227253506573e-06, "loss": 0.01601845324039459, "step": 63035 }, { "epoch": 0.5933176470588235, "grad_norm": 0.36774949708994187, "learning_rate": 2.5971197226766098e-06, "loss": 0.018933099508285523, "step": 63040 }, { "epoch": 0.5933647058823529, "grad_norm": 0.3947231556735242, "learning_rate": 2.5970167322564706e-06, "loss": 0.014077772200107575, "step": 63045 }, { "epoch": 0.5934117647058823, "grad_norm": 0.6690546751459022, "learning_rate": 2.5969137540878096e-06, "loss": 0.015285632014274598, "step": 63050 }, { "epoch": 0.5934588235294118, "grad_norm": 0.37956212080930557, "learning_rate": 2.596810788168198e-06, "loss": 0.01521124541759491, "step": 63055 }, { "epoch": 0.5935058823529412, "grad_norm": 0.6478301643391956, "learning_rate": 2.5967078344952084e-06, "loss": 0.01644323319196701, "step": 63060 }, { "epoch": 0.5935529411764706, "grad_norm": 0.6421902026884095, "learning_rate": 2.596604893066412e-06, "loss": 0.01836564838886261, "step": 63065 }, { "epoch": 0.5936, "grad_norm": 0.47848570608771046, "learning_rate": 2.596501963879384e-06, "loss": 0.014001193642616271, "step": 63070 }, { "epoch": 0.5936470588235294, "grad_norm": 0.43221025997706647, "learning_rate": 2.596399046931697e-06, "loss": 0.013985127210617065, "step": 63075 }, { "epoch": 0.5936941176470588, "grad_norm": 0.421506448267521, "learning_rate": 2.596296142220925e-06, "loss": 0.014597192406654358, "step": 63080 }, { "epoch": 0.5937411764705882, "grad_norm": 0.317134904006611, "learning_rate": 2.5961932497446442e-06, "loss": 0.018462854623794555, "step": 63085 }, { "epoch": 0.5937882352941176, "grad_norm": 0.6256819988101886, "learning_rate": 2.596090369500431e-06, "loss": 0.015126040577888489, "step": 63090 }, { "epoch": 0.593835294117647, "grad_norm": 0.38450488516662323, "learning_rate": 2.59598750148586e-06, "loss": 0.01845458894968033, "step": 63095 }, { "epoch": 0.5938823529411764, "grad_norm": 0.5613087864798084, "learning_rate": 2.59588464569851e-06, "loss": 0.016694869101047515, "step": 63100 }, { "epoch": 0.5939294117647059, "grad_norm": 0.32759205897140947, "learning_rate": 2.595781802135958e-06, "loss": 0.013946132361888885, "step": 63105 }, { "epoch": 0.5939764705882353, "grad_norm": 0.4510557487803678, "learning_rate": 2.595678970795784e-06, "loss": 0.012457533180713654, "step": 63110 }, { "epoch": 0.5940235294117647, "grad_norm": 0.615227519461414, "learning_rate": 2.595576151675566e-06, "loss": 0.018351054191589354, "step": 63115 }, { "epoch": 0.5940705882352941, "grad_norm": 0.43085027410059845, "learning_rate": 2.595473344772885e-06, "loss": 0.01274135708808899, "step": 63120 }, { "epoch": 0.5941176470588235, "grad_norm": 0.3363763269662664, "learning_rate": 2.59537055008532e-06, "loss": 0.016078822314739227, "step": 63125 }, { "epoch": 0.5941647058823529, "grad_norm": 0.6703815044266027, "learning_rate": 2.5952677676104533e-06, "loss": 0.014267197251319886, "step": 63130 }, { "epoch": 0.5942117647058823, "grad_norm": 0.38836870331104273, "learning_rate": 2.595164997345867e-06, "loss": 0.01389104425907135, "step": 63135 }, { "epoch": 0.5942588235294117, "grad_norm": 0.5503074031755851, "learning_rate": 2.5950622392891426e-06, "loss": 0.0193853497505188, "step": 63140 }, { "epoch": 0.5943058823529411, "grad_norm": 0.52131923513566, "learning_rate": 2.5949594934378653e-06, "loss": 0.014452603459358216, "step": 63145 }, { "epoch": 0.5943529411764706, "grad_norm": 0.5289911642536801, "learning_rate": 2.5948567597896175e-06, "loss": 0.013104678690433502, "step": 63150 }, { "epoch": 0.5944, "grad_norm": 0.5357671072791861, "learning_rate": 2.5947540383419845e-06, "loss": 0.023122118413448335, "step": 63155 }, { "epoch": 0.5944470588235294, "grad_norm": 0.4836568172453407, "learning_rate": 2.5946513290925513e-06, "loss": 0.014164941012859344, "step": 63160 }, { "epoch": 0.5944941176470588, "grad_norm": 0.49113778867328384, "learning_rate": 2.5945486320389035e-06, "loss": 0.015026850998401642, "step": 63165 }, { "epoch": 0.5945411764705882, "grad_norm": 0.37930057917727833, "learning_rate": 2.5944459471786287e-06, "loss": 0.016831597685813902, "step": 63170 }, { "epoch": 0.5945882352941176, "grad_norm": 0.6492433588401061, "learning_rate": 2.5943432745093132e-06, "loss": 0.013562072813510895, "step": 63175 }, { "epoch": 0.594635294117647, "grad_norm": 0.5248717112779345, "learning_rate": 2.5942406140285457e-06, "loss": 0.024444425106048585, "step": 63180 }, { "epoch": 0.5946823529411764, "grad_norm": 0.491973890039367, "learning_rate": 2.5941379657339145e-06, "loss": 0.014493829011917115, "step": 63185 }, { "epoch": 0.5947294117647058, "grad_norm": 0.6829191906480289, "learning_rate": 2.594035329623009e-06, "loss": 0.01887575685977936, "step": 63190 }, { "epoch": 0.5947764705882352, "grad_norm": 0.4124218127569898, "learning_rate": 2.5939327056934195e-06, "loss": 0.01356561779975891, "step": 63195 }, { "epoch": 0.5948235294117648, "grad_norm": 0.6345282592943013, "learning_rate": 2.5938300939427352e-06, "loss": 0.01798136830329895, "step": 63200 }, { "epoch": 0.5948705882352942, "grad_norm": 0.533742534104145, "learning_rate": 2.5937274943685494e-06, "loss": 0.018607057631015778, "step": 63205 }, { "epoch": 0.5949176470588236, "grad_norm": 0.47414588926226675, "learning_rate": 2.5936249069684525e-06, "loss": 0.0170894131064415, "step": 63210 }, { "epoch": 0.594964705882353, "grad_norm": 0.4105594210226105, "learning_rate": 2.5935223317400383e-06, "loss": 0.012572243809700012, "step": 63215 }, { "epoch": 0.5950117647058824, "grad_norm": 0.5514264421242048, "learning_rate": 2.5934197686808994e-06, "loss": 0.015816539525985718, "step": 63220 }, { "epoch": 0.5950588235294118, "grad_norm": 0.32370816825652987, "learning_rate": 2.5933172177886295e-06, "loss": 0.012483199685811996, "step": 63225 }, { "epoch": 0.5951058823529412, "grad_norm": 0.6634584281127546, "learning_rate": 2.5932146790608237e-06, "loss": 0.02101667821407318, "step": 63230 }, { "epoch": 0.5951529411764706, "grad_norm": 0.3763565740490005, "learning_rate": 2.593112152495078e-06, "loss": 0.01748643070459366, "step": 63235 }, { "epoch": 0.5952, "grad_norm": 0.4571241656595459, "learning_rate": 2.5930096380889868e-06, "loss": 0.01435278207063675, "step": 63240 }, { "epoch": 0.5952470588235295, "grad_norm": 0.5801896216160226, "learning_rate": 2.592907135840148e-06, "loss": 0.015244427323341369, "step": 63245 }, { "epoch": 0.5952941176470589, "grad_norm": 1.1828200207005226, "learning_rate": 2.5928046457461587e-06, "loss": 0.01399625688791275, "step": 63250 }, { "epoch": 0.5953411764705883, "grad_norm": 0.5059979270017153, "learning_rate": 2.592702167804616e-06, "loss": 0.016058583557605744, "step": 63255 }, { "epoch": 0.5953882352941177, "grad_norm": 0.5295929587251769, "learning_rate": 2.592599702013119e-06, "loss": 0.015971457958221434, "step": 63260 }, { "epoch": 0.5954352941176471, "grad_norm": 0.6312482011117733, "learning_rate": 2.5924972483692677e-06, "loss": 0.011839792132377625, "step": 63265 }, { "epoch": 0.5954823529411765, "grad_norm": 0.39095324167904205, "learning_rate": 2.5923948068706617e-06, "loss": 0.01498015522956848, "step": 63270 }, { "epoch": 0.5955294117647059, "grad_norm": 0.6300172516459857, "learning_rate": 2.592292377514901e-06, "loss": 0.01887570470571518, "step": 63275 }, { "epoch": 0.5955764705882353, "grad_norm": 0.5411411456474471, "learning_rate": 2.5921899602995874e-06, "loss": 0.01816602945327759, "step": 63280 }, { "epoch": 0.5956235294117647, "grad_norm": 0.5540607247230667, "learning_rate": 2.5920875552223224e-06, "loss": 0.01470324695110321, "step": 63285 }, { "epoch": 0.5956705882352941, "grad_norm": 0.48100893494517727, "learning_rate": 2.5919851622807098e-06, "loss": 0.011551516503095627, "step": 63290 }, { "epoch": 0.5957176470588236, "grad_norm": 0.6984541797067589, "learning_rate": 2.591882781472351e-06, "loss": 0.018195801973342897, "step": 63295 }, { "epoch": 0.595764705882353, "grad_norm": 0.3704567894504941, "learning_rate": 2.591780412794851e-06, "loss": 0.018071132898330688, "step": 63300 }, { "epoch": 0.5958117647058824, "grad_norm": 0.44737872458492334, "learning_rate": 2.591678056245815e-06, "loss": 0.017229115962982176, "step": 63305 }, { "epoch": 0.5958588235294118, "grad_norm": 0.5342257029150415, "learning_rate": 2.5915757118228467e-06, "loss": 0.0172180637717247, "step": 63310 }, { "epoch": 0.5959058823529412, "grad_norm": 0.3686531350820366, "learning_rate": 2.591473379523554e-06, "loss": 0.012122144550085067, "step": 63315 }, { "epoch": 0.5959529411764706, "grad_norm": 0.5067676150083404, "learning_rate": 2.5913710593455416e-06, "loss": 0.016855299472808838, "step": 63320 }, { "epoch": 0.596, "grad_norm": 0.4921969662380353, "learning_rate": 2.5912687512864177e-06, "loss": 0.017641592025756835, "step": 63325 }, { "epoch": 0.5960470588235294, "grad_norm": 0.5464466986266489, "learning_rate": 2.5911664553437897e-06, "loss": 0.016595886647701265, "step": 63330 }, { "epoch": 0.5960941176470588, "grad_norm": 0.45503119977938605, "learning_rate": 2.5910641715152673e-06, "loss": 0.01625876873731613, "step": 63335 }, { "epoch": 0.5961411764705883, "grad_norm": 0.38734772733855416, "learning_rate": 2.590961899798458e-06, "loss": 0.016046901047229768, "step": 63340 }, { "epoch": 0.5961882352941177, "grad_norm": 0.38645931643588316, "learning_rate": 2.590859640190973e-06, "loss": 0.01573857069015503, "step": 63345 }, { "epoch": 0.5962352941176471, "grad_norm": 0.5108527677705731, "learning_rate": 2.5907573926904228e-06, "loss": 0.018123020231723786, "step": 63350 }, { "epoch": 0.5962823529411765, "grad_norm": 0.5266298478154776, "learning_rate": 2.590655157294418e-06, "loss": 0.015558975934982299, "step": 63355 }, { "epoch": 0.5963294117647059, "grad_norm": 0.7613606722412984, "learning_rate": 2.5905529340005704e-06, "loss": 0.016730457544326782, "step": 63360 }, { "epoch": 0.5963764705882353, "grad_norm": 0.5733532097259691, "learning_rate": 2.590450722806493e-06, "loss": 0.019876129925251007, "step": 63365 }, { "epoch": 0.5964235294117647, "grad_norm": 0.6623868640931342, "learning_rate": 2.590348523709799e-06, "loss": 0.01760093867778778, "step": 63370 }, { "epoch": 0.5964705882352941, "grad_norm": 0.47347720202458937, "learning_rate": 2.590246336708102e-06, "loss": 0.017422333359718323, "step": 63375 }, { "epoch": 0.5965176470588235, "grad_norm": 0.6086139523096069, "learning_rate": 2.5901441617990163e-06, "loss": 0.013224320113658905, "step": 63380 }, { "epoch": 0.5965647058823529, "grad_norm": 0.5386065569407742, "learning_rate": 2.590041998980158e-06, "loss": 0.012390395998954773, "step": 63385 }, { "epoch": 0.5966117647058824, "grad_norm": 0.5827547144496646, "learning_rate": 2.5899398482491417e-06, "loss": 0.01464671641588211, "step": 63390 }, { "epoch": 0.5966588235294118, "grad_norm": 0.7036819744464888, "learning_rate": 2.5898377096035847e-06, "loss": 0.01657789945602417, "step": 63395 }, { "epoch": 0.5967058823529412, "grad_norm": 0.40558027191140367, "learning_rate": 2.5897355830411035e-06, "loss": 0.01613118648529053, "step": 63400 }, { "epoch": 0.5967529411764706, "grad_norm": 0.6143999080413547, "learning_rate": 2.5896334685593166e-06, "loss": 0.01958506405353546, "step": 63405 }, { "epoch": 0.5968, "grad_norm": 0.3921463141756971, "learning_rate": 2.5895313661558418e-06, "loss": 0.014564216136932373, "step": 63410 }, { "epoch": 0.5968470588235294, "grad_norm": 0.6092413914568693, "learning_rate": 2.5894292758282993e-06, "loss": 0.018916118144989013, "step": 63415 }, { "epoch": 0.5968941176470588, "grad_norm": 0.7079255312844038, "learning_rate": 2.589327197574307e-06, "loss": 0.020099331438541413, "step": 63420 }, { "epoch": 0.5969411764705882, "grad_norm": 0.4289020177434131, "learning_rate": 2.5892251313914875e-06, "loss": 0.01970631778240204, "step": 63425 }, { "epoch": 0.5969882352941176, "grad_norm": 0.5466207552373874, "learning_rate": 2.58912307727746e-06, "loss": 0.01476505994796753, "step": 63430 }, { "epoch": 0.5970352941176471, "grad_norm": 0.656356324601701, "learning_rate": 2.5890210352298473e-06, "loss": 0.018144670128822326, "step": 63435 }, { "epoch": 0.5970823529411765, "grad_norm": 0.45989732485558416, "learning_rate": 2.5889190052462716e-06, "loss": 0.016507828235626222, "step": 63440 }, { "epoch": 0.5971294117647059, "grad_norm": 0.36968405332359694, "learning_rate": 2.588816987324355e-06, "loss": 0.011397959291934967, "step": 63445 }, { "epoch": 0.5971764705882353, "grad_norm": 0.6193935264706529, "learning_rate": 2.5887149814617235e-06, "loss": 0.02150142043828964, "step": 63450 }, { "epoch": 0.5972235294117647, "grad_norm": 0.4780929578412454, "learning_rate": 2.5886129876559994e-06, "loss": 0.015661533176898956, "step": 63455 }, { "epoch": 0.5972705882352941, "grad_norm": 0.40333483128586695, "learning_rate": 2.588511005904808e-06, "loss": 0.01595050245523453, "step": 63460 }, { "epoch": 0.5973176470588235, "grad_norm": 0.5282708168972801, "learning_rate": 2.5884090362057755e-06, "loss": 0.017444533109664918, "step": 63465 }, { "epoch": 0.5973647058823529, "grad_norm": 0.5745582578179862, "learning_rate": 2.5883070785565277e-06, "loss": 0.012423226982355118, "step": 63470 }, { "epoch": 0.5974117647058823, "grad_norm": 0.9208553915713968, "learning_rate": 2.5882051329546926e-06, "loss": 0.016461417078971863, "step": 63475 }, { "epoch": 0.5974588235294118, "grad_norm": 0.4439391738118189, "learning_rate": 2.588103199397897e-06, "loss": 0.01609441787004471, "step": 63480 }, { "epoch": 0.5975058823529412, "grad_norm": 0.3415462872975937, "learning_rate": 2.5880012778837687e-06, "loss": 0.011905533075332642, "step": 63485 }, { "epoch": 0.5975529411764706, "grad_norm": 0.6365192020217355, "learning_rate": 2.5878993684099378e-06, "loss": 0.015198540687561036, "step": 63490 }, { "epoch": 0.5976, "grad_norm": 0.5505814560437633, "learning_rate": 2.587797470974033e-06, "loss": 0.01779649257659912, "step": 63495 }, { "epoch": 0.5976470588235294, "grad_norm": 0.37431746858382786, "learning_rate": 2.587695585573685e-06, "loss": 0.0174711674451828, "step": 63500 }, { "epoch": 0.5976941176470588, "grad_norm": 0.6072341132264114, "learning_rate": 2.587593712206525e-06, "loss": 0.016186043620109558, "step": 63505 }, { "epoch": 0.5977411764705882, "grad_norm": 0.5074651225791139, "learning_rate": 2.5874918508701837e-06, "loss": 0.01385917216539383, "step": 63510 }, { "epoch": 0.5977882352941176, "grad_norm": 3.4287591688562777, "learning_rate": 2.5873900015622944e-06, "loss": 0.016545824706554413, "step": 63515 }, { "epoch": 0.597835294117647, "grad_norm": 0.44393895825919477, "learning_rate": 2.5872881642804882e-06, "loss": 0.020624765753746034, "step": 63520 }, { "epoch": 0.5978823529411764, "grad_norm": 0.6774799972757867, "learning_rate": 2.5871863390224007e-06, "loss": 0.014191633462905884, "step": 63525 }, { "epoch": 0.5979294117647059, "grad_norm": 0.44786050359272617, "learning_rate": 2.587084525785664e-06, "loss": 0.016122391819953917, "step": 63530 }, { "epoch": 0.5979764705882353, "grad_norm": 0.4322593142040689, "learning_rate": 2.5869827245679152e-06, "loss": 0.015342128276824952, "step": 63535 }, { "epoch": 0.5980235294117647, "grad_norm": 0.6198504064053703, "learning_rate": 2.5868809353667874e-06, "loss": 0.017384561896324157, "step": 63540 }, { "epoch": 0.5980705882352941, "grad_norm": 0.5286213048692489, "learning_rate": 2.5867791581799186e-06, "loss": 0.01801374852657318, "step": 63545 }, { "epoch": 0.5981176470588235, "grad_norm": 0.35624102108763084, "learning_rate": 2.5866773930049448e-06, "loss": 0.01600002348423004, "step": 63550 }, { "epoch": 0.5981647058823529, "grad_norm": 0.38615546799165607, "learning_rate": 2.586575639839503e-06, "loss": 0.016489458084106446, "step": 63555 }, { "epoch": 0.5982117647058823, "grad_norm": 0.4859512689684314, "learning_rate": 2.5864738986812323e-06, "loss": 0.013961026072502136, "step": 63560 }, { "epoch": 0.5982588235294117, "grad_norm": 0.5932927040351265, "learning_rate": 2.58637216952777e-06, "loss": 0.018904340267181397, "step": 63565 }, { "epoch": 0.5983058823529411, "grad_norm": 0.5907088218656048, "learning_rate": 2.5862704523767574e-06, "loss": 0.0177247554063797, "step": 63570 }, { "epoch": 0.5983529411764706, "grad_norm": 0.5668091231216383, "learning_rate": 2.5861687472258323e-06, "loss": 0.016546157002449036, "step": 63575 }, { "epoch": 0.5984, "grad_norm": 0.7895995084636794, "learning_rate": 2.5860670540726376e-06, "loss": 0.0176579087972641, "step": 63580 }, { "epoch": 0.5984470588235294, "grad_norm": 0.37270619715371067, "learning_rate": 2.5859653729148126e-06, "loss": 0.016863057017326356, "step": 63585 }, { "epoch": 0.5984941176470588, "grad_norm": 0.44197664994082614, "learning_rate": 2.5858637037500005e-06, "loss": 0.017951624095439912, "step": 63590 }, { "epoch": 0.5985411764705882, "grad_norm": 0.4110786841424266, "learning_rate": 2.585762046575843e-06, "loss": 0.016494274139404297, "step": 63595 }, { "epoch": 0.5985882352941176, "grad_norm": 0.5304302963005031, "learning_rate": 2.5856604013899845e-06, "loss": 0.019114047288894653, "step": 63600 }, { "epoch": 0.598635294117647, "grad_norm": 0.4031009829468789, "learning_rate": 2.5855587681900685e-06, "loss": 0.014379860460758209, "step": 63605 }, { "epoch": 0.5986823529411764, "grad_norm": 0.44132142326376633, "learning_rate": 2.5854571469737396e-06, "loss": 0.023523890972137453, "step": 63610 }, { "epoch": 0.5987294117647058, "grad_norm": 2.4186085546493064, "learning_rate": 2.5853555377386418e-06, "loss": 0.01948067992925644, "step": 63615 }, { "epoch": 0.5987764705882352, "grad_norm": 0.29983486003105553, "learning_rate": 2.5852539404824228e-06, "loss": 0.012530697882175446, "step": 63620 }, { "epoch": 0.5988235294117648, "grad_norm": 0.5245883996470118, "learning_rate": 2.585152355202728e-06, "loss": 0.015692496299743654, "step": 63625 }, { "epoch": 0.5988705882352942, "grad_norm": 0.46581225803852944, "learning_rate": 2.585050781897205e-06, "loss": 0.01737046092748642, "step": 63630 }, { "epoch": 0.5989176470588236, "grad_norm": 0.8773768256684626, "learning_rate": 2.584949220563501e-06, "loss": 0.03140591382980347, "step": 63635 }, { "epoch": 0.598964705882353, "grad_norm": 0.537633883050118, "learning_rate": 2.584847671199265e-06, "loss": 0.02075333297252655, "step": 63640 }, { "epoch": 0.5990117647058824, "grad_norm": 0.4509899734728736, "learning_rate": 2.584746133802146e-06, "loss": 0.013388414680957795, "step": 63645 }, { "epoch": 0.5990588235294118, "grad_norm": 0.35989931196432984, "learning_rate": 2.584644608369794e-06, "loss": 0.015513116121292114, "step": 63650 }, { "epoch": 0.5991058823529412, "grad_norm": 0.45415672953468716, "learning_rate": 2.5845430948998584e-06, "loss": 0.01243150383234024, "step": 63655 }, { "epoch": 0.5991529411764706, "grad_norm": 0.5318419584398136, "learning_rate": 2.5844415933899915e-06, "loss": 0.014685887098312377, "step": 63660 }, { "epoch": 0.5992, "grad_norm": 0.5536166771786603, "learning_rate": 2.5843401038378446e-06, "loss": 0.020286434888839723, "step": 63665 }, { "epoch": 0.5992470588235295, "grad_norm": 0.370500434380814, "learning_rate": 2.584238626241069e-06, "loss": 0.01586196422576904, "step": 63670 }, { "epoch": 0.5992941176470589, "grad_norm": 0.40993533850966407, "learning_rate": 2.5841371605973192e-06, "loss": 0.015602034330368043, "step": 63675 }, { "epoch": 0.5993411764705883, "grad_norm": 0.49516059107244126, "learning_rate": 2.5840357069042478e-06, "loss": 0.016938027739524842, "step": 63680 }, { "epoch": 0.5993882352941177, "grad_norm": 0.772961565550315, "learning_rate": 2.583934265159509e-06, "loss": 0.016875690221786498, "step": 63685 }, { "epoch": 0.5994352941176471, "grad_norm": 0.8694429538448768, "learning_rate": 2.5838328353607585e-06, "loss": 0.020350031554698944, "step": 63690 }, { "epoch": 0.5994823529411765, "grad_norm": 0.5324882551551455, "learning_rate": 2.583731417505651e-06, "loss": 0.020029792189598085, "step": 63695 }, { "epoch": 0.5995294117647059, "grad_norm": 0.6526225489183778, "learning_rate": 2.583630011591843e-06, "loss": 0.014926886558532715, "step": 63700 }, { "epoch": 0.5995764705882353, "grad_norm": 0.6505887580337063, "learning_rate": 2.5835286176169918e-06, "loss": 0.021700406074523927, "step": 63705 }, { "epoch": 0.5996235294117647, "grad_norm": 0.5237988646454237, "learning_rate": 2.5834272355787542e-06, "loss": 0.017272447049617768, "step": 63710 }, { "epoch": 0.5996705882352941, "grad_norm": 0.45879204174280697, "learning_rate": 2.5833258654747882e-06, "loss": 0.014514806866645812, "step": 63715 }, { "epoch": 0.5997176470588236, "grad_norm": 0.5660787869063671, "learning_rate": 2.5832245073027535e-06, "loss": 0.01996684670448303, "step": 63720 }, { "epoch": 0.599764705882353, "grad_norm": 0.6231698889442786, "learning_rate": 2.5831231610603087e-06, "loss": 0.014435988664627076, "step": 63725 }, { "epoch": 0.5998117647058824, "grad_norm": 0.45331166601460215, "learning_rate": 2.5830218267451144e-06, "loss": 0.015298232436180115, "step": 63730 }, { "epoch": 0.5998588235294118, "grad_norm": 0.38405398309683575, "learning_rate": 2.5829205043548307e-06, "loss": 0.01317574381828308, "step": 63735 }, { "epoch": 0.5999058823529412, "grad_norm": 0.31350300278669324, "learning_rate": 2.5828191938871193e-06, "loss": 0.013177651166915893, "step": 63740 }, { "epoch": 0.5999529411764706, "grad_norm": 0.5608704216298327, "learning_rate": 2.5827178953396425e-06, "loss": 0.012524551153182984, "step": 63745 }, { "epoch": 0.6, "grad_norm": 0.4966374428535141, "learning_rate": 2.582616608710062e-06, "loss": 0.014538249373435974, "step": 63750 }, { "epoch": 0.6000470588235294, "grad_norm": 0.6103904421155092, "learning_rate": 2.582515333996042e-06, "loss": 0.020881201326847076, "step": 63755 }, { "epoch": 0.6000941176470588, "grad_norm": 0.5615659290771748, "learning_rate": 2.5824140711952457e-06, "loss": 0.014831021428108215, "step": 63760 }, { "epoch": 0.6001411764705883, "grad_norm": 0.6142850165929646, "learning_rate": 2.5823128203053378e-06, "loss": 0.017927885055541992, "step": 63765 }, { "epoch": 0.6001882352941177, "grad_norm": 0.4708567207863225, "learning_rate": 2.5822115813239833e-06, "loss": 0.014707311987876892, "step": 63770 }, { "epoch": 0.6002352941176471, "grad_norm": 0.6666745827511681, "learning_rate": 2.5821103542488484e-06, "loss": 0.01694401204586029, "step": 63775 }, { "epoch": 0.6002823529411765, "grad_norm": 0.3788188719043447, "learning_rate": 2.5820091390776003e-06, "loss": 0.02007491737604141, "step": 63780 }, { "epoch": 0.6003294117647059, "grad_norm": 0.5461915744169469, "learning_rate": 2.5819079358079046e-06, "loss": 0.017225110530853273, "step": 63785 }, { "epoch": 0.6003764705882353, "grad_norm": 0.4761951446047736, "learning_rate": 2.58180674443743e-06, "loss": 0.014121577143669128, "step": 63790 }, { "epoch": 0.6004235294117647, "grad_norm": 0.49343222893462163, "learning_rate": 2.5817055649638444e-06, "loss": 0.016192206740379335, "step": 63795 }, { "epoch": 0.6004705882352941, "grad_norm": 0.5265120841024993, "learning_rate": 2.581604397384817e-06, "loss": 0.016026955842971802, "step": 63800 }, { "epoch": 0.6005176470588235, "grad_norm": 0.7204453090362821, "learning_rate": 2.581503241698018e-06, "loss": 0.017286527156829833, "step": 63805 }, { "epoch": 0.6005647058823529, "grad_norm": 0.31053421811554666, "learning_rate": 2.5814020979011163e-06, "loss": 0.013567943871021271, "step": 63810 }, { "epoch": 0.6006117647058824, "grad_norm": 0.4231870296787019, "learning_rate": 2.5813009659917847e-06, "loss": 0.014616823196411133, "step": 63815 }, { "epoch": 0.6006588235294118, "grad_norm": 0.6266013581051287, "learning_rate": 2.581199845967693e-06, "loss": 0.017632576823234557, "step": 63820 }, { "epoch": 0.6007058823529412, "grad_norm": 0.5348043308676027, "learning_rate": 2.5810987378265145e-06, "loss": 0.019988155364990233, "step": 63825 }, { "epoch": 0.6007529411764706, "grad_norm": 1.2590158709284585, "learning_rate": 2.580997641565922e-06, "loss": 0.016165727376937868, "step": 63830 }, { "epoch": 0.6008, "grad_norm": 0.45752824110868023, "learning_rate": 2.5808965571835887e-06, "loss": 0.015201614797115326, "step": 63835 }, { "epoch": 0.6008470588235294, "grad_norm": 0.6163658724003805, "learning_rate": 2.580795484677189e-06, "loss": 0.016124650835990906, "step": 63840 }, { "epoch": 0.6008941176470588, "grad_norm": 0.35661671458816374, "learning_rate": 2.580694424044397e-06, "loss": 0.019986534118652345, "step": 63845 }, { "epoch": 0.6009411764705882, "grad_norm": 0.7072232427982541, "learning_rate": 2.580593375282889e-06, "loss": 0.02028132677078247, "step": 63850 }, { "epoch": 0.6009882352941176, "grad_norm": 0.458935931134711, "learning_rate": 2.5804923383903404e-06, "loss": 0.01861926317214966, "step": 63855 }, { "epoch": 0.6010352941176471, "grad_norm": 0.7133336043124672, "learning_rate": 2.5803913133644283e-06, "loss": 0.01783497482538223, "step": 63860 }, { "epoch": 0.6010823529411765, "grad_norm": 0.7488869100833249, "learning_rate": 2.5802903002028297e-06, "loss": 0.017269918322563173, "step": 63865 }, { "epoch": 0.6011294117647059, "grad_norm": 0.5068259927025881, "learning_rate": 2.5801892989032224e-06, "loss": 0.015677788853645326, "step": 63870 }, { "epoch": 0.6011764705882353, "grad_norm": 0.4724749815644894, "learning_rate": 2.5800883094632854e-06, "loss": 0.019713971018791198, "step": 63875 }, { "epoch": 0.6012235294117647, "grad_norm": 0.43413523285518907, "learning_rate": 2.579987331880698e-06, "loss": 0.018013110756874083, "step": 63880 }, { "epoch": 0.6012705882352941, "grad_norm": 0.4002211316077818, "learning_rate": 2.5798863661531397e-06, "loss": 0.01496785283088684, "step": 63885 }, { "epoch": 0.6013176470588235, "grad_norm": 0.4976010122267283, "learning_rate": 2.5797854122782913e-06, "loss": 0.013327272236347198, "step": 63890 }, { "epoch": 0.6013647058823529, "grad_norm": 0.43125751763250947, "learning_rate": 2.5796844702538336e-06, "loss": 0.012849536538124085, "step": 63895 }, { "epoch": 0.6014117647058823, "grad_norm": 0.5604327835752179, "learning_rate": 2.5795835400774487e-06, "loss": 0.012115374207496643, "step": 63900 }, { "epoch": 0.6014588235294117, "grad_norm": 0.7712759831648038, "learning_rate": 2.5794826217468188e-06, "loss": 0.015671122074127197, "step": 63905 }, { "epoch": 0.6015058823529412, "grad_norm": 0.37059149512015077, "learning_rate": 2.5793817152596267e-06, "loss": 0.017192734777927397, "step": 63910 }, { "epoch": 0.6015529411764706, "grad_norm": 0.494145439826746, "learning_rate": 2.5792808206135567e-06, "loss": 0.016604670882225038, "step": 63915 }, { "epoch": 0.6016, "grad_norm": 0.7572073087502933, "learning_rate": 2.579179937806293e-06, "loss": 0.02229522615671158, "step": 63920 }, { "epoch": 0.6016470588235294, "grad_norm": 0.8344294862932113, "learning_rate": 2.5790790668355197e-06, "loss": 0.018104077875614168, "step": 63925 }, { "epoch": 0.6016941176470588, "grad_norm": 0.8798946802230805, "learning_rate": 2.5789782076989233e-06, "loss": 0.016191671788692474, "step": 63930 }, { "epoch": 0.6017411764705882, "grad_norm": 0.5063364671009527, "learning_rate": 2.5788773603941897e-06, "loss": 0.01283249706029892, "step": 63935 }, { "epoch": 0.6017882352941176, "grad_norm": 0.43507435872868905, "learning_rate": 2.5787765249190055e-06, "loss": 0.017262348532676698, "step": 63940 }, { "epoch": 0.601835294117647, "grad_norm": 0.25499928854275977, "learning_rate": 2.5786757012710584e-06, "loss": 0.01620977818965912, "step": 63945 }, { "epoch": 0.6018823529411764, "grad_norm": 0.45698406197739, "learning_rate": 2.578574889448037e-06, "loss": 0.016590334475040436, "step": 63950 }, { "epoch": 0.6019294117647059, "grad_norm": 2.1336186795234746, "learning_rate": 2.578474089447629e-06, "loss": 0.02055491805076599, "step": 63955 }, { "epoch": 0.6019764705882353, "grad_norm": 0.3620622030053893, "learning_rate": 2.5783733012675245e-06, "loss": 0.016793321073055267, "step": 63960 }, { "epoch": 0.6020235294117647, "grad_norm": 0.37682967463505934, "learning_rate": 2.5782725249054136e-06, "loss": 0.01586424559354782, "step": 63965 }, { "epoch": 0.6020705882352941, "grad_norm": 0.7568853072719196, "learning_rate": 2.5781717603589863e-06, "loss": 0.01831088066101074, "step": 63970 }, { "epoch": 0.6021176470588235, "grad_norm": 0.476638538620976, "learning_rate": 2.578071007625934e-06, "loss": 0.016582566499710082, "step": 63975 }, { "epoch": 0.6021647058823529, "grad_norm": 0.5107042475662484, "learning_rate": 2.5779702667039484e-06, "loss": 0.017561350762844086, "step": 63980 }, { "epoch": 0.6022117647058823, "grad_norm": 0.5640582261739786, "learning_rate": 2.5778695375907236e-06, "loss": 0.016667839884757996, "step": 63985 }, { "epoch": 0.6022588235294117, "grad_norm": 0.5300388945122302, "learning_rate": 2.5777688202839505e-06, "loss": 0.019888685643672945, "step": 63990 }, { "epoch": 0.6023058823529411, "grad_norm": 0.48778014937760594, "learning_rate": 2.577668114781325e-06, "loss": 0.012439396977424622, "step": 63995 }, { "epoch": 0.6023529411764705, "grad_norm": 0.5462563375776067, "learning_rate": 2.57756742108054e-06, "loss": 0.01945834457874298, "step": 64000 }, { "epoch": 0.6024, "grad_norm": 0.5799121268119602, "learning_rate": 2.577466739179291e-06, "loss": 0.017574962973594666, "step": 64005 }, { "epoch": 0.6024470588235294, "grad_norm": 0.9278899384708253, "learning_rate": 2.577366069075274e-06, "loss": 0.018359151482582093, "step": 64010 }, { "epoch": 0.6024941176470588, "grad_norm": 0.5468669973859132, "learning_rate": 2.577265410766185e-06, "loss": 0.018340131640434264, "step": 64015 }, { "epoch": 0.6025411764705882, "grad_norm": 0.5919139790231653, "learning_rate": 2.5771647642497207e-06, "loss": 0.02013176679611206, "step": 64020 }, { "epoch": 0.6025882352941176, "grad_norm": 0.4791427613530273, "learning_rate": 2.5770641295235795e-06, "loss": 0.015632212162017822, "step": 64025 }, { "epoch": 0.602635294117647, "grad_norm": 0.6156749632665358, "learning_rate": 2.576963506585458e-06, "loss": 0.018895845115184783, "step": 64030 }, { "epoch": 0.6026823529411764, "grad_norm": 0.5564461213986277, "learning_rate": 2.5768628954330566e-06, "loss": 0.016372019052505495, "step": 64035 }, { "epoch": 0.6027294117647058, "grad_norm": 0.6586596870870484, "learning_rate": 2.5767622960640747e-06, "loss": 0.016599631309509276, "step": 64040 }, { "epoch": 0.6027764705882352, "grad_norm": 0.4635293818513981, "learning_rate": 2.576661708476212e-06, "loss": 0.017618721723556517, "step": 64045 }, { "epoch": 0.6028235294117648, "grad_norm": 0.5419868278073091, "learning_rate": 2.5765611326671685e-06, "loss": 0.016293753683567048, "step": 64050 }, { "epoch": 0.6028705882352942, "grad_norm": 0.5210820681118605, "learning_rate": 2.5764605686346466e-06, "loss": 0.01716000437736511, "step": 64055 }, { "epoch": 0.6029176470588236, "grad_norm": 1.2662313499339521, "learning_rate": 2.576360016376348e-06, "loss": 0.01405729353427887, "step": 64060 }, { "epoch": 0.602964705882353, "grad_norm": 0.413247169584979, "learning_rate": 2.5762594758899753e-06, "loss": 0.01376970112323761, "step": 64065 }, { "epoch": 0.6030117647058824, "grad_norm": 0.46376522314610763, "learning_rate": 2.5761589471732308e-06, "loss": 0.01682364344596863, "step": 64070 }, { "epoch": 0.6030588235294118, "grad_norm": 0.43389066548091215, "learning_rate": 2.5760584302238195e-06, "loss": 0.01687772572040558, "step": 64075 }, { "epoch": 0.6031058823529412, "grad_norm": 0.7026925222204644, "learning_rate": 2.575957925039446e-06, "loss": 0.017854000627994537, "step": 64080 }, { "epoch": 0.6031529411764706, "grad_norm": 0.4872130690449563, "learning_rate": 2.575857431617815e-06, "loss": 0.01551910638809204, "step": 64085 }, { "epoch": 0.6032, "grad_norm": 0.6057034440675221, "learning_rate": 2.5757569499566314e-06, "loss": 0.01639838218688965, "step": 64090 }, { "epoch": 0.6032470588235294, "grad_norm": 0.5330822278482203, "learning_rate": 2.5756564800536026e-06, "loss": 0.016022756695747375, "step": 64095 }, { "epoch": 0.6032941176470589, "grad_norm": 0.5207409800378856, "learning_rate": 2.5755560219064353e-06, "loss": 0.014964358508586883, "step": 64100 }, { "epoch": 0.6033411764705883, "grad_norm": 0.3642049176383334, "learning_rate": 2.5754555755128374e-06, "loss": 0.014930589497089386, "step": 64105 }, { "epoch": 0.6033882352941177, "grad_norm": 0.2653875718565457, "learning_rate": 2.5753551408705168e-06, "loss": 0.015167054533958436, "step": 64110 }, { "epoch": 0.6034352941176471, "grad_norm": 0.7043438907360724, "learning_rate": 2.5752547179771826e-06, "loss": 0.01919420063495636, "step": 64115 }, { "epoch": 0.6034823529411765, "grad_norm": 0.3278539444622586, "learning_rate": 2.575154306830544e-06, "loss": 0.015553233027458192, "step": 64120 }, { "epoch": 0.6035294117647059, "grad_norm": 0.3900197870655657, "learning_rate": 2.575053907428311e-06, "loss": 0.01631039083003998, "step": 64125 }, { "epoch": 0.6035764705882353, "grad_norm": 0.528040771492148, "learning_rate": 2.5749535197681945e-06, "loss": 0.017892053723335265, "step": 64130 }, { "epoch": 0.6036235294117647, "grad_norm": 0.7303487955474048, "learning_rate": 2.5748531438479067e-06, "loss": 0.014085891842842101, "step": 64135 }, { "epoch": 0.6036705882352941, "grad_norm": 0.4706091856006466, "learning_rate": 2.5747527796651583e-06, "loss": 0.012312337756156921, "step": 64140 }, { "epoch": 0.6037176470588236, "grad_norm": 0.5171234737848032, "learning_rate": 2.5746524272176625e-06, "loss": 0.015757866203784943, "step": 64145 }, { "epoch": 0.603764705882353, "grad_norm": 0.4987741086390307, "learning_rate": 2.5745520865031324e-06, "loss": 0.017998068034648894, "step": 64150 }, { "epoch": 0.6038117647058824, "grad_norm": 0.3861481385802006, "learning_rate": 2.5744517575192823e-06, "loss": 0.016014339029788972, "step": 64155 }, { "epoch": 0.6038588235294118, "grad_norm": 0.6235864267805095, "learning_rate": 2.5743514402638265e-06, "loss": 0.015158230066299438, "step": 64160 }, { "epoch": 0.6039058823529412, "grad_norm": 0.37193810492019114, "learning_rate": 2.5742511347344797e-06, "loss": 0.015161843597888946, "step": 64165 }, { "epoch": 0.6039529411764706, "grad_norm": 0.41413463954845103, "learning_rate": 2.5741508409289574e-06, "loss": 0.013911169767379761, "step": 64170 }, { "epoch": 0.604, "grad_norm": 0.49001811295540615, "learning_rate": 2.5740505588449764e-06, "loss": 0.015368252992630005, "step": 64175 }, { "epoch": 0.6040470588235294, "grad_norm": 0.34270034480669154, "learning_rate": 2.573950288480254e-06, "loss": 0.013553552329540253, "step": 64180 }, { "epoch": 0.6040941176470588, "grad_norm": 0.5667229934113021, "learning_rate": 2.573850029832508e-06, "loss": 0.01706046313047409, "step": 64185 }, { "epoch": 0.6041411764705882, "grad_norm": 0.716054580599044, "learning_rate": 2.5737497828994562e-06, "loss": 0.015899647772312165, "step": 64190 }, { "epoch": 0.6041882352941177, "grad_norm": 0.5231864095816326, "learning_rate": 2.5736495476788165e-06, "loss": 0.01524306833744049, "step": 64195 }, { "epoch": 0.6042352941176471, "grad_norm": 0.524426455842851, "learning_rate": 2.5735493241683103e-06, "loss": 0.021494978666305543, "step": 64200 }, { "epoch": 0.6042823529411765, "grad_norm": 0.6674125484369785, "learning_rate": 2.5734491123656557e-06, "loss": 0.01550690233707428, "step": 64205 }, { "epoch": 0.6043294117647059, "grad_norm": 0.9118272470726121, "learning_rate": 2.573348912268575e-06, "loss": 0.013758224248886109, "step": 64210 }, { "epoch": 0.6043764705882353, "grad_norm": 0.4533961756698545, "learning_rate": 2.573248723874788e-06, "loss": 0.01670551747083664, "step": 64215 }, { "epoch": 0.6044235294117647, "grad_norm": 0.4683202820214901, "learning_rate": 2.5731485471820185e-06, "loss": 0.014783002436161041, "step": 64220 }, { "epoch": 0.6044705882352941, "grad_norm": 0.4413524502732382, "learning_rate": 2.5730483821879877e-06, "loss": 0.013919402658939362, "step": 64225 }, { "epoch": 0.6045176470588235, "grad_norm": 0.46440204472676944, "learning_rate": 2.572948228890419e-06, "loss": 0.013351543247699738, "step": 64230 }, { "epoch": 0.6045647058823529, "grad_norm": 0.45268643712226403, "learning_rate": 2.5728480872870364e-06, "loss": 0.02453215718269348, "step": 64235 }, { "epoch": 0.6046117647058824, "grad_norm": 0.5115394168756556, "learning_rate": 2.572747957375565e-06, "loss": 0.017535784840583803, "step": 64240 }, { "epoch": 0.6046588235294118, "grad_norm": 0.3846183629081558, "learning_rate": 2.5726478391537286e-06, "loss": 0.015823233127593993, "step": 64245 }, { "epoch": 0.6047058823529412, "grad_norm": 0.726575130742689, "learning_rate": 2.5725477326192534e-06, "loss": 0.015870994329452513, "step": 64250 }, { "epoch": 0.6047529411764706, "grad_norm": 0.49999028827220987, "learning_rate": 2.5724476377698657e-06, "loss": 0.01669093668460846, "step": 64255 }, { "epoch": 0.6048, "grad_norm": 0.5475414801986132, "learning_rate": 2.5723475546032927e-06, "loss": 0.015226760506629944, "step": 64260 }, { "epoch": 0.6048470588235294, "grad_norm": 0.4423293787992264, "learning_rate": 2.5722474831172616e-06, "loss": 0.015037184953689576, "step": 64265 }, { "epoch": 0.6048941176470588, "grad_norm": 0.4981796732803354, "learning_rate": 2.5721474233095013e-06, "loss": 0.02165898382663727, "step": 64270 }, { "epoch": 0.6049411764705882, "grad_norm": 0.4148232563233309, "learning_rate": 2.572047375177739e-06, "loss": 0.014958053827285767, "step": 64275 }, { "epoch": 0.6049882352941176, "grad_norm": 0.32238102110211303, "learning_rate": 2.571947338719705e-06, "loss": 0.014665815234184264, "step": 64280 }, { "epoch": 0.605035294117647, "grad_norm": 0.44237332060595447, "learning_rate": 2.5718473139331297e-06, "loss": 0.016032376885414125, "step": 64285 }, { "epoch": 0.6050823529411765, "grad_norm": 0.603731436865389, "learning_rate": 2.5717473008157435e-06, "loss": 0.016901457309722902, "step": 64290 }, { "epoch": 0.6051294117647059, "grad_norm": 0.569339916105302, "learning_rate": 2.571647299365277e-06, "loss": 0.014802385866641999, "step": 64295 }, { "epoch": 0.6051764705882353, "grad_norm": 0.5455772461651971, "learning_rate": 2.571547309579463e-06, "loss": 0.015711158514022827, "step": 64300 }, { "epoch": 0.6052235294117647, "grad_norm": 0.3733555752481869, "learning_rate": 2.5714473314560327e-06, "loss": 0.01497965008020401, "step": 64305 }, { "epoch": 0.6052705882352941, "grad_norm": 0.4047664147088265, "learning_rate": 2.5713473649927206e-06, "loss": 0.01607903391122818, "step": 64310 }, { "epoch": 0.6053176470588235, "grad_norm": 0.4659060511405547, "learning_rate": 2.57124741018726e-06, "loss": 0.013093426823616028, "step": 64315 }, { "epoch": 0.6053647058823529, "grad_norm": 0.7926848162897295, "learning_rate": 2.5711474670373844e-06, "loss": 0.019773051142692566, "step": 64320 }, { "epoch": 0.6054117647058823, "grad_norm": 0.49145725138873203, "learning_rate": 2.5710475355408295e-06, "loss": 0.016345056891441345, "step": 64325 }, { "epoch": 0.6054588235294117, "grad_norm": 0.4541450322356155, "learning_rate": 2.5709476156953305e-06, "loss": 0.012293905019760132, "step": 64330 }, { "epoch": 0.6055058823529412, "grad_norm": 0.4142368512531632, "learning_rate": 2.5708477074986245e-06, "loss": 0.017908433079719545, "step": 64335 }, { "epoch": 0.6055529411764706, "grad_norm": 0.6026484832204413, "learning_rate": 2.5707478109484467e-06, "loss": 0.01605443060398102, "step": 64340 }, { "epoch": 0.6056, "grad_norm": 0.4375919009893348, "learning_rate": 2.570647926042536e-06, "loss": 0.013547666370868683, "step": 64345 }, { "epoch": 0.6056470588235294, "grad_norm": 0.4439482093087157, "learning_rate": 2.5705480527786296e-06, "loss": 0.01586916744709015, "step": 64350 }, { "epoch": 0.6056941176470588, "grad_norm": 0.4222870042993501, "learning_rate": 2.5704481911544658e-06, "loss": 0.013462141156196594, "step": 64355 }, { "epoch": 0.6057411764705882, "grad_norm": 0.45728801167290484, "learning_rate": 2.570348341167786e-06, "loss": 0.015571627020835876, "step": 64360 }, { "epoch": 0.6057882352941176, "grad_norm": 0.49660065623139904, "learning_rate": 2.570248502816327e-06, "loss": 0.01705416142940521, "step": 64365 }, { "epoch": 0.605835294117647, "grad_norm": 0.5636046738392321, "learning_rate": 2.570148676097831e-06, "loss": 0.016203463077545166, "step": 64370 }, { "epoch": 0.6058823529411764, "grad_norm": 0.6975262181125585, "learning_rate": 2.5700488610100387e-06, "loss": 0.018375489115715026, "step": 64375 }, { "epoch": 0.6059294117647058, "grad_norm": 0.4272727814692135, "learning_rate": 2.5699490575506923e-06, "loss": 0.018928234279155732, "step": 64380 }, { "epoch": 0.6059764705882353, "grad_norm": 0.34674167733228484, "learning_rate": 2.5698492657175337e-06, "loss": 0.009732285141944885, "step": 64385 }, { "epoch": 0.6060235294117647, "grad_norm": 0.5162428620309227, "learning_rate": 2.5697494855083053e-06, "loss": 0.014689092338085175, "step": 64390 }, { "epoch": 0.6060705882352941, "grad_norm": 0.35354000791891405, "learning_rate": 2.5696497169207518e-06, "loss": 0.020183254778385163, "step": 64395 }, { "epoch": 0.6061176470588235, "grad_norm": 0.5543903188853764, "learning_rate": 2.569549959952617e-06, "loss": 0.01910674273967743, "step": 64400 }, { "epoch": 0.6061647058823529, "grad_norm": 0.42200842551885637, "learning_rate": 2.5694502146016455e-06, "loss": 0.01711236834526062, "step": 64405 }, { "epoch": 0.6062117647058823, "grad_norm": 0.648483185560002, "learning_rate": 2.5693504808655824e-06, "loss": 0.016720396280288697, "step": 64410 }, { "epoch": 0.6062588235294117, "grad_norm": 0.6192032115077626, "learning_rate": 2.5692507587421732e-06, "loss": 0.018860429525375366, "step": 64415 }, { "epoch": 0.6063058823529411, "grad_norm": 0.49801631247095235, "learning_rate": 2.5691510482291667e-06, "loss": 0.01452031433582306, "step": 64420 }, { "epoch": 0.6063529411764705, "grad_norm": 0.6349361079071252, "learning_rate": 2.5690513493243074e-06, "loss": 0.01846672296524048, "step": 64425 }, { "epoch": 0.6064, "grad_norm": 0.9390766738153814, "learning_rate": 2.568951662025345e-06, "loss": 0.02459903210401535, "step": 64430 }, { "epoch": 0.6064470588235295, "grad_norm": 0.45691525905900804, "learning_rate": 2.5688519863300275e-06, "loss": 0.017299866676330565, "step": 64435 }, { "epoch": 0.6064941176470589, "grad_norm": 0.6062673487256959, "learning_rate": 2.568752322236103e-06, "loss": 0.012065254151821136, "step": 64440 }, { "epoch": 0.6065411764705883, "grad_norm": 0.4849698126617524, "learning_rate": 2.5686526697413235e-06, "loss": 0.020620056986808778, "step": 64445 }, { "epoch": 0.6065882352941177, "grad_norm": 0.5692110282580034, "learning_rate": 2.568553028843436e-06, "loss": 0.021291431784629822, "step": 64450 }, { "epoch": 0.606635294117647, "grad_norm": 0.33553592269328314, "learning_rate": 2.568453399540195e-06, "loss": 0.017184911668300627, "step": 64455 }, { "epoch": 0.6066823529411765, "grad_norm": 0.3213691253868584, "learning_rate": 2.5683537818293484e-06, "loss": 0.01579018235206604, "step": 64460 }, { "epoch": 0.6067294117647059, "grad_norm": 0.7799105707763636, "learning_rate": 2.5682541757086514e-06, "loss": 0.016050371527671813, "step": 64465 }, { "epoch": 0.6067764705882353, "grad_norm": 0.5896134829982105, "learning_rate": 2.568154581175855e-06, "loss": 0.01821815371513367, "step": 64470 }, { "epoch": 0.6068235294117647, "grad_norm": 0.5213863132417318, "learning_rate": 2.5680549982287133e-06, "loss": 0.015594516694545747, "step": 64475 }, { "epoch": 0.6068705882352942, "grad_norm": 0.6159518916372524, "learning_rate": 2.567955426864979e-06, "loss": 0.013432884216308593, "step": 64480 }, { "epoch": 0.6069176470588236, "grad_norm": 0.5197179729252185, "learning_rate": 2.5678558670824084e-06, "loss": 0.01595742404460907, "step": 64485 }, { "epoch": 0.606964705882353, "grad_norm": 0.4510346963086347, "learning_rate": 2.567756318878756e-06, "loss": 0.013958612084388733, "step": 64490 }, { "epoch": 0.6070117647058824, "grad_norm": 0.7694656906244817, "learning_rate": 2.567656782251777e-06, "loss": 0.016088196635246278, "step": 64495 }, { "epoch": 0.6070588235294118, "grad_norm": 0.3539919137181278, "learning_rate": 2.5675572571992273e-06, "loss": 0.014418050646781921, "step": 64500 }, { "epoch": 0.6071058823529412, "grad_norm": 0.46377688877291495, "learning_rate": 2.567457743718866e-06, "loss": 0.014747941493988037, "step": 64505 }, { "epoch": 0.6071529411764706, "grad_norm": 0.5722326565610522, "learning_rate": 2.5673582418084487e-06, "loss": 0.01566547155380249, "step": 64510 }, { "epoch": 0.6072, "grad_norm": 0.40268636213310693, "learning_rate": 2.5672587514657342e-06, "loss": 0.014962723851203919, "step": 64515 }, { "epoch": 0.6072470588235294, "grad_norm": 0.4741242043182923, "learning_rate": 2.567159272688482e-06, "loss": 0.01544651985168457, "step": 64520 }, { "epoch": 0.6072941176470589, "grad_norm": 0.6297346170153186, "learning_rate": 2.5670598054744513e-06, "loss": 0.016143418848514557, "step": 64525 }, { "epoch": 0.6073411764705883, "grad_norm": 0.39627011555031494, "learning_rate": 2.5669603498214007e-06, "loss": 0.013838151097297668, "step": 64530 }, { "epoch": 0.6073882352941177, "grad_norm": 0.5683633355408723, "learning_rate": 2.5668609057270926e-06, "loss": 0.015617820620536804, "step": 64535 }, { "epoch": 0.6074352941176471, "grad_norm": 0.727359471944759, "learning_rate": 2.5667614731892878e-06, "loss": 0.014574989676475525, "step": 64540 }, { "epoch": 0.6074823529411765, "grad_norm": 0.6502833721235053, "learning_rate": 2.566662052205748e-06, "loss": 0.020569178462028503, "step": 64545 }, { "epoch": 0.6075294117647059, "grad_norm": 0.4917596349350289, "learning_rate": 2.566562642774235e-06, "loss": 0.014334698021411896, "step": 64550 }, { "epoch": 0.6075764705882353, "grad_norm": 0.34545601183749275, "learning_rate": 2.566463244892513e-06, "loss": 0.01562730371952057, "step": 64555 }, { "epoch": 0.6076235294117647, "grad_norm": 0.4877552511712374, "learning_rate": 2.566363858558345e-06, "loss": 0.019069579243659974, "step": 64560 }, { "epoch": 0.6076705882352941, "grad_norm": 0.5579348830152396, "learning_rate": 2.566264483769495e-06, "loss": 0.018376679718494417, "step": 64565 }, { "epoch": 0.6077176470588236, "grad_norm": 0.4111904090796223, "learning_rate": 2.5661651205237287e-06, "loss": 0.015444803237915038, "step": 64570 }, { "epoch": 0.607764705882353, "grad_norm": 0.6241775965264625, "learning_rate": 2.5660657688188113e-06, "loss": 0.01685154438018799, "step": 64575 }, { "epoch": 0.6078117647058824, "grad_norm": 0.3933284173710925, "learning_rate": 2.5659664286525087e-06, "loss": 0.013263781368732453, "step": 64580 }, { "epoch": 0.6078588235294118, "grad_norm": 0.4316806819441337, "learning_rate": 2.5658671000225878e-06, "loss": 0.01794385015964508, "step": 64585 }, { "epoch": 0.6079058823529412, "grad_norm": 0.6867660965447775, "learning_rate": 2.5657677829268157e-06, "loss": 0.01742987334728241, "step": 64590 }, { "epoch": 0.6079529411764706, "grad_norm": 0.8020577127608339, "learning_rate": 2.5656684773629597e-06, "loss": 0.021624734997749327, "step": 64595 }, { "epoch": 0.608, "grad_norm": 0.4732902984624611, "learning_rate": 2.56556918332879e-06, "loss": 0.021058610081672667, "step": 64600 }, { "epoch": 0.6080470588235294, "grad_norm": 0.41248800958742926, "learning_rate": 2.565469900822074e-06, "loss": 0.016000697016716005, "step": 64605 }, { "epoch": 0.6080941176470588, "grad_norm": 0.8605437025910224, "learning_rate": 2.565370629840583e-06, "loss": 0.019753751158714295, "step": 64610 }, { "epoch": 0.6081411764705882, "grad_norm": 0.6440268069041792, "learning_rate": 2.565271370382086e-06, "loss": 0.01669364869594574, "step": 64615 }, { "epoch": 0.6081882352941177, "grad_norm": 0.6156650466668493, "learning_rate": 2.5651721224443542e-06, "loss": 0.015481115877628326, "step": 64620 }, { "epoch": 0.6082352941176471, "grad_norm": 0.398806795075903, "learning_rate": 2.56507288602516e-06, "loss": 0.012980538606643676, "step": 64625 }, { "epoch": 0.6082823529411765, "grad_norm": 0.5937329929999415, "learning_rate": 2.5649736611222744e-06, "loss": 0.015597623586654664, "step": 64630 }, { "epoch": 0.6083294117647059, "grad_norm": 0.5915991284263041, "learning_rate": 2.5648744477334703e-06, "loss": 0.018609923124313355, "step": 64635 }, { "epoch": 0.6083764705882353, "grad_norm": 0.6878037647376689, "learning_rate": 2.5647752458565217e-06, "loss": 0.015196411311626435, "step": 64640 }, { "epoch": 0.6084235294117647, "grad_norm": 0.3450123232759049, "learning_rate": 2.5646760554892024e-06, "loss": 0.01681787073612213, "step": 64645 }, { "epoch": 0.6084705882352941, "grad_norm": 0.5426870555299826, "learning_rate": 2.5645768766292866e-06, "loss": 0.019799670577049254, "step": 64650 }, { "epoch": 0.6085176470588235, "grad_norm": 0.4078390953228435, "learning_rate": 2.5644777092745497e-06, "loss": 0.016261029243469238, "step": 64655 }, { "epoch": 0.6085647058823529, "grad_norm": 0.5183528143901005, "learning_rate": 2.5643785534227673e-06, "loss": 0.016986650228500367, "step": 64660 }, { "epoch": 0.6086117647058824, "grad_norm": 0.5025782525704117, "learning_rate": 2.564279409071716e-06, "loss": 0.023930469155311586, "step": 64665 }, { "epoch": 0.6086588235294118, "grad_norm": 0.8497871666402471, "learning_rate": 2.564180276219172e-06, "loss": 0.017269407212734223, "step": 64670 }, { "epoch": 0.6087058823529412, "grad_norm": 0.5656430952500517, "learning_rate": 2.564081154862914e-06, "loss": 0.03501828610897064, "step": 64675 }, { "epoch": 0.6087529411764706, "grad_norm": 0.3724455257009697, "learning_rate": 2.5639820450007195e-06, "loss": 0.01567908376455307, "step": 64680 }, { "epoch": 0.6088, "grad_norm": 0.45876395264596315, "learning_rate": 2.5638829466303677e-06, "loss": 0.012055899202823638, "step": 64685 }, { "epoch": 0.6088470588235294, "grad_norm": 0.4679584458959983, "learning_rate": 2.5637838597496368e-06, "loss": 0.014849381148815155, "step": 64690 }, { "epoch": 0.6088941176470588, "grad_norm": 0.3628992120104068, "learning_rate": 2.563684784356308e-06, "loss": 0.01829257160425186, "step": 64695 }, { "epoch": 0.6089411764705882, "grad_norm": 0.5188656947271536, "learning_rate": 2.563585720448162e-06, "loss": 0.013041028380393982, "step": 64700 }, { "epoch": 0.6089882352941176, "grad_norm": 0.5188938800646783, "learning_rate": 2.5634866680229788e-06, "loss": 0.013498325645923615, "step": 64705 }, { "epoch": 0.609035294117647, "grad_norm": 0.433231779465776, "learning_rate": 2.5633876270785407e-06, "loss": 0.015766346454620363, "step": 64710 }, { "epoch": 0.6090823529411765, "grad_norm": 0.6016689379493347, "learning_rate": 2.5632885976126303e-06, "loss": 0.017808327078819276, "step": 64715 }, { "epoch": 0.6091294117647059, "grad_norm": 0.5108276658709359, "learning_rate": 2.563189579623031e-06, "loss": 0.016498014330863953, "step": 64720 }, { "epoch": 0.6091764705882353, "grad_norm": 0.4263195811006028, "learning_rate": 2.563090573107525e-06, "loss": 0.013622009754180908, "step": 64725 }, { "epoch": 0.6092235294117647, "grad_norm": 0.37849016921228623, "learning_rate": 2.5629915780638976e-06, "loss": 0.013495172560214996, "step": 64730 }, { "epoch": 0.6092705882352941, "grad_norm": 0.6509070775501826, "learning_rate": 2.562892594489933e-06, "loss": 0.02104692906141281, "step": 64735 }, { "epoch": 0.6093176470588235, "grad_norm": 0.5765183411686889, "learning_rate": 2.5627936223834173e-06, "loss": 0.01792098879814148, "step": 64740 }, { "epoch": 0.6093647058823529, "grad_norm": 0.5135554657431891, "learning_rate": 2.5626946617421356e-06, "loss": 0.015756426751613616, "step": 64745 }, { "epoch": 0.6094117647058823, "grad_norm": 0.5366971304984839, "learning_rate": 2.5625957125638744e-06, "loss": 0.012676747143268585, "step": 64750 }, { "epoch": 0.6094588235294117, "grad_norm": 0.5192150630979562, "learning_rate": 2.5624967748464214e-06, "loss": 0.015073937177658082, "step": 64755 }, { "epoch": 0.6095058823529412, "grad_norm": 0.37291484919039597, "learning_rate": 2.5623978485875644e-06, "loss": 0.0155254065990448, "step": 64760 }, { "epoch": 0.6095529411764706, "grad_norm": 0.42407541125163717, "learning_rate": 2.562298933785092e-06, "loss": 0.01833013892173767, "step": 64765 }, { "epoch": 0.6096, "grad_norm": 0.3922977088008215, "learning_rate": 2.5622000304367917e-06, "loss": 0.015321749448776244, "step": 64770 }, { "epoch": 0.6096470588235294, "grad_norm": 0.8222429895848485, "learning_rate": 2.562101138540454e-06, "loss": 0.022186245024204253, "step": 64775 }, { "epoch": 0.6096941176470588, "grad_norm": 0.5381045418960705, "learning_rate": 2.5620022580938696e-06, "loss": 0.017511266469955444, "step": 64780 }, { "epoch": 0.6097411764705882, "grad_norm": 0.32635320145301194, "learning_rate": 2.561903389094829e-06, "loss": 0.012055546045303345, "step": 64785 }, { "epoch": 0.6097882352941176, "grad_norm": 0.5419116917543302, "learning_rate": 2.5618045315411226e-06, "loss": 0.02050090879201889, "step": 64790 }, { "epoch": 0.609835294117647, "grad_norm": 0.5146607259602894, "learning_rate": 2.561705685430543e-06, "loss": 0.02125793695449829, "step": 64795 }, { "epoch": 0.6098823529411764, "grad_norm": 0.5336272606749201, "learning_rate": 2.561606850760883e-06, "loss": 0.017043735086917877, "step": 64800 }, { "epoch": 0.6099294117647058, "grad_norm": 0.5627767513085488, "learning_rate": 2.561508027529936e-06, "loss": 0.016043031215667726, "step": 64805 }, { "epoch": 0.6099764705882353, "grad_norm": 0.3097015717997527, "learning_rate": 2.5614092157354946e-06, "loss": 0.011251789331436158, "step": 64810 }, { "epoch": 0.6100235294117647, "grad_norm": 0.7747332267632361, "learning_rate": 2.561310415375354e-06, "loss": 0.02071356773376465, "step": 64815 }, { "epoch": 0.6100705882352941, "grad_norm": 0.4021553144748036, "learning_rate": 2.5612116264473085e-06, "loss": 0.014151394367218018, "step": 64820 }, { "epoch": 0.6101176470588235, "grad_norm": 0.5325777772755591, "learning_rate": 2.5611128489491537e-06, "loss": 0.01844901144504547, "step": 64825 }, { "epoch": 0.6101647058823529, "grad_norm": 0.5289458128754805, "learning_rate": 2.5610140828786863e-06, "loss": 0.01692478358745575, "step": 64830 }, { "epoch": 0.6102117647058823, "grad_norm": 0.5399707810533984, "learning_rate": 2.5609153282337024e-06, "loss": 0.014797519147396087, "step": 64835 }, { "epoch": 0.6102588235294117, "grad_norm": 0.5915324887780065, "learning_rate": 2.560816585011999e-06, "loss": 0.012888506054878235, "step": 64840 }, { "epoch": 0.6103058823529411, "grad_norm": 0.9230237288328964, "learning_rate": 2.560717853211376e-06, "loss": 0.015849468111991883, "step": 64845 }, { "epoch": 0.6103529411764705, "grad_norm": 0.37272313227631937, "learning_rate": 2.5606191328296286e-06, "loss": 0.01989462673664093, "step": 64850 }, { "epoch": 0.6104, "grad_norm": 0.43003190264756536, "learning_rate": 2.560520423864559e-06, "loss": 0.01797814667224884, "step": 64855 }, { "epoch": 0.6104470588235295, "grad_norm": 0.5675277673289095, "learning_rate": 2.5604217263139645e-06, "loss": 0.01723438799381256, "step": 64860 }, { "epoch": 0.6104941176470589, "grad_norm": 0.3772664252678041, "learning_rate": 2.5603230401756467e-06, "loss": 0.01645122468471527, "step": 64865 }, { "epoch": 0.6105411764705883, "grad_norm": 0.43046377713257167, "learning_rate": 2.560224365447407e-06, "loss": 0.0165378600358963, "step": 64870 }, { "epoch": 0.6105882352941177, "grad_norm": 0.5526699659031156, "learning_rate": 2.560125702127045e-06, "loss": 0.016353990137577056, "step": 64875 }, { "epoch": 0.610635294117647, "grad_norm": 0.5085480671741236, "learning_rate": 2.560027050212364e-06, "loss": 0.01637265533208847, "step": 64880 }, { "epoch": 0.6106823529411765, "grad_norm": 0.31101849019854616, "learning_rate": 2.559928409701166e-06, "loss": 0.016675955057144164, "step": 64885 }, { "epoch": 0.6107294117647059, "grad_norm": 0.5553707597032818, "learning_rate": 2.5598297805912557e-06, "loss": 0.016613924503326417, "step": 64890 }, { "epoch": 0.6107764705882353, "grad_norm": 0.5101714717544084, "learning_rate": 2.559731162880435e-06, "loss": 0.017160555720329283, "step": 64895 }, { "epoch": 0.6108235294117647, "grad_norm": 0.46285646760011484, "learning_rate": 2.559632556566509e-06, "loss": 0.017795464396476744, "step": 64900 }, { "epoch": 0.6108705882352942, "grad_norm": 0.3659902369859966, "learning_rate": 2.559533961647283e-06, "loss": 0.015525391697883606, "step": 64905 }, { "epoch": 0.6109176470588236, "grad_norm": 0.6990335302367525, "learning_rate": 2.5594353781205627e-06, "loss": 0.019074280560016633, "step": 64910 }, { "epoch": 0.610964705882353, "grad_norm": 0.5365130435584766, "learning_rate": 2.559336805984154e-06, "loss": 0.017890727519989012, "step": 64915 }, { "epoch": 0.6110117647058824, "grad_norm": 0.6472932790563951, "learning_rate": 2.5592382452358632e-06, "loss": 0.019344156980514525, "step": 64920 }, { "epoch": 0.6110588235294118, "grad_norm": 0.6177401383216621, "learning_rate": 2.559139695873499e-06, "loss": 0.01433870792388916, "step": 64925 }, { "epoch": 0.6111058823529412, "grad_norm": 0.47660891979033626, "learning_rate": 2.5590411578948675e-06, "loss": 0.01711103320121765, "step": 64930 }, { "epoch": 0.6111529411764706, "grad_norm": 0.5879146094413479, "learning_rate": 2.558942631297779e-06, "loss": 0.016315284371376037, "step": 64935 }, { "epoch": 0.6112, "grad_norm": 0.6333784764975176, "learning_rate": 2.558844116080042e-06, "loss": 0.020784682035446166, "step": 64940 }, { "epoch": 0.6112470588235294, "grad_norm": 0.6915908299530648, "learning_rate": 2.558745612239466e-06, "loss": 0.015368270874023437, "step": 64945 }, { "epoch": 0.6112941176470589, "grad_norm": 0.4371181699420628, "learning_rate": 2.558647119773861e-06, "loss": 0.013303905725479126, "step": 64950 }, { "epoch": 0.6113411764705883, "grad_norm": 0.46267303209453897, "learning_rate": 2.5585486386810392e-06, "loss": 0.01757299304008484, "step": 64955 }, { "epoch": 0.6113882352941177, "grad_norm": 0.48427109296192433, "learning_rate": 2.558450168958811e-06, "loss": 0.01790708601474762, "step": 64960 }, { "epoch": 0.6114352941176471, "grad_norm": 0.4292905575839931, "learning_rate": 2.558351710604989e-06, "loss": 0.012287969887256622, "step": 64965 }, { "epoch": 0.6114823529411765, "grad_norm": 0.5317095337720785, "learning_rate": 2.558253263617385e-06, "loss": 0.02022969126701355, "step": 64970 }, { "epoch": 0.6115294117647059, "grad_norm": 0.5236128638846965, "learning_rate": 2.5581548279938132e-06, "loss": 0.01866513192653656, "step": 64975 }, { "epoch": 0.6115764705882353, "grad_norm": 0.5571241779348652, "learning_rate": 2.5580564037320877e-06, "loss": 0.019830521941184998, "step": 64980 }, { "epoch": 0.6116235294117647, "grad_norm": 0.3578565918860726, "learning_rate": 2.557957990830022e-06, "loss": 0.018188945949077606, "step": 64985 }, { "epoch": 0.6116705882352941, "grad_norm": 0.45204020348436164, "learning_rate": 2.557859589285432e-06, "loss": 0.015196684002876281, "step": 64990 }, { "epoch": 0.6117176470588235, "grad_norm": 0.5926227675383581, "learning_rate": 2.5577611990961326e-06, "loss": 0.014633280038833619, "step": 64995 }, { "epoch": 0.611764705882353, "grad_norm": 0.5400338122214409, "learning_rate": 2.5576628202599408e-06, "loss": 0.018560631573200224, "step": 65000 }, { "epoch": 0.6118117647058824, "grad_norm": 0.5202313486210817, "learning_rate": 2.5575644527746723e-06, "loss": 0.015164059400558472, "step": 65005 }, { "epoch": 0.6118588235294118, "grad_norm": 0.5565764338685185, "learning_rate": 2.557466096638146e-06, "loss": 0.014729940891265869, "step": 65010 }, { "epoch": 0.6119058823529412, "grad_norm": 0.3952475232402788, "learning_rate": 2.557367751848178e-06, "loss": 0.01641945093870163, "step": 65015 }, { "epoch": 0.6119529411764706, "grad_norm": 0.638219403887324, "learning_rate": 2.5572694184025887e-06, "loss": 0.01796276867389679, "step": 65020 }, { "epoch": 0.612, "grad_norm": 0.5743000997898797, "learning_rate": 2.557171096299196e-06, "loss": 0.01420891135931015, "step": 65025 }, { "epoch": 0.6120470588235294, "grad_norm": 0.4934119530947353, "learning_rate": 2.5570727855358202e-06, "loss": 0.024027509987354277, "step": 65030 }, { "epoch": 0.6120941176470588, "grad_norm": 0.5609570890108848, "learning_rate": 2.5569744861102814e-06, "loss": 0.01878254860639572, "step": 65035 }, { "epoch": 0.6121411764705882, "grad_norm": 1.2098382159613248, "learning_rate": 2.556876198020401e-06, "loss": 0.014103737473487855, "step": 65040 }, { "epoch": 0.6121882352941177, "grad_norm": 0.560313238848802, "learning_rate": 2.556777921264e-06, "loss": 0.01630622148513794, "step": 65045 }, { "epoch": 0.6122352941176471, "grad_norm": 0.5975696917055577, "learning_rate": 2.5566796558389e-06, "loss": 0.01928735673427582, "step": 65050 }, { "epoch": 0.6122823529411765, "grad_norm": 0.42749564062230844, "learning_rate": 2.5565814017429245e-06, "loss": 0.014106030762195586, "step": 65055 }, { "epoch": 0.6123294117647059, "grad_norm": 0.4965912261973852, "learning_rate": 2.556483158973897e-06, "loss": 0.016596004366874695, "step": 65060 }, { "epoch": 0.6123764705882353, "grad_norm": 0.5642771547035847, "learning_rate": 2.556384927529641e-06, "loss": 0.017262327671051025, "step": 65065 }, { "epoch": 0.6124235294117647, "grad_norm": 0.6893544041708556, "learning_rate": 2.55628670740798e-06, "loss": 0.01779761016368866, "step": 65070 }, { "epoch": 0.6124705882352941, "grad_norm": 0.5360328174235911, "learning_rate": 2.5561884986067405e-06, "loss": 0.019025498628616334, "step": 65075 }, { "epoch": 0.6125176470588235, "grad_norm": 0.6855818937228642, "learning_rate": 2.556090301123747e-06, "loss": 0.017191168665885926, "step": 65080 }, { "epoch": 0.6125647058823529, "grad_norm": 0.3592987353404119, "learning_rate": 2.5559921149568264e-06, "loss": 0.018557581305503845, "step": 65085 }, { "epoch": 0.6126117647058823, "grad_norm": 0.5759623927455094, "learning_rate": 2.5558939401038047e-06, "loss": 0.015539278090000153, "step": 65090 }, { "epoch": 0.6126588235294118, "grad_norm": 0.4909465370572337, "learning_rate": 2.5557957765625096e-06, "loss": 0.01790025532245636, "step": 65095 }, { "epoch": 0.6127058823529412, "grad_norm": 0.3676334153735526, "learning_rate": 2.5556976243307695e-06, "loss": 0.016160787642002107, "step": 65100 }, { "epoch": 0.6127529411764706, "grad_norm": 0.6905249912253657, "learning_rate": 2.555599483406413e-06, "loss": 0.01859355568885803, "step": 65105 }, { "epoch": 0.6128, "grad_norm": 0.5297583545375192, "learning_rate": 2.5555013537872676e-06, "loss": 0.016485737264156343, "step": 65110 }, { "epoch": 0.6128470588235294, "grad_norm": 0.34502752624629257, "learning_rate": 2.555403235471165e-06, "loss": 0.011413124948740005, "step": 65115 }, { "epoch": 0.6128941176470588, "grad_norm": 0.5841347434906801, "learning_rate": 2.5553051284559344e-06, "loss": 0.018113650381565094, "step": 65120 }, { "epoch": 0.6129411764705882, "grad_norm": 0.6313793976695925, "learning_rate": 2.555207032739407e-06, "loss": 0.016961225867271425, "step": 65125 }, { "epoch": 0.6129882352941176, "grad_norm": 0.3662055199237406, "learning_rate": 2.555108948319414e-06, "loss": 0.017624101042747496, "step": 65130 }, { "epoch": 0.613035294117647, "grad_norm": 0.4854308601644559, "learning_rate": 2.5550108751937874e-06, "loss": 0.016775447130203246, "step": 65135 }, { "epoch": 0.6130823529411765, "grad_norm": 0.6790592201571322, "learning_rate": 2.55491281336036e-06, "loss": 0.018377809226512908, "step": 65140 }, { "epoch": 0.6131294117647059, "grad_norm": 0.3413030547228489, "learning_rate": 2.554814762816965e-06, "loss": 0.016517266631126404, "step": 65145 }, { "epoch": 0.6131764705882353, "grad_norm": 0.6206365279602587, "learning_rate": 2.5547167235614355e-06, "loss": 0.01981617510318756, "step": 65150 }, { "epoch": 0.6132235294117647, "grad_norm": 0.34733909294558907, "learning_rate": 2.5546186955916074e-06, "loss": 0.01522301733493805, "step": 65155 }, { "epoch": 0.6132705882352941, "grad_norm": 0.380286474625993, "learning_rate": 2.554520678905314e-06, "loss": 0.0157832533121109, "step": 65160 }, { "epoch": 0.6133176470588235, "grad_norm": 0.43657485520236156, "learning_rate": 2.554422673500391e-06, "loss": 0.016503241658210755, "step": 65165 }, { "epoch": 0.6133647058823529, "grad_norm": 0.46697434567885665, "learning_rate": 2.554324679374676e-06, "loss": 0.016522791981697083, "step": 65170 }, { "epoch": 0.6134117647058823, "grad_norm": 0.38214218222408414, "learning_rate": 2.5542266965260035e-06, "loss": 0.01588185429573059, "step": 65175 }, { "epoch": 0.6134588235294117, "grad_norm": 0.47401715908305164, "learning_rate": 2.5541287249522127e-06, "loss": 0.014923597872257232, "step": 65180 }, { "epoch": 0.6135058823529411, "grad_norm": 0.3996047549050904, "learning_rate": 2.5540307646511403e-06, "loss": 0.011518833041191102, "step": 65185 }, { "epoch": 0.6135529411764706, "grad_norm": 0.546052109012489, "learning_rate": 2.5539328156206252e-06, "loss": 0.017242881655693054, "step": 65190 }, { "epoch": 0.6136, "grad_norm": 0.5552620369990738, "learning_rate": 2.5538348778585054e-06, "loss": 0.018398062884807588, "step": 65195 }, { "epoch": 0.6136470588235294, "grad_norm": 0.3671461673782224, "learning_rate": 2.553736951362622e-06, "loss": 0.01728512942790985, "step": 65200 }, { "epoch": 0.6136941176470588, "grad_norm": 0.6059149576305163, "learning_rate": 2.5536390361308142e-06, "loss": 0.015715083479881285, "step": 65205 }, { "epoch": 0.6137411764705882, "grad_norm": 0.695297642501993, "learning_rate": 2.5535411321609235e-06, "loss": 0.014845651388168336, "step": 65210 }, { "epoch": 0.6137882352941176, "grad_norm": 0.3094949610831417, "learning_rate": 2.5534432394507895e-06, "loss": 0.012414437532424927, "step": 65215 }, { "epoch": 0.613835294117647, "grad_norm": 0.476665621391809, "learning_rate": 2.5533453579982566e-06, "loss": 0.013284213840961456, "step": 65220 }, { "epoch": 0.6138823529411764, "grad_norm": 0.400728728273384, "learning_rate": 2.553247487801165e-06, "loss": 0.016171765327453614, "step": 65225 }, { "epoch": 0.6139294117647058, "grad_norm": 0.5107792145158585, "learning_rate": 2.553149628857359e-06, "loss": 0.01228383556008339, "step": 65230 }, { "epoch": 0.6139764705882353, "grad_norm": 0.4416788546427266, "learning_rate": 2.5530517811646815e-06, "loss": 0.01930519938468933, "step": 65235 }, { "epoch": 0.6140235294117647, "grad_norm": 0.35468218911144367, "learning_rate": 2.5529539447209773e-06, "loss": 0.01689040958881378, "step": 65240 }, { "epoch": 0.6140705882352941, "grad_norm": 0.5735570558844443, "learning_rate": 2.5528561195240914e-06, "loss": 0.01730615645647049, "step": 65245 }, { "epoch": 0.6141176470588235, "grad_norm": 0.39953861539826474, "learning_rate": 2.5527583055718686e-06, "loss": 0.015227331221103669, "step": 65250 }, { "epoch": 0.614164705882353, "grad_norm": 0.46278276475196645, "learning_rate": 2.552660502862155e-06, "loss": 0.01381896436214447, "step": 65255 }, { "epoch": 0.6142117647058823, "grad_norm": 0.5673437380417701, "learning_rate": 2.5525627113927966e-06, "loss": 0.016260087490081787, "step": 65260 }, { "epoch": 0.6142588235294117, "grad_norm": 0.4438540034862079, "learning_rate": 2.552464931161642e-06, "loss": 0.015249554812908173, "step": 65265 }, { "epoch": 0.6143058823529411, "grad_norm": 0.45151840092559203, "learning_rate": 2.552367162166537e-06, "loss": 0.020931935310363768, "step": 65270 }, { "epoch": 0.6143529411764705, "grad_norm": 0.6072759255680942, "learning_rate": 2.5522694044053308e-06, "loss": 0.016009482741355895, "step": 65275 }, { "epoch": 0.6144, "grad_norm": 0.39285595821653274, "learning_rate": 2.5521716578758726e-06, "loss": 0.015487265586853028, "step": 65280 }, { "epoch": 0.6144470588235295, "grad_norm": 0.5180958893861396, "learning_rate": 2.552073922576011e-06, "loss": 0.015895040333271028, "step": 65285 }, { "epoch": 0.6144941176470589, "grad_norm": 0.46800701395792504, "learning_rate": 2.5519761985035964e-06, "loss": 0.030077356100082397, "step": 65290 }, { "epoch": 0.6145411764705883, "grad_norm": 0.6953382715662272, "learning_rate": 2.55187848565648e-06, "loss": 0.01633552312850952, "step": 65295 }, { "epoch": 0.6145882352941177, "grad_norm": 1.3560271670843969, "learning_rate": 2.5517807840325117e-06, "loss": 0.013724225759506225, "step": 65300 }, { "epoch": 0.6146352941176471, "grad_norm": 0.6248414200055624, "learning_rate": 2.5516830936295433e-06, "loss": 0.02061968147754669, "step": 65305 }, { "epoch": 0.6146823529411765, "grad_norm": 0.5349350841181891, "learning_rate": 2.5515854144454276e-06, "loss": 0.01587357521057129, "step": 65310 }, { "epoch": 0.6147294117647059, "grad_norm": 0.5672680312473836, "learning_rate": 2.551487746478018e-06, "loss": 0.024092310667037965, "step": 65315 }, { "epoch": 0.6147764705882353, "grad_norm": 0.7580053780907118, "learning_rate": 2.551390089725167e-06, "loss": 0.01616069972515106, "step": 65320 }, { "epoch": 0.6148235294117647, "grad_norm": 0.42558092033482847, "learning_rate": 2.551292444184729e-06, "loss": 0.014132045209407806, "step": 65325 }, { "epoch": 0.6148705882352942, "grad_norm": 0.4832927670910361, "learning_rate": 2.5511948098545588e-06, "loss": 0.017077034711837767, "step": 65330 }, { "epoch": 0.6149176470588236, "grad_norm": 0.37428727550403645, "learning_rate": 2.55109718673251e-06, "loss": 0.015206056833267211, "step": 65335 }, { "epoch": 0.614964705882353, "grad_norm": 0.3620700232321232, "learning_rate": 2.5509995748164412e-06, "loss": 0.016543495655059814, "step": 65340 }, { "epoch": 0.6150117647058824, "grad_norm": 0.44093717411825234, "learning_rate": 2.5509019741042064e-06, "loss": 0.01637014150619507, "step": 65345 }, { "epoch": 0.6150588235294118, "grad_norm": 0.2950226750750709, "learning_rate": 2.550804384593664e-06, "loss": 0.011947902292013169, "step": 65350 }, { "epoch": 0.6151058823529412, "grad_norm": 0.5844483261106479, "learning_rate": 2.5507068062826696e-06, "loss": 0.017599967122077943, "step": 65355 }, { "epoch": 0.6151529411764706, "grad_norm": 0.8486957537429218, "learning_rate": 2.550609239169082e-06, "loss": 0.01380203664302826, "step": 65360 }, { "epoch": 0.6152, "grad_norm": 0.6523971846090172, "learning_rate": 2.550511683250761e-06, "loss": 0.01601538062095642, "step": 65365 }, { "epoch": 0.6152470588235294, "grad_norm": 0.3265990921831062, "learning_rate": 2.5504141385255645e-06, "loss": 0.011703889071941375, "step": 65370 }, { "epoch": 0.6152941176470588, "grad_norm": 0.571789796390745, "learning_rate": 2.550316604991353e-06, "loss": 0.01358097791671753, "step": 65375 }, { "epoch": 0.6153411764705883, "grad_norm": 0.5072715620857378, "learning_rate": 2.550219082645986e-06, "loss": 0.015812303125858306, "step": 65380 }, { "epoch": 0.6153882352941177, "grad_norm": 1.0583533834119656, "learning_rate": 2.550121571487325e-06, "loss": 0.01721721887588501, "step": 65385 }, { "epoch": 0.6154352941176471, "grad_norm": 0.5084248431649252, "learning_rate": 2.550024071513231e-06, "loss": 0.017610909044742586, "step": 65390 }, { "epoch": 0.6154823529411765, "grad_norm": 0.5276490560513389, "learning_rate": 2.549926582721567e-06, "loss": 0.017270827293395997, "step": 65395 }, { "epoch": 0.6155294117647059, "grad_norm": 0.37058925922251384, "learning_rate": 2.549829105110194e-06, "loss": 0.013091447949409484, "step": 65400 }, { "epoch": 0.6155764705882353, "grad_norm": 0.6427267129151842, "learning_rate": 2.549731638676977e-06, "loss": 0.01930176615715027, "step": 65405 }, { "epoch": 0.6156235294117647, "grad_norm": 0.5740511162430186, "learning_rate": 2.5496341834197787e-06, "loss": 0.018245872855186463, "step": 65410 }, { "epoch": 0.6156705882352941, "grad_norm": 0.5988538347516416, "learning_rate": 2.5495367393364634e-06, "loss": 0.01795153468847275, "step": 65415 }, { "epoch": 0.6157176470588235, "grad_norm": 0.3386076765341883, "learning_rate": 2.549439306424896e-06, "loss": 0.013174837827682495, "step": 65420 }, { "epoch": 0.615764705882353, "grad_norm": 0.32069818029378133, "learning_rate": 2.5493418846829425e-06, "loss": 0.016153249144554137, "step": 65425 }, { "epoch": 0.6158117647058824, "grad_norm": 0.5668423777533299, "learning_rate": 2.5492444741084688e-06, "loss": 0.01565893888473511, "step": 65430 }, { "epoch": 0.6158588235294118, "grad_norm": 0.3662945170211083, "learning_rate": 2.5491470746993415e-06, "loss": 0.01673460900783539, "step": 65435 }, { "epoch": 0.6159058823529412, "grad_norm": 0.5440113867572027, "learning_rate": 2.549049686453427e-06, "loss": 0.015367984771728516, "step": 65440 }, { "epoch": 0.6159529411764706, "grad_norm": 0.3759926844067592, "learning_rate": 2.548952309368594e-06, "loss": 0.013482959568500518, "step": 65445 }, { "epoch": 0.616, "grad_norm": 0.5450953479966483, "learning_rate": 2.5488549434427102e-06, "loss": 0.014812669157981873, "step": 65450 }, { "epoch": 0.6160470588235294, "grad_norm": 0.30698074282870724, "learning_rate": 2.5487575886736452e-06, "loss": 0.01320883333683014, "step": 65455 }, { "epoch": 0.6160941176470588, "grad_norm": 0.5791384441622431, "learning_rate": 2.548660245059268e-06, "loss": 0.017330075800418853, "step": 65460 }, { "epoch": 0.6161411764705882, "grad_norm": 0.5022006249900505, "learning_rate": 2.548562912597448e-06, "loss": 0.017603379487991334, "step": 65465 }, { "epoch": 0.6161882352941176, "grad_norm": 0.5970959182881371, "learning_rate": 2.5484655912860575e-06, "loss": 0.015323933959007264, "step": 65470 }, { "epoch": 0.6162352941176471, "grad_norm": 0.4405760355837982, "learning_rate": 2.548368281122966e-06, "loss": 0.015361833572387695, "step": 65475 }, { "epoch": 0.6162823529411765, "grad_norm": 0.6264417373540905, "learning_rate": 2.548270982106046e-06, "loss": 0.01794569343328476, "step": 65480 }, { "epoch": 0.6163294117647059, "grad_norm": 0.4813282749758973, "learning_rate": 2.5481736942331705e-06, "loss": 0.014015139639377594, "step": 65485 }, { "epoch": 0.6163764705882353, "grad_norm": 0.7949908907375862, "learning_rate": 2.5480764175022106e-06, "loss": 0.017295679450035094, "step": 65490 }, { "epoch": 0.6164235294117647, "grad_norm": 0.4826249910563289, "learning_rate": 2.547979151911041e-06, "loss": 0.013566781580448151, "step": 65495 }, { "epoch": 0.6164705882352941, "grad_norm": 0.5318547469007076, "learning_rate": 2.5478818974575355e-06, "loss": 0.017016905546188354, "step": 65500 }, { "epoch": 0.6165176470588235, "grad_norm": 0.563200983995867, "learning_rate": 2.5477846541395677e-06, "loss": 0.015859027206897736, "step": 65505 }, { "epoch": 0.6165647058823529, "grad_norm": 0.5047143863979576, "learning_rate": 2.5476874219550146e-06, "loss": 0.017051641643047333, "step": 65510 }, { "epoch": 0.6166117647058823, "grad_norm": 0.47372213248610145, "learning_rate": 2.5475902009017504e-06, "loss": 0.0190928652882576, "step": 65515 }, { "epoch": 0.6166588235294118, "grad_norm": 0.7968037763344135, "learning_rate": 2.547492990977653e-06, "loss": 0.015049508213996888, "step": 65520 }, { "epoch": 0.6167058823529412, "grad_norm": 0.49034343136465963, "learning_rate": 2.5473957921805975e-06, "loss": 0.016735684871673585, "step": 65525 }, { "epoch": 0.6167529411764706, "grad_norm": 0.5051107517630637, "learning_rate": 2.5472986045084618e-06, "loss": 0.013597492873668671, "step": 65530 }, { "epoch": 0.6168, "grad_norm": 0.4404884269117513, "learning_rate": 2.5472014279591243e-06, "loss": 0.024381503462791443, "step": 65535 }, { "epoch": 0.6168470588235294, "grad_norm": 0.7343209201825583, "learning_rate": 2.5471042625304643e-06, "loss": 0.017586052417755127, "step": 65540 }, { "epoch": 0.6168941176470588, "grad_norm": 0.35654466340419444, "learning_rate": 2.547007108220359e-06, "loss": 0.017102792859077454, "step": 65545 }, { "epoch": 0.6169411764705882, "grad_norm": 0.4281393570466976, "learning_rate": 2.546909965026689e-06, "loss": 0.017373019456863405, "step": 65550 }, { "epoch": 0.6169882352941176, "grad_norm": 0.34999221206377756, "learning_rate": 2.5468128329473346e-06, "loss": 0.01503591239452362, "step": 65555 }, { "epoch": 0.617035294117647, "grad_norm": 0.4753764854504289, "learning_rate": 2.5467157119801772e-06, "loss": 0.018249650299549103, "step": 65560 }, { "epoch": 0.6170823529411764, "grad_norm": 0.5256678439844975, "learning_rate": 2.546618602123098e-06, "loss": 0.013581393659114838, "step": 65565 }, { "epoch": 0.6171294117647059, "grad_norm": 0.8859677493817523, "learning_rate": 2.5465215033739776e-06, "loss": 0.020573508739471436, "step": 65570 }, { "epoch": 0.6171764705882353, "grad_norm": 0.513490657287366, "learning_rate": 2.5464244157306994e-06, "loss": 0.016044673323631287, "step": 65575 }, { "epoch": 0.6172235294117647, "grad_norm": 0.7041553515327147, "learning_rate": 2.5463273391911464e-06, "loss": 0.025244298577308654, "step": 65580 }, { "epoch": 0.6172705882352941, "grad_norm": 0.7502480247395953, "learning_rate": 2.546230273753203e-06, "loss": 0.019884970784187318, "step": 65585 }, { "epoch": 0.6173176470588235, "grad_norm": 0.6777415860707423, "learning_rate": 2.546133219414753e-06, "loss": 0.02059006839990616, "step": 65590 }, { "epoch": 0.6173647058823529, "grad_norm": 0.4798057469327556, "learning_rate": 2.5460361761736805e-06, "loss": 0.017673027515411378, "step": 65595 }, { "epoch": 0.6174117647058823, "grad_norm": 0.32726487661711984, "learning_rate": 2.5459391440278714e-06, "loss": 0.015544813871383668, "step": 65600 }, { "epoch": 0.6174588235294117, "grad_norm": 0.3598494396058873, "learning_rate": 2.545842122975212e-06, "loss": 0.015162280201911927, "step": 65605 }, { "epoch": 0.6175058823529411, "grad_norm": 0.6639881408103362, "learning_rate": 2.5457451130135874e-06, "loss": 0.01759406179189682, "step": 65610 }, { "epoch": 0.6175529411764706, "grad_norm": 0.45241271785577647, "learning_rate": 2.5456481141408862e-06, "loss": 0.021759256720542908, "step": 65615 }, { "epoch": 0.6176, "grad_norm": 0.3843294148368552, "learning_rate": 2.5455511263549947e-06, "loss": 0.016736392676830292, "step": 65620 }, { "epoch": 0.6176470588235294, "grad_norm": 0.43870441947587796, "learning_rate": 2.5454541496538023e-06, "loss": 0.016209155321121216, "step": 65625 }, { "epoch": 0.6176941176470588, "grad_norm": 0.37294848453682916, "learning_rate": 2.5453571840351967e-06, "loss": 0.011914515495300293, "step": 65630 }, { "epoch": 0.6177411764705882, "grad_norm": 0.7638377746444208, "learning_rate": 2.5452602294970674e-06, "loss": 0.015637937188148498, "step": 65635 }, { "epoch": 0.6177882352941176, "grad_norm": 0.5321196324714089, "learning_rate": 2.545163286037305e-06, "loss": 0.01550973653793335, "step": 65640 }, { "epoch": 0.617835294117647, "grad_norm": 0.43342489525466865, "learning_rate": 2.5450663536537983e-06, "loss": 0.014041051268577576, "step": 65645 }, { "epoch": 0.6178823529411764, "grad_norm": 0.38627371719617637, "learning_rate": 2.5449694323444394e-06, "loss": 0.014211073517799377, "step": 65650 }, { "epoch": 0.6179294117647058, "grad_norm": 0.33044663632813753, "learning_rate": 2.544872522107121e-06, "loss": 0.011141200363636018, "step": 65655 }, { "epoch": 0.6179764705882353, "grad_norm": 0.6030126048869211, "learning_rate": 2.5447756229397323e-06, "loss": 0.014649595320224761, "step": 65660 }, { "epoch": 0.6180235294117647, "grad_norm": 0.5712124530916433, "learning_rate": 2.5446787348401687e-06, "loss": 0.019252124428749084, "step": 65665 }, { "epoch": 0.6180705882352941, "grad_norm": 0.5736449164988405, "learning_rate": 2.5445818578063215e-06, "loss": 0.019932860136032106, "step": 65670 }, { "epoch": 0.6181176470588235, "grad_norm": 0.5476337956514997, "learning_rate": 2.5444849918360853e-06, "loss": 0.01563110053539276, "step": 65675 }, { "epoch": 0.618164705882353, "grad_norm": 0.4057153385976287, "learning_rate": 2.5443881369273544e-06, "loss": 0.016718924045562744, "step": 65680 }, { "epoch": 0.6182117647058823, "grad_norm": 0.3954588961890296, "learning_rate": 2.544291293078023e-06, "loss": 0.015054731070995331, "step": 65685 }, { "epoch": 0.6182588235294117, "grad_norm": 0.48826225434141113, "learning_rate": 2.5441944602859888e-06, "loss": 0.016044774651527406, "step": 65690 }, { "epoch": 0.6183058823529411, "grad_norm": 0.5950708987772249, "learning_rate": 2.544097638549145e-06, "loss": 0.024238398671150206, "step": 65695 }, { "epoch": 0.6183529411764705, "grad_norm": 0.5421648787002841, "learning_rate": 2.5440008278653905e-06, "loss": 0.014097565412521362, "step": 65700 }, { "epoch": 0.6184, "grad_norm": 0.6966154214802068, "learning_rate": 2.5439040282326203e-06, "loss": 0.013442693650722504, "step": 65705 }, { "epoch": 0.6184470588235295, "grad_norm": 0.48894362207191094, "learning_rate": 2.5438072396487334e-06, "loss": 0.016079455614089966, "step": 65710 }, { "epoch": 0.6184941176470589, "grad_norm": 0.4112718124794781, "learning_rate": 2.5437104621116283e-06, "loss": 0.015534704923629761, "step": 65715 }, { "epoch": 0.6185411764705883, "grad_norm": 0.5936685873995777, "learning_rate": 2.543613695619203e-06, "loss": 0.015433433651924133, "step": 65720 }, { "epoch": 0.6185882352941177, "grad_norm": 0.4336085642734552, "learning_rate": 2.543516940169357e-06, "loss": 0.01365075707435608, "step": 65725 }, { "epoch": 0.6186352941176471, "grad_norm": 0.5511774628532448, "learning_rate": 2.5434201957599907e-06, "loss": 0.016906234622001647, "step": 65730 }, { "epoch": 0.6186823529411765, "grad_norm": 0.40573955897322467, "learning_rate": 2.5433234623890046e-06, "loss": 0.01221800222992897, "step": 65735 }, { "epoch": 0.6187294117647059, "grad_norm": 0.298105940238823, "learning_rate": 2.5432267400543e-06, "loss": 0.014847034215927124, "step": 65740 }, { "epoch": 0.6187764705882353, "grad_norm": 0.7844839635436743, "learning_rate": 2.543130028753777e-06, "loss": 0.018328115344047546, "step": 65745 }, { "epoch": 0.6188235294117647, "grad_norm": 0.5234344515620227, "learning_rate": 2.5430333284853387e-06, "loss": 0.013919469714164735, "step": 65750 }, { "epoch": 0.6188705882352942, "grad_norm": 0.37520527504697915, "learning_rate": 2.5429366392468884e-06, "loss": 0.015853261947631835, "step": 65755 }, { "epoch": 0.6189176470588236, "grad_norm": 0.5343522824151106, "learning_rate": 2.5428399610363293e-06, "loss": 0.011136825382709502, "step": 65760 }, { "epoch": 0.618964705882353, "grad_norm": 0.5507228469932935, "learning_rate": 2.5427432938515646e-06, "loss": 0.016985464096069335, "step": 65765 }, { "epoch": 0.6190117647058824, "grad_norm": 0.5687219168857215, "learning_rate": 2.542646637690499e-06, "loss": 0.0183760404586792, "step": 65770 }, { "epoch": 0.6190588235294118, "grad_norm": 0.4427077144112434, "learning_rate": 2.542549992551037e-06, "loss": 0.01527612805366516, "step": 65775 }, { "epoch": 0.6191058823529412, "grad_norm": 0.5761203936279296, "learning_rate": 2.542453358431085e-06, "loss": 0.01608867198228836, "step": 65780 }, { "epoch": 0.6191529411764706, "grad_norm": 0.7000654318167322, "learning_rate": 2.5423567353285487e-06, "loss": 0.015847326815128328, "step": 65785 }, { "epoch": 0.6192, "grad_norm": 0.5361428397313064, "learning_rate": 2.5422601232413346e-06, "loss": 0.01586977541446686, "step": 65790 }, { "epoch": 0.6192470588235294, "grad_norm": 0.448021371715167, "learning_rate": 2.54216352216735e-06, "loss": 0.017272990942001343, "step": 65795 }, { "epoch": 0.6192941176470588, "grad_norm": 0.42946651302607614, "learning_rate": 2.542066932104503e-06, "loss": 0.013338595628738403, "step": 65800 }, { "epoch": 0.6193411764705883, "grad_norm": 0.6698230572540885, "learning_rate": 2.541970353050701e-06, "loss": 0.017359903454780577, "step": 65805 }, { "epoch": 0.6193882352941177, "grad_norm": 0.6476506107991856, "learning_rate": 2.541873785003854e-06, "loss": 0.019113197922706604, "step": 65810 }, { "epoch": 0.6194352941176471, "grad_norm": 0.5170332814765379, "learning_rate": 2.5417772279618703e-06, "loss": 0.016835804283618926, "step": 65815 }, { "epoch": 0.6194823529411765, "grad_norm": 0.44678062391331536, "learning_rate": 2.541680681922661e-06, "loss": 0.014500609040260315, "step": 65820 }, { "epoch": 0.6195294117647059, "grad_norm": 0.43652277328228367, "learning_rate": 2.541584146884135e-06, "loss": 0.01415843516588211, "step": 65825 }, { "epoch": 0.6195764705882353, "grad_norm": 0.645542462378141, "learning_rate": 2.5414876228442055e-06, "loss": 0.014924699068069458, "step": 65830 }, { "epoch": 0.6196235294117647, "grad_norm": 0.4362666818781086, "learning_rate": 2.541391109800782e-06, "loss": 0.01574133336544037, "step": 65835 }, { "epoch": 0.6196705882352941, "grad_norm": 0.4650838942203561, "learning_rate": 2.5412946077517787e-06, "loss": 0.015168024599552155, "step": 65840 }, { "epoch": 0.6197176470588235, "grad_norm": 0.6098754213501786, "learning_rate": 2.5411981166951073e-06, "loss": 0.01707630753517151, "step": 65845 }, { "epoch": 0.619764705882353, "grad_norm": 0.529360604719901, "learning_rate": 2.5411016366286807e-06, "loss": 0.016974356770515443, "step": 65850 }, { "epoch": 0.6198117647058824, "grad_norm": 0.4395725123238266, "learning_rate": 2.541005167550413e-06, "loss": 0.016544008255004884, "step": 65855 }, { "epoch": 0.6198588235294118, "grad_norm": 0.4753583012197121, "learning_rate": 2.5409087094582197e-06, "loss": 0.016102889180183412, "step": 65860 }, { "epoch": 0.6199058823529412, "grad_norm": 0.544559061017768, "learning_rate": 2.540812262350015e-06, "loss": 0.01697804033756256, "step": 65865 }, { "epoch": 0.6199529411764706, "grad_norm": 0.3443904542736423, "learning_rate": 2.540715826223714e-06, "loss": 0.01378108412027359, "step": 65870 }, { "epoch": 0.62, "grad_norm": 0.480992739509494, "learning_rate": 2.5406194010772327e-06, "loss": 0.016276288032531738, "step": 65875 }, { "epoch": 0.6200470588235294, "grad_norm": 0.48519012329270056, "learning_rate": 2.5405229869084884e-06, "loss": 0.015276801586151124, "step": 65880 }, { "epoch": 0.6200941176470588, "grad_norm": 0.547515498964162, "learning_rate": 2.5404265837153983e-06, "loss": 0.016011661291122435, "step": 65885 }, { "epoch": 0.6201411764705882, "grad_norm": 0.4039171363477793, "learning_rate": 2.54033019149588e-06, "loss": 0.014705976843833924, "step": 65890 }, { "epoch": 0.6201882352941176, "grad_norm": 0.5163337862071857, "learning_rate": 2.540233810247852e-06, "loss": 0.01626308262348175, "step": 65895 }, { "epoch": 0.6202352941176471, "grad_norm": 0.9475794087807846, "learning_rate": 2.540137439969232e-06, "loss": 0.017504793405532838, "step": 65900 }, { "epoch": 0.6202823529411765, "grad_norm": 0.6878791863868466, "learning_rate": 2.5400410806579406e-06, "loss": 0.016559040546417235, "step": 65905 }, { "epoch": 0.6203294117647059, "grad_norm": 0.4078544981870701, "learning_rate": 2.5399447323118975e-06, "loss": 0.017705309391021728, "step": 65910 }, { "epoch": 0.6203764705882353, "grad_norm": 0.5352253890937653, "learning_rate": 2.539848394929023e-06, "loss": 0.014667558670043945, "step": 65915 }, { "epoch": 0.6204235294117647, "grad_norm": 0.5348138211925311, "learning_rate": 2.539752068507238e-06, "loss": 0.014829739928245544, "step": 65920 }, { "epoch": 0.6204705882352941, "grad_norm": 0.28517672057929383, "learning_rate": 2.539655753044465e-06, "loss": 0.019022235274314882, "step": 65925 }, { "epoch": 0.6205176470588235, "grad_norm": 0.6328986570274411, "learning_rate": 2.5395594485386245e-06, "loss": 0.015428707003593445, "step": 65930 }, { "epoch": 0.6205647058823529, "grad_norm": 0.8454719825457144, "learning_rate": 2.539463154987641e-06, "loss": 0.028480449318885805, "step": 65935 }, { "epoch": 0.6206117647058823, "grad_norm": 0.3516705127369667, "learning_rate": 2.5393668723894364e-06, "loss": 0.01691233515739441, "step": 65940 }, { "epoch": 0.6206588235294118, "grad_norm": 0.4875624454105965, "learning_rate": 2.5392706007419353e-06, "loss": 0.01601187288761139, "step": 65945 }, { "epoch": 0.6207058823529412, "grad_norm": 0.514115041220483, "learning_rate": 2.5391743400430617e-06, "loss": 0.01436741054058075, "step": 65950 }, { "epoch": 0.6207529411764706, "grad_norm": 0.49459027720993815, "learning_rate": 2.539078090290741e-06, "loss": 0.01792101263999939, "step": 65955 }, { "epoch": 0.6208, "grad_norm": 0.31598418892990554, "learning_rate": 2.538981851482898e-06, "loss": 0.015204642713069916, "step": 65960 }, { "epoch": 0.6208470588235294, "grad_norm": 0.6114887508521084, "learning_rate": 2.538885623617459e-06, "loss": 0.014544188976287842, "step": 65965 }, { "epoch": 0.6208941176470588, "grad_norm": 0.4827040235338265, "learning_rate": 2.538789406692351e-06, "loss": 0.02038324922323227, "step": 65970 }, { "epoch": 0.6209411764705882, "grad_norm": 0.612876547152998, "learning_rate": 2.5386932007055e-06, "loss": 0.016258206963539124, "step": 65975 }, { "epoch": 0.6209882352941176, "grad_norm": 0.1995104061746146, "learning_rate": 2.538597005654834e-06, "loss": 0.01655946224927902, "step": 65980 }, { "epoch": 0.621035294117647, "grad_norm": 0.7643627123633897, "learning_rate": 2.538500821538282e-06, "loss": 0.01854248344898224, "step": 65985 }, { "epoch": 0.6210823529411764, "grad_norm": 0.3131259553104604, "learning_rate": 2.5384046483537727e-06, "loss": 0.01214025616645813, "step": 65990 }, { "epoch": 0.6211294117647059, "grad_norm": 0.4842017660361819, "learning_rate": 2.5383084860992346e-06, "loss": 0.01455085575580597, "step": 65995 }, { "epoch": 0.6211764705882353, "grad_norm": 0.4220805908876137, "learning_rate": 2.5382123347725984e-06, "loss": 0.011667342483997345, "step": 66000 }, { "epoch": 0.6212235294117647, "grad_norm": 0.4317829111680537, "learning_rate": 2.5381161943717937e-06, "loss": 0.013943704962730407, "step": 66005 }, { "epoch": 0.6212705882352941, "grad_norm": 0.5478036941166082, "learning_rate": 2.538020064894752e-06, "loss": 0.01509668231010437, "step": 66010 }, { "epoch": 0.6213176470588235, "grad_norm": 0.4386147719414731, "learning_rate": 2.5379239463394045e-06, "loss": 0.02414577603340149, "step": 66015 }, { "epoch": 0.6213647058823529, "grad_norm": 0.6072863338846418, "learning_rate": 2.5378278387036833e-06, "loss": 0.023382434248924257, "step": 66020 }, { "epoch": 0.6214117647058823, "grad_norm": 0.6421104842404959, "learning_rate": 2.5377317419855213e-06, "loss": 0.013912263512611388, "step": 66025 }, { "epoch": 0.6214588235294117, "grad_norm": 0.5658780288973978, "learning_rate": 2.5376356561828514e-06, "loss": 0.01341080218553543, "step": 66030 }, { "epoch": 0.6215058823529411, "grad_norm": 0.4894356099476127, "learning_rate": 2.537539581293607e-06, "loss": 0.013917762041091918, "step": 66035 }, { "epoch": 0.6215529411764706, "grad_norm": 0.5886918834917586, "learning_rate": 2.5374435173157237e-06, "loss": 0.015108403563499451, "step": 66040 }, { "epoch": 0.6216, "grad_norm": 0.690636982345367, "learning_rate": 2.537347464247134e-06, "loss": 0.02300698459148407, "step": 66045 }, { "epoch": 0.6216470588235294, "grad_norm": 0.4939791453498226, "learning_rate": 2.5372514220857752e-06, "loss": 0.019462773203849794, "step": 66050 }, { "epoch": 0.6216941176470588, "grad_norm": 0.4893874401360793, "learning_rate": 2.5371553908295826e-06, "loss": 0.018401046097278596, "step": 66055 }, { "epoch": 0.6217411764705882, "grad_norm": 0.6427388088523185, "learning_rate": 2.537059370476493e-06, "loss": 0.01817273199558258, "step": 66060 }, { "epoch": 0.6217882352941176, "grad_norm": 0.6189571856886443, "learning_rate": 2.536963361024442e-06, "loss": 0.015976087749004365, "step": 66065 }, { "epoch": 0.621835294117647, "grad_norm": 0.45122960077777596, "learning_rate": 2.5368673624713685e-06, "loss": 0.016346293687820434, "step": 66070 }, { "epoch": 0.6218823529411764, "grad_norm": 0.6312803418954338, "learning_rate": 2.5367713748152096e-06, "loss": 0.019955366849899292, "step": 66075 }, { "epoch": 0.6219294117647058, "grad_norm": 0.29723813638038216, "learning_rate": 2.536675398053905e-06, "loss": 0.01647307872772217, "step": 66080 }, { "epoch": 0.6219764705882352, "grad_norm": 0.5096528995406335, "learning_rate": 2.536579432185393e-06, "loss": 0.01798984855413437, "step": 66085 }, { "epoch": 0.6220235294117648, "grad_norm": 0.702006217255845, "learning_rate": 2.5364834772076136e-06, "loss": 0.01705603301525116, "step": 66090 }, { "epoch": 0.6220705882352942, "grad_norm": 0.39207009941845555, "learning_rate": 2.536387533118507e-06, "loss": 0.015161079168319703, "step": 66095 }, { "epoch": 0.6221176470588236, "grad_norm": 0.48338425109393374, "learning_rate": 2.5362915999160143e-06, "loss": 0.01864439994096756, "step": 66100 }, { "epoch": 0.622164705882353, "grad_norm": 0.46057997156461106, "learning_rate": 2.5361956775980767e-06, "loss": 0.014341050386428833, "step": 66105 }, { "epoch": 0.6222117647058824, "grad_norm": 0.5302207218972473, "learning_rate": 2.5360997661626358e-06, "loss": 0.016950930655002593, "step": 66110 }, { "epoch": 0.6222588235294118, "grad_norm": 0.5402838086131698, "learning_rate": 2.5360038656076346e-06, "loss": 0.015424296259880066, "step": 66115 }, { "epoch": 0.6223058823529412, "grad_norm": 0.45570963926302344, "learning_rate": 2.535907975931015e-06, "loss": 0.017516085505485536, "step": 66120 }, { "epoch": 0.6223529411764706, "grad_norm": 0.5310385119244105, "learning_rate": 2.5358120971307223e-06, "loss": 0.0200737327337265, "step": 66125 }, { "epoch": 0.6224, "grad_norm": 0.3138755559859622, "learning_rate": 2.5357162292046987e-06, "loss": 0.021041491627693178, "step": 66130 }, { "epoch": 0.6224470588235295, "grad_norm": 0.3559249619286749, "learning_rate": 2.5356203721508897e-06, "loss": 0.016422614455223083, "step": 66135 }, { "epoch": 0.6224941176470589, "grad_norm": 0.3672941872303635, "learning_rate": 2.5355245259672405e-06, "loss": 0.014793238043785096, "step": 66140 }, { "epoch": 0.6225411764705883, "grad_norm": 0.5766662740104348, "learning_rate": 2.5354286906516974e-06, "loss": 0.014005526900291443, "step": 66145 }, { "epoch": 0.6225882352941177, "grad_norm": 0.6476066660226184, "learning_rate": 2.5353328662022047e-06, "loss": 0.016688786447048187, "step": 66150 }, { "epoch": 0.6226352941176471, "grad_norm": 0.5729384042789097, "learning_rate": 2.535237052616711e-06, "loss": 0.019487255811691286, "step": 66155 }, { "epoch": 0.6226823529411765, "grad_norm": 0.33896431606638605, "learning_rate": 2.5351412498931633e-06, "loss": 0.01705017238855362, "step": 66160 }, { "epoch": 0.6227294117647059, "grad_norm": 0.3539448366232543, "learning_rate": 2.5350454580295087e-06, "loss": 0.01307525932788849, "step": 66165 }, { "epoch": 0.6227764705882353, "grad_norm": 0.49012613707795866, "learning_rate": 2.534949677023697e-06, "loss": 0.014104737341403962, "step": 66170 }, { "epoch": 0.6228235294117647, "grad_norm": 0.6568459363458309, "learning_rate": 2.534853906873675e-06, "loss": 0.018185046315193177, "step": 66175 }, { "epoch": 0.6228705882352941, "grad_norm": 0.5487258063434749, "learning_rate": 2.5347581475773946e-06, "loss": 0.01919034719467163, "step": 66180 }, { "epoch": 0.6229176470588236, "grad_norm": 0.6276742522404385, "learning_rate": 2.534662399132804e-06, "loss": 0.017257732152938843, "step": 66185 }, { "epoch": 0.622964705882353, "grad_norm": 0.4876624322524747, "learning_rate": 2.534566661537855e-06, "loss": 0.014853914082050324, "step": 66190 }, { "epoch": 0.6230117647058824, "grad_norm": 0.49655562620812543, "learning_rate": 2.5344709347904984e-06, "loss": 0.014112162590026855, "step": 66195 }, { "epoch": 0.6230588235294118, "grad_norm": 0.3697654016924708, "learning_rate": 2.5343752188886857e-06, "loss": 0.015936601161956786, "step": 66200 }, { "epoch": 0.6231058823529412, "grad_norm": 0.6637239924633169, "learning_rate": 2.534279513830369e-06, "loss": 0.01803736686706543, "step": 66205 }, { "epoch": 0.6231529411764706, "grad_norm": 0.5614872637657098, "learning_rate": 2.534183819613501e-06, "loss": 0.013332611322402954, "step": 66210 }, { "epoch": 0.6232, "grad_norm": 0.6778364294263701, "learning_rate": 2.534088136236035e-06, "loss": 0.016170328855514525, "step": 66215 }, { "epoch": 0.6232470588235294, "grad_norm": 0.7154760810585239, "learning_rate": 2.5339924636959257e-06, "loss": 0.015505991876125336, "step": 66220 }, { "epoch": 0.6232941176470588, "grad_norm": 0.3877448322876838, "learning_rate": 2.5338968019911265e-06, "loss": 0.014744219183921815, "step": 66225 }, { "epoch": 0.6233411764705883, "grad_norm": 0.5308807694393348, "learning_rate": 2.533801151119592e-06, "loss": 0.016252699494361877, "step": 66230 }, { "epoch": 0.6233882352941177, "grad_norm": 0.3376393532583046, "learning_rate": 2.533705511079279e-06, "loss": 0.013706004619598389, "step": 66235 }, { "epoch": 0.6234352941176471, "grad_norm": 0.3648279751009472, "learning_rate": 2.5336098818681426e-06, "loss": 0.013682796061038971, "step": 66240 }, { "epoch": 0.6234823529411765, "grad_norm": 0.5614550135180626, "learning_rate": 2.533514263484139e-06, "loss": 0.015005195140838623, "step": 66245 }, { "epoch": 0.6235294117647059, "grad_norm": 0.9196793206380901, "learning_rate": 2.5334186559252266e-06, "loss": 0.018286101520061493, "step": 66250 }, { "epoch": 0.6235764705882353, "grad_norm": 0.5707503947061454, "learning_rate": 2.533323059189362e-06, "loss": 0.01803499460220337, "step": 66255 }, { "epoch": 0.6236235294117647, "grad_norm": 0.5843916585517346, "learning_rate": 2.5332274732745032e-06, "loss": 0.01766648143529892, "step": 66260 }, { "epoch": 0.6236705882352941, "grad_norm": 0.5408551558628737, "learning_rate": 2.533131898178609e-06, "loss": 0.016471627354621887, "step": 66265 }, { "epoch": 0.6237176470588235, "grad_norm": 0.35807868864713277, "learning_rate": 2.5330363338996393e-06, "loss": 0.013179072737693786, "step": 66270 }, { "epoch": 0.6237647058823529, "grad_norm": 0.5307722409609884, "learning_rate": 2.5329407804355537e-06, "loss": 0.013988447189331055, "step": 66275 }, { "epoch": 0.6238117647058824, "grad_norm": 0.65507533195679, "learning_rate": 2.5328452377843116e-06, "loss": 0.020319448411464693, "step": 66280 }, { "epoch": 0.6238588235294118, "grad_norm": 0.5362396120069242, "learning_rate": 2.5327497059438748e-06, "loss": 0.012579075992107391, "step": 66285 }, { "epoch": 0.6239058823529412, "grad_norm": 0.5419710350274789, "learning_rate": 2.5326541849122042e-06, "loss": 0.01969943642616272, "step": 66290 }, { "epoch": 0.6239529411764706, "grad_norm": 0.6161946964351287, "learning_rate": 2.532558674687262e-06, "loss": 0.013762935996055603, "step": 66295 }, { "epoch": 0.624, "grad_norm": 0.41351706426524043, "learning_rate": 2.532463175267011e-06, "loss": 0.012900617718696595, "step": 66300 }, { "epoch": 0.6240470588235294, "grad_norm": 0.4165810058560688, "learning_rate": 2.5323676866494134e-06, "loss": 0.017892515659332274, "step": 66305 }, { "epoch": 0.6240941176470588, "grad_norm": 0.46410185139060217, "learning_rate": 2.532272208832433e-06, "loss": 0.019193826615810393, "step": 66310 }, { "epoch": 0.6241411764705882, "grad_norm": 0.48632477954503983, "learning_rate": 2.532176741814034e-06, "loss": 0.01173691377043724, "step": 66315 }, { "epoch": 0.6241882352941176, "grad_norm": 0.42961591969303514, "learning_rate": 2.532081285592181e-06, "loss": 0.014944478869438171, "step": 66320 }, { "epoch": 0.6242352941176471, "grad_norm": 0.49175557077921067, "learning_rate": 2.5319858401648393e-06, "loss": 0.015396007895469665, "step": 66325 }, { "epoch": 0.6242823529411765, "grad_norm": 0.6640962322636317, "learning_rate": 2.5318904055299746e-06, "loss": 0.023136065900325777, "step": 66330 }, { "epoch": 0.6243294117647059, "grad_norm": 1.0224132236018457, "learning_rate": 2.5317949816855524e-06, "loss": 0.015469956398010253, "step": 66335 }, { "epoch": 0.6243764705882353, "grad_norm": 0.5535583694711276, "learning_rate": 2.531699568629541e-06, "loss": 0.017126557230949403, "step": 66340 }, { "epoch": 0.6244235294117647, "grad_norm": 0.44966970114731525, "learning_rate": 2.531604166359906e-06, "loss": 0.01599445641040802, "step": 66345 }, { "epoch": 0.6244705882352941, "grad_norm": 0.5690072496844061, "learning_rate": 2.531508774874616e-06, "loss": 0.016492369771003722, "step": 66350 }, { "epoch": 0.6245176470588235, "grad_norm": 0.6210354958992477, "learning_rate": 2.5314133941716395e-06, "loss": 0.016329339146614073, "step": 66355 }, { "epoch": 0.6245647058823529, "grad_norm": 0.33135183667082446, "learning_rate": 2.531318024248945e-06, "loss": 0.012589894235134125, "step": 66360 }, { "epoch": 0.6246117647058823, "grad_norm": 0.5030331476134323, "learning_rate": 2.531222665104502e-06, "loss": 0.01849348396062851, "step": 66365 }, { "epoch": 0.6246588235294117, "grad_norm": 0.4327141368490702, "learning_rate": 2.531127316736281e-06, "loss": 0.015812504291534423, "step": 66370 }, { "epoch": 0.6247058823529412, "grad_norm": 0.5350327380111412, "learning_rate": 2.531031979142252e-06, "loss": 0.01405792087316513, "step": 66375 }, { "epoch": 0.6247529411764706, "grad_norm": 0.6398805044264478, "learning_rate": 2.5309366523203866e-06, "loss": 0.01448470801115036, "step": 66380 }, { "epoch": 0.6248, "grad_norm": 0.5635136146559309, "learning_rate": 2.5308413362686556e-06, "loss": 0.017292127013206482, "step": 66385 }, { "epoch": 0.6248470588235294, "grad_norm": 0.5502458024769963, "learning_rate": 2.530746030985031e-06, "loss": 0.017504678666591646, "step": 66390 }, { "epoch": 0.6248941176470588, "grad_norm": 0.49412232475496687, "learning_rate": 2.5306507364674866e-06, "loss": 0.017591255903244018, "step": 66395 }, { "epoch": 0.6249411764705882, "grad_norm": 0.6303105573308931, "learning_rate": 2.530555452713995e-06, "loss": 0.019148191809654234, "step": 66400 }, { "epoch": 0.6249882352941176, "grad_norm": 0.5258899580597088, "learning_rate": 2.53046017972253e-06, "loss": 0.015084303915500641, "step": 66405 }, { "epoch": 0.625035294117647, "grad_norm": 0.36103467544555173, "learning_rate": 2.530364917491065e-06, "loss": 0.015247318148612975, "step": 66410 }, { "epoch": 0.6250823529411764, "grad_norm": 0.5812184354051723, "learning_rate": 2.5302696660175755e-06, "loss": 0.01953680217266083, "step": 66415 }, { "epoch": 0.6251294117647059, "grad_norm": 0.37286069360530955, "learning_rate": 2.530174425300037e-06, "loss": 0.017215177416801453, "step": 66420 }, { "epoch": 0.6251764705882353, "grad_norm": 0.6785870754831744, "learning_rate": 2.5300791953364256e-06, "loss": 0.01802067756652832, "step": 66425 }, { "epoch": 0.6252235294117647, "grad_norm": 0.5317883229777471, "learning_rate": 2.529983976124717e-06, "loss": 0.014967098832130432, "step": 66430 }, { "epoch": 0.6252705882352941, "grad_norm": 0.4498575669840457, "learning_rate": 2.529888767662888e-06, "loss": 0.013642717897891999, "step": 66435 }, { "epoch": 0.6253176470588235, "grad_norm": 0.6648516733816179, "learning_rate": 2.529793569948917e-06, "loss": 0.02032712697982788, "step": 66440 }, { "epoch": 0.6253647058823529, "grad_norm": 0.37043019743397515, "learning_rate": 2.5296983829807815e-06, "loss": 0.014573046565055847, "step": 66445 }, { "epoch": 0.6254117647058823, "grad_norm": 0.7343386480473236, "learning_rate": 2.529603206756459e-06, "loss": 0.01605382710695267, "step": 66450 }, { "epoch": 0.6254588235294117, "grad_norm": 0.36499428691773317, "learning_rate": 2.5295080412739304e-06, "loss": 0.01736338287591934, "step": 66455 }, { "epoch": 0.6255058823529411, "grad_norm": 0.516162802274225, "learning_rate": 2.529412886531174e-06, "loss": 0.012566238641738892, "step": 66460 }, { "epoch": 0.6255529411764705, "grad_norm": 0.7377683331224136, "learning_rate": 2.52931774252617e-06, "loss": 0.02066812664270401, "step": 66465 }, { "epoch": 0.6256, "grad_norm": 0.5219039626148714, "learning_rate": 2.5292226092568996e-06, "loss": 0.01696391701698303, "step": 66470 }, { "epoch": 0.6256470588235294, "grad_norm": 0.6290704775242059, "learning_rate": 2.5291274867213435e-06, "loss": 0.018004198372364045, "step": 66475 }, { "epoch": 0.6256941176470588, "grad_norm": 0.3630531432197237, "learning_rate": 2.529032374917484e-06, "loss": 0.017475834488868712, "step": 66480 }, { "epoch": 0.6257411764705882, "grad_norm": 0.3855649811699343, "learning_rate": 2.5289372738433027e-06, "loss": 0.014543616771697998, "step": 66485 }, { "epoch": 0.6257882352941176, "grad_norm": 0.6518201161814402, "learning_rate": 2.528842183496782e-06, "loss": 0.01223457306623459, "step": 66490 }, { "epoch": 0.625835294117647, "grad_norm": 0.5437345259676802, "learning_rate": 2.5287471038759065e-06, "loss": 0.013418476283550262, "step": 66495 }, { "epoch": 0.6258823529411764, "grad_norm": 0.49025640625988776, "learning_rate": 2.5286520349786592e-06, "loss": 0.014522820711135864, "step": 66500 }, { "epoch": 0.6259294117647058, "grad_norm": 0.671179062168276, "learning_rate": 2.528556976803025e-06, "loss": 0.016420602798461914, "step": 66505 }, { "epoch": 0.6259764705882352, "grad_norm": 0.4037636660918351, "learning_rate": 2.528461929346988e-06, "loss": 0.012526893615722656, "step": 66510 }, { "epoch": 0.6260235294117648, "grad_norm": 0.478145245349972, "learning_rate": 2.5283668926085343e-06, "loss": 0.015853849053382874, "step": 66515 }, { "epoch": 0.6260705882352942, "grad_norm": 0.5657467717578928, "learning_rate": 2.5282718665856494e-06, "loss": 0.014812493324279785, "step": 66520 }, { "epoch": 0.6261176470588236, "grad_norm": 0.6314451335177454, "learning_rate": 2.5281768512763193e-06, "loss": 0.015290537476539611, "step": 66525 }, { "epoch": 0.626164705882353, "grad_norm": 0.6483053051250465, "learning_rate": 2.5280818466785327e-06, "loss": 0.020303502678871155, "step": 66530 }, { "epoch": 0.6262117647058824, "grad_norm": 0.5842611919230112, "learning_rate": 2.5279868527902757e-06, "loss": 0.014827185869216919, "step": 66535 }, { "epoch": 0.6262588235294118, "grad_norm": 0.4500218654617105, "learning_rate": 2.527891869609537e-06, "loss": 0.016436253488063813, "step": 66540 }, { "epoch": 0.6263058823529412, "grad_norm": 0.5922218699303445, "learning_rate": 2.527796897134305e-06, "loss": 0.018369820713996888, "step": 66545 }, { "epoch": 0.6263529411764706, "grad_norm": 0.7467497925404684, "learning_rate": 2.5277019353625685e-06, "loss": 0.018054768443107605, "step": 66550 }, { "epoch": 0.6264, "grad_norm": 0.6848163267019324, "learning_rate": 2.5276069842923175e-06, "loss": 0.014608436822891235, "step": 66555 }, { "epoch": 0.6264470588235294, "grad_norm": 0.5385573770466293, "learning_rate": 2.5275120439215423e-06, "loss": 0.011821390688419342, "step": 66560 }, { "epoch": 0.6264941176470589, "grad_norm": 0.4996992532557195, "learning_rate": 2.5274171142482334e-06, "loss": 0.018081283569335936, "step": 66565 }, { "epoch": 0.6265411764705883, "grad_norm": 0.37825688833035487, "learning_rate": 2.5273221952703823e-06, "loss": 0.016435328125953674, "step": 66570 }, { "epoch": 0.6265882352941177, "grad_norm": 0.5590301366767301, "learning_rate": 2.527227286985981e-06, "loss": 0.012520682811737061, "step": 66575 }, { "epoch": 0.6266352941176471, "grad_norm": 0.5296217383232078, "learning_rate": 2.527132389393021e-06, "loss": 0.012221933156251908, "step": 66580 }, { "epoch": 0.6266823529411765, "grad_norm": 0.47319946311621347, "learning_rate": 2.5270375024894957e-06, "loss": 0.01612379848957062, "step": 66585 }, { "epoch": 0.6267294117647059, "grad_norm": 0.39804963548200195, "learning_rate": 2.5269426262733974e-06, "loss": 0.014439721405506135, "step": 66590 }, { "epoch": 0.6267764705882353, "grad_norm": 0.3871400844404068, "learning_rate": 2.5268477607427222e-06, "loss": 0.011388573795557022, "step": 66595 }, { "epoch": 0.6268235294117647, "grad_norm": 0.5562409434480905, "learning_rate": 2.5267529058954627e-06, "loss": 0.018643015623092653, "step": 66600 }, { "epoch": 0.6268705882352941, "grad_norm": 0.9192058844837843, "learning_rate": 2.526658061729614e-06, "loss": 0.015684381127357483, "step": 66605 }, { "epoch": 0.6269176470588236, "grad_norm": 0.5301232587771241, "learning_rate": 2.5265632282431723e-06, "loss": 0.015071332454681396, "step": 66610 }, { "epoch": 0.626964705882353, "grad_norm": 0.4937776302805502, "learning_rate": 2.526468405434133e-06, "loss": 0.015231490135192871, "step": 66615 }, { "epoch": 0.6270117647058824, "grad_norm": 0.617116616116989, "learning_rate": 2.5263735933004924e-06, "loss": 0.018947677314281465, "step": 66620 }, { "epoch": 0.6270588235294118, "grad_norm": 0.5707577699876438, "learning_rate": 2.5262787918402487e-06, "loss": 0.01398652195930481, "step": 66625 }, { "epoch": 0.6271058823529412, "grad_norm": 0.4486519839957955, "learning_rate": 2.5261840010513983e-06, "loss": 0.013087892532348632, "step": 66630 }, { "epoch": 0.6271529411764706, "grad_norm": 0.5571574355577062, "learning_rate": 2.52608922093194e-06, "loss": 0.013010773062705993, "step": 66635 }, { "epoch": 0.6272, "grad_norm": 0.6173688419652787, "learning_rate": 2.5259944514798716e-06, "loss": 0.01377100646495819, "step": 66640 }, { "epoch": 0.6272470588235294, "grad_norm": 0.337006998330128, "learning_rate": 2.5258996926931935e-06, "loss": 0.017819055914878847, "step": 66645 }, { "epoch": 0.6272941176470588, "grad_norm": 0.7365785788880438, "learning_rate": 2.5258049445699034e-06, "loss": 0.018521036207675933, "step": 66650 }, { "epoch": 0.6273411764705882, "grad_norm": 1.2339994266843737, "learning_rate": 2.5257102071080036e-06, "loss": 0.016643747687339783, "step": 66655 }, { "epoch": 0.6273882352941177, "grad_norm": 0.46030290970119614, "learning_rate": 2.5256154803054946e-06, "loss": 0.0180379182100296, "step": 66660 }, { "epoch": 0.6274352941176471, "grad_norm": 0.5216549963126365, "learning_rate": 2.5255207641603757e-06, "loss": 0.01570478081703186, "step": 66665 }, { "epoch": 0.6274823529411765, "grad_norm": 0.47465951613162866, "learning_rate": 2.525426058670651e-06, "loss": 0.017752845585346223, "step": 66670 }, { "epoch": 0.6275294117647059, "grad_norm": 0.5402801634784766, "learning_rate": 2.5253313638343212e-06, "loss": 0.01690215915441513, "step": 66675 }, { "epoch": 0.6275764705882353, "grad_norm": 0.24569102036535911, "learning_rate": 2.52523667964939e-06, "loss": 0.013074514269828797, "step": 66680 }, { "epoch": 0.6276235294117647, "grad_norm": 0.838052756870001, "learning_rate": 2.5251420061138606e-06, "loss": 0.01574385166168213, "step": 66685 }, { "epoch": 0.6276705882352941, "grad_norm": 0.3954834569422307, "learning_rate": 2.5250473432257363e-06, "loss": 0.012461023777723313, "step": 66690 }, { "epoch": 0.6277176470588235, "grad_norm": 0.3790778202673071, "learning_rate": 2.524952690983022e-06, "loss": 0.016548427939414977, "step": 66695 }, { "epoch": 0.6277647058823529, "grad_norm": 0.6106424996438141, "learning_rate": 2.524858049383722e-06, "loss": 0.013524718582630157, "step": 66700 }, { "epoch": 0.6278117647058824, "grad_norm": 0.6346003084224823, "learning_rate": 2.524763418425843e-06, "loss": 0.015947991609573366, "step": 66705 }, { "epoch": 0.6278588235294118, "grad_norm": 0.6463829104617488, "learning_rate": 2.5246687981073905e-06, "loss": 0.0183562695980072, "step": 66710 }, { "epoch": 0.6279058823529412, "grad_norm": 0.49392900605686796, "learning_rate": 2.5245741884263705e-06, "loss": 0.01641191691160202, "step": 66715 }, { "epoch": 0.6279529411764706, "grad_norm": 0.490417871694062, "learning_rate": 2.5244795893807893e-06, "loss": 0.015359251201152802, "step": 66720 }, { "epoch": 0.628, "grad_norm": 0.396690644707405, "learning_rate": 2.524385000968656e-06, "loss": 0.01663437783718109, "step": 66725 }, { "epoch": 0.6280470588235294, "grad_norm": 0.6524148451753573, "learning_rate": 2.5242904231879776e-06, "loss": 0.01370682120323181, "step": 66730 }, { "epoch": 0.6280941176470588, "grad_norm": 0.5914395308727058, "learning_rate": 2.524195856036764e-06, "loss": 0.015016071498394012, "step": 66735 }, { "epoch": 0.6281411764705882, "grad_norm": 0.5351858763183758, "learning_rate": 2.5241012995130227e-06, "loss": 0.015186406672000885, "step": 66740 }, { "epoch": 0.6281882352941176, "grad_norm": 0.47764147903617965, "learning_rate": 2.5240067536147644e-06, "loss": 0.01613866686820984, "step": 66745 }, { "epoch": 0.6282352941176471, "grad_norm": 0.5375466660118913, "learning_rate": 2.5239122183399983e-06, "loss": 0.015986350178718568, "step": 66750 }, { "epoch": 0.6282823529411765, "grad_norm": 0.4398700100513274, "learning_rate": 2.5238176936867358e-06, "loss": 0.012920492887496948, "step": 66755 }, { "epoch": 0.6283294117647059, "grad_norm": 0.6457351879469586, "learning_rate": 2.5237231796529883e-06, "loss": 0.016763046383857727, "step": 66760 }, { "epoch": 0.6283764705882353, "grad_norm": 0.5907682300536156, "learning_rate": 2.5236286762367666e-06, "loss": 0.020120537281036376, "step": 66765 }, { "epoch": 0.6284235294117647, "grad_norm": 0.5929545406374916, "learning_rate": 2.523534183436084e-06, "loss": 0.01479806900024414, "step": 66770 }, { "epoch": 0.6284705882352941, "grad_norm": 0.5484870288107907, "learning_rate": 2.523439701248952e-06, "loss": 0.018526171147823334, "step": 66775 }, { "epoch": 0.6285176470588235, "grad_norm": 0.5040429150648484, "learning_rate": 2.5233452296733845e-06, "loss": 0.015567761659622193, "step": 66780 }, { "epoch": 0.6285647058823529, "grad_norm": 0.4052469542617504, "learning_rate": 2.523250768707396e-06, "loss": 0.014794270694255828, "step": 66785 }, { "epoch": 0.6286117647058823, "grad_norm": 0.3964114808879653, "learning_rate": 2.523156318348999e-06, "loss": 0.013492636382579803, "step": 66790 }, { "epoch": 0.6286588235294117, "grad_norm": 0.5298926380462137, "learning_rate": 2.523061878596211e-06, "loss": 0.015203604102134704, "step": 66795 }, { "epoch": 0.6287058823529412, "grad_norm": 0.35566872133715216, "learning_rate": 2.5229674494470447e-06, "loss": 0.01361982524394989, "step": 66800 }, { "epoch": 0.6287529411764706, "grad_norm": 0.47037704085945753, "learning_rate": 2.5228730308995175e-06, "loss": 0.012306004762649536, "step": 66805 }, { "epoch": 0.6288, "grad_norm": 0.3816116625722274, "learning_rate": 2.5227786229516453e-06, "loss": 0.015471324324607849, "step": 66810 }, { "epoch": 0.6288470588235294, "grad_norm": 0.4736756004957724, "learning_rate": 2.5226842256014458e-06, "loss": 0.01328255981206894, "step": 66815 }, { "epoch": 0.6288941176470588, "grad_norm": 0.5847583644118147, "learning_rate": 2.5225898388469342e-06, "loss": 0.021199007332324982, "step": 66820 }, { "epoch": 0.6289411764705882, "grad_norm": 0.9124115299827856, "learning_rate": 2.5224954626861312e-06, "loss": 0.019495363533496856, "step": 66825 }, { "epoch": 0.6289882352941176, "grad_norm": 0.6454423712846654, "learning_rate": 2.522401097117054e-06, "loss": 0.01635163128376007, "step": 66830 }, { "epoch": 0.629035294117647, "grad_norm": 0.5768699984679331, "learning_rate": 2.5223067421377213e-06, "loss": 0.015649786591529845, "step": 66835 }, { "epoch": 0.6290823529411764, "grad_norm": 0.46659406361366756, "learning_rate": 2.5222123977461527e-06, "loss": 0.017642927169799805, "step": 66840 }, { "epoch": 0.6291294117647059, "grad_norm": 0.37338290471013674, "learning_rate": 2.522118063940368e-06, "loss": 0.019880950450897217, "step": 66845 }, { "epoch": 0.6291764705882353, "grad_norm": 0.5004267702107233, "learning_rate": 2.5220237407183897e-06, "loss": 0.014413098990917205, "step": 66850 }, { "epoch": 0.6292235294117647, "grad_norm": 0.3811510348213134, "learning_rate": 2.521929428078236e-06, "loss": 0.016571229696273802, "step": 66855 }, { "epoch": 0.6292705882352941, "grad_norm": 0.48628151014391946, "learning_rate": 2.5218351260179303e-06, "loss": 0.018018585443496705, "step": 66860 }, { "epoch": 0.6293176470588235, "grad_norm": 0.44538156332657036, "learning_rate": 2.5217408345354943e-06, "loss": 0.015480303764343261, "step": 66865 }, { "epoch": 0.6293647058823529, "grad_norm": 0.4944083710331108, "learning_rate": 2.5216465536289507e-06, "loss": 0.013707113265991212, "step": 66870 }, { "epoch": 0.6294117647058823, "grad_norm": 0.6989268167927148, "learning_rate": 2.5215522832963226e-06, "loss": 0.018230751156806946, "step": 66875 }, { "epoch": 0.6294588235294117, "grad_norm": 0.5539336716525228, "learning_rate": 2.5214580235356334e-06, "loss": 0.014189790189266204, "step": 66880 }, { "epoch": 0.6295058823529411, "grad_norm": 0.5722712455583466, "learning_rate": 2.521363774344907e-06, "loss": 0.01864871084690094, "step": 66885 }, { "epoch": 0.6295529411764705, "grad_norm": 0.6364658028067968, "learning_rate": 2.5212695357221685e-06, "loss": 0.01653270423412323, "step": 66890 }, { "epoch": 0.6296, "grad_norm": 0.5660496037981759, "learning_rate": 2.5211753076654435e-06, "loss": 0.016478627920150757, "step": 66895 }, { "epoch": 0.6296470588235294, "grad_norm": 0.2864658874307983, "learning_rate": 2.5210810901727567e-06, "loss": 0.012857231497764587, "step": 66900 }, { "epoch": 0.6296941176470588, "grad_norm": 0.3433768385940409, "learning_rate": 2.5209868832421354e-06, "loss": 0.012490835785865784, "step": 66905 }, { "epoch": 0.6297411764705882, "grad_norm": 0.4916656561968921, "learning_rate": 2.5208926868716056e-06, "loss": 0.01240173950791359, "step": 66910 }, { "epoch": 0.6297882352941176, "grad_norm": 0.5487090703474686, "learning_rate": 2.520798501059195e-06, "loss": 0.012854798138141632, "step": 66915 }, { "epoch": 0.629835294117647, "grad_norm": 0.47797268452832026, "learning_rate": 2.520704325802931e-06, "loss": 0.017986707389354706, "step": 66920 }, { "epoch": 0.6298823529411764, "grad_norm": 0.47455202478722075, "learning_rate": 2.5206101611008426e-06, "loss": 0.01649882346391678, "step": 66925 }, { "epoch": 0.6299294117647058, "grad_norm": 0.5700085238001895, "learning_rate": 2.520516006950958e-06, "loss": 0.017721834778785705, "step": 66930 }, { "epoch": 0.6299764705882352, "grad_norm": 0.35538690616700214, "learning_rate": 2.5204218633513068e-06, "loss": 0.015190547704696656, "step": 66935 }, { "epoch": 0.6300235294117648, "grad_norm": 0.46643200427924597, "learning_rate": 2.520327730299918e-06, "loss": 0.019828173518180846, "step": 66940 }, { "epoch": 0.6300705882352942, "grad_norm": 0.4287728880156759, "learning_rate": 2.520233607794823e-06, "loss": 0.01471300721168518, "step": 66945 }, { "epoch": 0.6301176470588236, "grad_norm": 0.3533201672036197, "learning_rate": 2.5201394958340526e-06, "loss": 0.017927733063697816, "step": 66950 }, { "epoch": 0.630164705882353, "grad_norm": 0.4622413955901463, "learning_rate": 2.520045394415637e-06, "loss": 0.015111947059631347, "step": 66955 }, { "epoch": 0.6302117647058824, "grad_norm": 0.4343932969535897, "learning_rate": 2.5199513035376098e-06, "loss": 0.014292845129966735, "step": 66960 }, { "epoch": 0.6302588235294118, "grad_norm": 0.3432834883844543, "learning_rate": 2.519857223198003e-06, "loss": 0.015536706149578094, "step": 66965 }, { "epoch": 0.6303058823529412, "grad_norm": 0.468843816044342, "learning_rate": 2.519763153394848e-06, "loss": 0.01683751344680786, "step": 66970 }, { "epoch": 0.6303529411764706, "grad_norm": 0.518303556179278, "learning_rate": 2.5196690941261805e-06, "loss": 0.017264530062675476, "step": 66975 }, { "epoch": 0.6304, "grad_norm": 0.3590282697231573, "learning_rate": 2.5195750453900324e-06, "loss": 0.022120678424835206, "step": 66980 }, { "epoch": 0.6304470588235294, "grad_norm": 0.4140270695967411, "learning_rate": 2.51948100718444e-06, "loss": 0.015311825275421142, "step": 66985 }, { "epoch": 0.6304941176470589, "grad_norm": 0.42614634095771653, "learning_rate": 2.519386979507437e-06, "loss": 0.013852311670780182, "step": 66990 }, { "epoch": 0.6305411764705883, "grad_norm": 0.4417263025750367, "learning_rate": 2.5192929623570594e-06, "loss": 0.016035470366477966, "step": 66995 }, { "epoch": 0.6305882352941177, "grad_norm": 0.40464096287709883, "learning_rate": 2.5191989557313436e-06, "loss": 0.020979669690132142, "step": 67000 }, { "epoch": 0.6306352941176471, "grad_norm": 0.5277828021673252, "learning_rate": 2.5191049596283244e-06, "loss": 0.015793272852897645, "step": 67005 }, { "epoch": 0.6306823529411765, "grad_norm": 0.48631743599920313, "learning_rate": 2.519010974046041e-06, "loss": 0.01755455732345581, "step": 67010 }, { "epoch": 0.6307294117647059, "grad_norm": 0.7110354524150782, "learning_rate": 2.51891699898253e-06, "loss": 0.01957440972328186, "step": 67015 }, { "epoch": 0.6307764705882353, "grad_norm": 0.5011822122261855, "learning_rate": 2.5188230344358296e-06, "loss": 0.01596532464027405, "step": 67020 }, { "epoch": 0.6308235294117647, "grad_norm": 0.48379066995860054, "learning_rate": 2.5187290804039777e-06, "loss": 0.014747364819049836, "step": 67025 }, { "epoch": 0.6308705882352941, "grad_norm": 0.5869344824252164, "learning_rate": 2.5186351368850142e-06, "loss": 0.018498265743255617, "step": 67030 }, { "epoch": 0.6309176470588236, "grad_norm": 0.48427622506839113, "learning_rate": 2.5185412038769787e-06, "loss": 0.015204422175884247, "step": 67035 }, { "epoch": 0.630964705882353, "grad_norm": 0.4599408455991791, "learning_rate": 2.518447281377911e-06, "loss": 0.01566462516784668, "step": 67040 }, { "epoch": 0.6310117647058824, "grad_norm": 0.4963637292372289, "learning_rate": 2.518353369385852e-06, "loss": 0.01659502983093262, "step": 67045 }, { "epoch": 0.6310588235294118, "grad_norm": 0.5811594245492618, "learning_rate": 2.518259467898842e-06, "loss": 0.015414425730705261, "step": 67050 }, { "epoch": 0.6311058823529412, "grad_norm": 0.39180896916705793, "learning_rate": 2.518165576914924e-06, "loss": 0.014027915894985199, "step": 67055 }, { "epoch": 0.6311529411764706, "grad_norm": 0.46400839664893273, "learning_rate": 2.5180716964321383e-06, "loss": 0.014890679717063903, "step": 67060 }, { "epoch": 0.6312, "grad_norm": 0.44739106753694147, "learning_rate": 2.51797782644853e-06, "loss": 0.01633715033531189, "step": 67065 }, { "epoch": 0.6312470588235294, "grad_norm": 0.47315132707320867, "learning_rate": 2.51788396696214e-06, "loss": 0.014902906119823455, "step": 67070 }, { "epoch": 0.6312941176470588, "grad_norm": 0.5315343716169421, "learning_rate": 2.5177901179710135e-06, "loss": 0.014409597218036651, "step": 67075 }, { "epoch": 0.6313411764705882, "grad_norm": 0.5507598860459344, "learning_rate": 2.5176962794731935e-06, "loss": 0.01685005724430084, "step": 67080 }, { "epoch": 0.6313882352941177, "grad_norm": 0.5785128822334016, "learning_rate": 2.5176024514667265e-06, "loss": 0.016631776094436647, "step": 67085 }, { "epoch": 0.6314352941176471, "grad_norm": 0.6194511374139102, "learning_rate": 2.5175086339496556e-06, "loss": 0.01854012906551361, "step": 67090 }, { "epoch": 0.6314823529411765, "grad_norm": 0.5355449476767276, "learning_rate": 2.5174148269200277e-06, "loss": 0.02093766927719116, "step": 67095 }, { "epoch": 0.6315294117647059, "grad_norm": 0.41810342165665204, "learning_rate": 2.517321030375889e-06, "loss": 0.016243526339530946, "step": 67100 }, { "epoch": 0.6315764705882353, "grad_norm": 0.6862639450625959, "learning_rate": 2.5172272443152864e-06, "loss": 0.017955929040908813, "step": 67105 }, { "epoch": 0.6316235294117647, "grad_norm": 0.6346012238727893, "learning_rate": 2.5171334687362663e-06, "loss": 0.014619576930999755, "step": 67110 }, { "epoch": 0.6316705882352941, "grad_norm": 0.340503034243891, "learning_rate": 2.5170397036368772e-06, "loss": 0.013474032282829285, "step": 67115 }, { "epoch": 0.6317176470588235, "grad_norm": 0.5276466627822893, "learning_rate": 2.516945949015167e-06, "loss": 0.01653049886226654, "step": 67120 }, { "epoch": 0.6317647058823529, "grad_norm": 0.4145830523544642, "learning_rate": 2.5168522048691852e-06, "loss": 0.013912299275398254, "step": 67125 }, { "epoch": 0.6318117647058824, "grad_norm": 0.7130143849790327, "learning_rate": 2.51675847119698e-06, "loss": 0.019646039605140685, "step": 67130 }, { "epoch": 0.6318588235294118, "grad_norm": 0.8994062486325161, "learning_rate": 2.516664747996602e-06, "loss": 0.015018853545188903, "step": 67135 }, { "epoch": 0.6319058823529412, "grad_norm": 0.7949986981659185, "learning_rate": 2.5165710352661016e-06, "loss": 0.0174616739153862, "step": 67140 }, { "epoch": 0.6319529411764706, "grad_norm": 0.5913922443852851, "learning_rate": 2.516477333003529e-06, "loss": 0.01676907539367676, "step": 67145 }, { "epoch": 0.632, "grad_norm": 0.6851156029170897, "learning_rate": 2.516383641206936e-06, "loss": 0.01727603077888489, "step": 67150 }, { "epoch": 0.6320470588235294, "grad_norm": 0.7367076448641543, "learning_rate": 2.516289959874374e-06, "loss": 0.014782558381557464, "step": 67155 }, { "epoch": 0.6320941176470588, "grad_norm": 0.5147771285009136, "learning_rate": 2.5161962890038956e-06, "loss": 0.019273881614208222, "step": 67160 }, { "epoch": 0.6321411764705882, "grad_norm": 0.5353476525164961, "learning_rate": 2.5161026285935534e-06, "loss": 0.015800562500953675, "step": 67165 }, { "epoch": 0.6321882352941176, "grad_norm": 0.49585033533395256, "learning_rate": 2.5160089786414016e-06, "loss": 0.020104345679283143, "step": 67170 }, { "epoch": 0.632235294117647, "grad_norm": 0.6801888388159038, "learning_rate": 2.5159153391454933e-06, "loss": 0.022320964932441713, "step": 67175 }, { "epoch": 0.6322823529411765, "grad_norm": 0.4166434893539274, "learning_rate": 2.515821710103883e-06, "loss": 0.014870068430900574, "step": 67180 }, { "epoch": 0.6323294117647059, "grad_norm": 0.6134832269227464, "learning_rate": 2.5157280915146253e-06, "loss": 0.017658305168151856, "step": 67185 }, { "epoch": 0.6323764705882353, "grad_norm": 0.5865868981227418, "learning_rate": 2.5156344833757764e-06, "loss": 0.024732337892055513, "step": 67190 }, { "epoch": 0.6324235294117647, "grad_norm": 0.5623138812592857, "learning_rate": 2.5155408856853914e-06, "loss": 0.02094769775867462, "step": 67195 }, { "epoch": 0.6324705882352941, "grad_norm": 0.5766384323529736, "learning_rate": 2.515447298441527e-06, "loss": 0.014982914924621582, "step": 67200 }, { "epoch": 0.6325176470588235, "grad_norm": 0.5846009360864878, "learning_rate": 2.5153537216422397e-06, "loss": 0.018818765878677368, "step": 67205 }, { "epoch": 0.6325647058823529, "grad_norm": 0.5915298013032887, "learning_rate": 2.5152601552855876e-06, "loss": 0.014767673611640931, "step": 67210 }, { "epoch": 0.6326117647058823, "grad_norm": 0.5644823753371578, "learning_rate": 2.5151665993696284e-06, "loss": 0.015968760848045348, "step": 67215 }, { "epoch": 0.6326588235294117, "grad_norm": 0.38869899831353755, "learning_rate": 2.5150730538924206e-06, "loss": 0.013670319318771362, "step": 67220 }, { "epoch": 0.6327058823529412, "grad_norm": 0.7412736092602115, "learning_rate": 2.5149795188520225e-06, "loss": 0.01731841266155243, "step": 67225 }, { "epoch": 0.6327529411764706, "grad_norm": 1.8705283814454818, "learning_rate": 2.5148859942464943e-06, "loss": 0.01574894189834595, "step": 67230 }, { "epoch": 0.6328, "grad_norm": 0.3564679287147411, "learning_rate": 2.5147924800738955e-06, "loss": 0.01276262104511261, "step": 67235 }, { "epoch": 0.6328470588235294, "grad_norm": 0.4608082928718509, "learning_rate": 2.5146989763322867e-06, "loss": 0.018368275463581087, "step": 67240 }, { "epoch": 0.6328941176470588, "grad_norm": 0.6065339066918525, "learning_rate": 2.5146054830197292e-06, "loss": 0.018771548569202424, "step": 67245 }, { "epoch": 0.6329411764705882, "grad_norm": 0.6162526747401754, "learning_rate": 2.5145120001342832e-06, "loss": 0.014171868562698364, "step": 67250 }, { "epoch": 0.6329882352941176, "grad_norm": 0.4436784772688641, "learning_rate": 2.5144185276740125e-06, "loss": 0.016191381216049194, "step": 67255 }, { "epoch": 0.633035294117647, "grad_norm": 0.5441550531395914, "learning_rate": 2.5143250656369777e-06, "loss": 0.01495155245065689, "step": 67260 }, { "epoch": 0.6330823529411764, "grad_norm": 0.4949291051413644, "learning_rate": 2.514231614021243e-06, "loss": 0.017056180536746977, "step": 67265 }, { "epoch": 0.6331294117647058, "grad_norm": 0.43287688128018154, "learning_rate": 2.5141381728248714e-06, "loss": 0.017193862795829774, "step": 67270 }, { "epoch": 0.6331764705882353, "grad_norm": 0.4520085709603215, "learning_rate": 2.514044742045927e-06, "loss": 0.0142999529838562, "step": 67275 }, { "epoch": 0.6332235294117647, "grad_norm": 0.4185395105582754, "learning_rate": 2.513951321682474e-06, "loss": 0.013624300062656403, "step": 67280 }, { "epoch": 0.6332705882352941, "grad_norm": 0.44898385974036276, "learning_rate": 2.5138579117325777e-06, "loss": 0.01717318147420883, "step": 67285 }, { "epoch": 0.6333176470588235, "grad_norm": 0.371177156355295, "learning_rate": 2.513764512194304e-06, "loss": 0.014342135190963745, "step": 67290 }, { "epoch": 0.6333647058823529, "grad_norm": 0.577859966702183, "learning_rate": 2.513671123065718e-06, "loss": 0.016543829441070558, "step": 67295 }, { "epoch": 0.6334117647058823, "grad_norm": 0.3217676542288098, "learning_rate": 2.5135777443448865e-06, "loss": 0.014328067004680634, "step": 67300 }, { "epoch": 0.6334588235294117, "grad_norm": 0.4554924443414275, "learning_rate": 2.5134843760298768e-06, "loss": 0.016878435015678407, "step": 67305 }, { "epoch": 0.6335058823529411, "grad_norm": 1.9512837040347981, "learning_rate": 2.513391018118756e-06, "loss": 0.019844818115234374, "step": 67310 }, { "epoch": 0.6335529411764705, "grad_norm": 0.4364942414649839, "learning_rate": 2.5132976706095918e-06, "loss": 0.014840230345726013, "step": 67315 }, { "epoch": 0.6336, "grad_norm": 0.5292528672533574, "learning_rate": 2.5132043335004536e-06, "loss": 0.04168510735034943, "step": 67320 }, { "epoch": 0.6336470588235295, "grad_norm": 0.5107006674264527, "learning_rate": 2.5131110067894098e-06, "loss": 0.02214931845664978, "step": 67325 }, { "epoch": 0.6336941176470589, "grad_norm": 0.48959310613199547, "learning_rate": 2.5130176904745306e-06, "loss": 0.013881699740886688, "step": 67330 }, { "epoch": 0.6337411764705883, "grad_norm": 0.6507398592745954, "learning_rate": 2.5129243845538846e-06, "loss": 0.015485280752182006, "step": 67335 }, { "epoch": 0.6337882352941177, "grad_norm": 0.547491442112267, "learning_rate": 2.5128310890255435e-06, "loss": 0.014263096451759338, "step": 67340 }, { "epoch": 0.633835294117647, "grad_norm": 0.5156256417291429, "learning_rate": 2.5127378038875784e-06, "loss": 0.015398141741752625, "step": 67345 }, { "epoch": 0.6338823529411765, "grad_norm": 0.4536616649070668, "learning_rate": 2.5126445291380595e-06, "loss": 0.011620163172483444, "step": 67350 }, { "epoch": 0.6339294117647059, "grad_norm": 0.5053797247925037, "learning_rate": 2.5125512647750607e-06, "loss": 0.018664464354515076, "step": 67355 }, { "epoch": 0.6339764705882353, "grad_norm": 0.45228677960869024, "learning_rate": 2.5124580107966523e-06, "loss": 0.014154431223869324, "step": 67360 }, { "epoch": 0.6340235294117647, "grad_norm": 0.4952650701872328, "learning_rate": 2.5123647672009087e-06, "loss": 0.018185344338417054, "step": 67365 }, { "epoch": 0.6340705882352942, "grad_norm": 0.4604893443816851, "learning_rate": 2.512271533985904e-06, "loss": 0.015014368295669555, "step": 67370 }, { "epoch": 0.6341176470588236, "grad_norm": 0.4142839545054085, "learning_rate": 2.512178311149711e-06, "loss": 0.011965746432542801, "step": 67375 }, { "epoch": 0.634164705882353, "grad_norm": 0.5774063262552113, "learning_rate": 2.5120850986904043e-06, "loss": 0.021675854921340942, "step": 67380 }, { "epoch": 0.6342117647058824, "grad_norm": 0.4886068480386489, "learning_rate": 2.5119918966060598e-06, "loss": 0.015796875953674315, "step": 67385 }, { "epoch": 0.6342588235294118, "grad_norm": 0.5287310200788595, "learning_rate": 2.511898704894752e-06, "loss": 0.015347501635551453, "step": 67390 }, { "epoch": 0.6343058823529412, "grad_norm": 0.5594503650760829, "learning_rate": 2.5118055235545576e-06, "loss": 0.012493855506181716, "step": 67395 }, { "epoch": 0.6343529411764706, "grad_norm": 0.7828451960638905, "learning_rate": 2.5117123525835523e-06, "loss": 0.01720735728740692, "step": 67400 }, { "epoch": 0.6344, "grad_norm": 0.2973721187329194, "learning_rate": 2.5116191919798143e-06, "loss": 0.015053294599056244, "step": 67405 }, { "epoch": 0.6344470588235294, "grad_norm": 0.5351106009096211, "learning_rate": 2.51152604174142e-06, "loss": 0.017260012030601502, "step": 67410 }, { "epoch": 0.6344941176470589, "grad_norm": 0.566741784610301, "learning_rate": 2.511432901866448e-06, "loss": 0.016406729817390442, "step": 67415 }, { "epoch": 0.6345411764705883, "grad_norm": 0.3765310971082707, "learning_rate": 2.511339772352977e-06, "loss": 0.015382006764411926, "step": 67420 }, { "epoch": 0.6345882352941177, "grad_norm": 0.5663879734590153, "learning_rate": 2.5112466531990853e-06, "loss": 0.018863384425640107, "step": 67425 }, { "epoch": 0.6346352941176471, "grad_norm": 0.327171167654757, "learning_rate": 2.5111535444028533e-06, "loss": 0.01679481416940689, "step": 67430 }, { "epoch": 0.6346823529411765, "grad_norm": 0.6146187128890739, "learning_rate": 2.51106044596236e-06, "loss": 0.015601205825805663, "step": 67435 }, { "epoch": 0.6347294117647059, "grad_norm": 0.5170283171516266, "learning_rate": 2.5109673578756866e-06, "loss": 0.022315990924835206, "step": 67440 }, { "epoch": 0.6347764705882353, "grad_norm": 0.4533798062829153, "learning_rate": 2.5108742801409144e-06, "loss": 0.014380167424678802, "step": 67445 }, { "epoch": 0.6348235294117647, "grad_norm": 0.48289832114924874, "learning_rate": 2.5107812127561232e-06, "loss": 0.015032857656478882, "step": 67450 }, { "epoch": 0.6348705882352941, "grad_norm": 0.365936120575455, "learning_rate": 2.510688155719397e-06, "loss": 0.021556304395198823, "step": 67455 }, { "epoch": 0.6349176470588235, "grad_norm": 0.49941860540450994, "learning_rate": 2.510595109028817e-06, "loss": 0.014303502440452576, "step": 67460 }, { "epoch": 0.634964705882353, "grad_norm": 0.596676448406051, "learning_rate": 2.510502072682467e-06, "loss": 0.012361003458499909, "step": 67465 }, { "epoch": 0.6350117647058824, "grad_norm": 0.37691435511026217, "learning_rate": 2.5104090466784303e-06, "loss": 0.01586054116487503, "step": 67470 }, { "epoch": 0.6350588235294118, "grad_norm": 0.3810443817889224, "learning_rate": 2.5103160310147906e-06, "loss": 0.024846044182777405, "step": 67475 }, { "epoch": 0.6351058823529412, "grad_norm": 0.34201867989739454, "learning_rate": 2.510223025689632e-06, "loss": 0.015050634741783142, "step": 67480 }, { "epoch": 0.6351529411764706, "grad_norm": 0.3735960324675665, "learning_rate": 2.5101300307010407e-06, "loss": 0.015369856357574463, "step": 67485 }, { "epoch": 0.6352, "grad_norm": 0.7211618810374024, "learning_rate": 2.5100370460471014e-06, "loss": 0.017897190153598787, "step": 67490 }, { "epoch": 0.6352470588235294, "grad_norm": 0.4174488774464551, "learning_rate": 2.5099440717259e-06, "loss": 0.016819557547569274, "step": 67495 }, { "epoch": 0.6352941176470588, "grad_norm": 0.5263629324298013, "learning_rate": 2.5098511077355233e-06, "loss": 0.015036439895629883, "step": 67500 }, { "epoch": 0.6353411764705882, "grad_norm": 0.5367616208396051, "learning_rate": 2.5097581540740575e-06, "loss": 0.016127696633338927, "step": 67505 }, { "epoch": 0.6353882352941177, "grad_norm": 0.63015982710169, "learning_rate": 2.5096652107395907e-06, "loss": 0.02029842436313629, "step": 67510 }, { "epoch": 0.6354352941176471, "grad_norm": 0.4980736053736712, "learning_rate": 2.5095722777302112e-06, "loss": 0.017945578694343566, "step": 67515 }, { "epoch": 0.6354823529411765, "grad_norm": 0.6374479541025612, "learning_rate": 2.5094793550440068e-06, "loss": 0.01220298707485199, "step": 67520 }, { "epoch": 0.6355294117647059, "grad_norm": 0.49417579545619467, "learning_rate": 2.5093864426790665e-06, "loss": 0.011910896003246307, "step": 67525 }, { "epoch": 0.6355764705882353, "grad_norm": 0.5588804508719113, "learning_rate": 2.50929354063348e-06, "loss": 0.01677151918411255, "step": 67530 }, { "epoch": 0.6356235294117647, "grad_norm": 0.5034364647093146, "learning_rate": 2.509200648905337e-06, "loss": 0.0178004115819931, "step": 67535 }, { "epoch": 0.6356705882352941, "grad_norm": 0.4834547779555745, "learning_rate": 2.509107767492729e-06, "loss": 0.012623222172260284, "step": 67540 }, { "epoch": 0.6357176470588235, "grad_norm": 0.6951502720898831, "learning_rate": 2.509014896393745e-06, "loss": 0.0284592866897583, "step": 67545 }, { "epoch": 0.6357647058823529, "grad_norm": 0.6001258307523466, "learning_rate": 2.5089220356064775e-06, "loss": 0.015452340245246887, "step": 67550 }, { "epoch": 0.6358117647058823, "grad_norm": 0.4840505071761352, "learning_rate": 2.5088291851290187e-06, "loss": 0.0158595472574234, "step": 67555 }, { "epoch": 0.6358588235294118, "grad_norm": 0.3257962824651116, "learning_rate": 2.5087363449594604e-06, "loss": 0.014250171184539796, "step": 67560 }, { "epoch": 0.6359058823529412, "grad_norm": 0.49321705398365573, "learning_rate": 2.508643515095896e-06, "loss": 0.01361444890499115, "step": 67565 }, { "epoch": 0.6359529411764706, "grad_norm": 0.38484415707510555, "learning_rate": 2.5085506955364187e-06, "loss": 0.01699151247739792, "step": 67570 }, { "epoch": 0.636, "grad_norm": 0.5963921456615477, "learning_rate": 2.508457886279122e-06, "loss": 0.017587754130363464, "step": 67575 }, { "epoch": 0.6360470588235294, "grad_norm": 0.36428653468312944, "learning_rate": 2.5083650873221005e-06, "loss": 0.016748526692390443, "step": 67580 }, { "epoch": 0.6360941176470588, "grad_norm": 0.39213374603866086, "learning_rate": 2.5082722986634494e-06, "loss": 0.012531667947769165, "step": 67585 }, { "epoch": 0.6361411764705882, "grad_norm": 0.4667463247883025, "learning_rate": 2.5081795203012636e-06, "loss": 0.015957842767238616, "step": 67590 }, { "epoch": 0.6361882352941176, "grad_norm": 0.6022006882986447, "learning_rate": 2.50808675223364e-06, "loss": 0.017034979164600374, "step": 67595 }, { "epoch": 0.636235294117647, "grad_norm": 0.39324756551408446, "learning_rate": 2.507993994458674e-06, "loss": 0.013781842589378358, "step": 67600 }, { "epoch": 0.6362823529411765, "grad_norm": 0.6172234775013404, "learning_rate": 2.5079012469744625e-06, "loss": 0.014082548022270203, "step": 67605 }, { "epoch": 0.6363294117647059, "grad_norm": 0.4592380933272818, "learning_rate": 2.507808509779102e-06, "loss": 0.014722016453742982, "step": 67610 }, { "epoch": 0.6363764705882353, "grad_norm": 0.7291812672804103, "learning_rate": 2.5077157828706923e-06, "loss": 0.013995552062988281, "step": 67615 }, { "epoch": 0.6364235294117647, "grad_norm": 0.48204633472546515, "learning_rate": 2.5076230662473306e-06, "loss": 0.01123809814453125, "step": 67620 }, { "epoch": 0.6364705882352941, "grad_norm": 0.556941555614292, "learning_rate": 2.507530359907116e-06, "loss": 0.019070157408714296, "step": 67625 }, { "epoch": 0.6365176470588235, "grad_norm": 0.6923862545581876, "learning_rate": 2.5074376638481475e-06, "loss": 0.012312746793031692, "step": 67630 }, { "epoch": 0.6365647058823529, "grad_norm": 0.36999131723652817, "learning_rate": 2.507344978068525e-06, "loss": 0.02129245102405548, "step": 67635 }, { "epoch": 0.6366117647058823, "grad_norm": 0.286784967082642, "learning_rate": 2.5072523025663488e-06, "loss": 0.012766420841217041, "step": 67640 }, { "epoch": 0.6366588235294117, "grad_norm": 0.45697879968210514, "learning_rate": 2.50715963733972e-06, "loss": 0.014481830596923827, "step": 67645 }, { "epoch": 0.6367058823529411, "grad_norm": 0.40135879578573636, "learning_rate": 2.5070669823867395e-06, "loss": 0.013295325636863708, "step": 67650 }, { "epoch": 0.6367529411764706, "grad_norm": 0.6062466664117144, "learning_rate": 2.506974337705509e-06, "loss": 0.01233285591006279, "step": 67655 }, { "epoch": 0.6368, "grad_norm": 0.4904697635629889, "learning_rate": 2.5068817032941312e-06, "loss": 0.015824411809444428, "step": 67660 }, { "epoch": 0.6368470588235294, "grad_norm": 0.6095458436111385, "learning_rate": 2.506789079150708e-06, "loss": 0.015678466856479646, "step": 67665 }, { "epoch": 0.6368941176470588, "grad_norm": 0.6618143160047406, "learning_rate": 2.5066964652733446e-06, "loss": 0.015351542830467224, "step": 67670 }, { "epoch": 0.6369411764705882, "grad_norm": 0.5271088986200032, "learning_rate": 2.5066038616601425e-06, "loss": 0.015294942259788512, "step": 67675 }, { "epoch": 0.6369882352941176, "grad_norm": 0.5617145104743297, "learning_rate": 2.5065112683092063e-06, "loss": 0.015909668803215028, "step": 67680 }, { "epoch": 0.637035294117647, "grad_norm": 0.48696050487891873, "learning_rate": 2.506418685218642e-06, "loss": 0.017438909411430357, "step": 67685 }, { "epoch": 0.6370823529411764, "grad_norm": 0.4460202432058842, "learning_rate": 2.5063261123865535e-06, "loss": 0.01817079484462738, "step": 67690 }, { "epoch": 0.6371294117647058, "grad_norm": 6.599598131736062, "learning_rate": 2.5062335498110475e-06, "loss": 0.01745886355638504, "step": 67695 }, { "epoch": 0.6371764705882353, "grad_norm": 0.4544531045127738, "learning_rate": 2.506140997490229e-06, "loss": 0.013265621662139893, "step": 67700 }, { "epoch": 0.6372235294117647, "grad_norm": 0.8622884053976655, "learning_rate": 2.506048455422206e-06, "loss": 0.01757569909095764, "step": 67705 }, { "epoch": 0.6372705882352941, "grad_norm": 0.615685242741226, "learning_rate": 2.5059559236050845e-06, "loss": 0.0163188174366951, "step": 67710 }, { "epoch": 0.6373176470588235, "grad_norm": 0.3663875374641051, "learning_rate": 2.505863402036973e-06, "loss": 0.014519281685352325, "step": 67715 }, { "epoch": 0.6373647058823529, "grad_norm": 0.319154399395487, "learning_rate": 2.5057708907159795e-06, "loss": 0.014929196238517762, "step": 67720 }, { "epoch": 0.6374117647058823, "grad_norm": 0.4617268453434666, "learning_rate": 2.505678389640212e-06, "loss": 0.015442058444023132, "step": 67725 }, { "epoch": 0.6374588235294117, "grad_norm": 0.5295256664566197, "learning_rate": 2.50558589880778e-06, "loss": 0.02128462344408035, "step": 67730 }, { "epoch": 0.6375058823529411, "grad_norm": 0.27911103654777014, "learning_rate": 2.5054934182167935e-06, "loss": 0.0121842660009861, "step": 67735 }, { "epoch": 0.6375529411764705, "grad_norm": 0.5742328143028975, "learning_rate": 2.505400947865362e-06, "loss": 0.018512308597564697, "step": 67740 }, { "epoch": 0.6376, "grad_norm": 0.4374165379238633, "learning_rate": 2.5053084877515964e-06, "loss": 0.015087834000587464, "step": 67745 }, { "epoch": 0.6376470588235295, "grad_norm": 0.3989664434322038, "learning_rate": 2.5052160378736076e-06, "loss": 0.010248880833387375, "step": 67750 }, { "epoch": 0.6376941176470589, "grad_norm": 0.6285069080482151, "learning_rate": 2.5051235982295076e-06, "loss": 0.017872661352157593, "step": 67755 }, { "epoch": 0.6377411764705883, "grad_norm": 0.38010276357358097, "learning_rate": 2.5050311688174073e-06, "loss": 0.01778845936059952, "step": 67760 }, { "epoch": 0.6377882352941177, "grad_norm": 0.5602608440924065, "learning_rate": 2.5049387496354206e-06, "loss": 0.020364835858345032, "step": 67765 }, { "epoch": 0.6378352941176471, "grad_norm": 0.4695819168370815, "learning_rate": 2.5048463406816596e-06, "loss": 0.015390977263450623, "step": 67770 }, { "epoch": 0.6378823529411765, "grad_norm": 0.6936716474848205, "learning_rate": 2.5047539419542388e-06, "loss": 0.021165472269058228, "step": 67775 }, { "epoch": 0.6379294117647059, "grad_norm": 0.578597552600375, "learning_rate": 2.504661553451271e-06, "loss": 0.015974128246307374, "step": 67780 }, { "epoch": 0.6379764705882353, "grad_norm": 0.5289715730930181, "learning_rate": 2.5045691751708713e-06, "loss": 0.016031351685523988, "step": 67785 }, { "epoch": 0.6380235294117647, "grad_norm": 0.49211513606762813, "learning_rate": 2.5044768071111546e-06, "loss": 0.01640354096889496, "step": 67790 }, { "epoch": 0.6380705882352942, "grad_norm": 0.39845521758121644, "learning_rate": 2.5043844492702365e-06, "loss": 0.014917281270027161, "step": 67795 }, { "epoch": 0.6381176470588236, "grad_norm": 0.8507914381521551, "learning_rate": 2.504292101646233e-06, "loss": 0.019266128540039062, "step": 67800 }, { "epoch": 0.638164705882353, "grad_norm": 0.4596090116002019, "learning_rate": 2.5041997642372595e-06, "loss": 0.014388194680213929, "step": 67805 }, { "epoch": 0.6382117647058824, "grad_norm": 0.42060965429010655, "learning_rate": 2.5041074370414344e-06, "loss": 0.01766228526830673, "step": 67810 }, { "epoch": 0.6382588235294118, "grad_norm": 0.5869105184140131, "learning_rate": 2.504015120056874e-06, "loss": 0.01811935156583786, "step": 67815 }, { "epoch": 0.6383058823529412, "grad_norm": 0.4735461650323752, "learning_rate": 2.5039228132816972e-06, "loss": 0.020283468067646027, "step": 67820 }, { "epoch": 0.6383529411764706, "grad_norm": 0.6748536344909456, "learning_rate": 2.5038305167140215e-06, "loss": 0.019037811458110808, "step": 67825 }, { "epoch": 0.6384, "grad_norm": 0.45399269513166063, "learning_rate": 2.5037382303519657e-06, "loss": 0.01463303118944168, "step": 67830 }, { "epoch": 0.6384470588235294, "grad_norm": 0.5740742654130703, "learning_rate": 2.5036459541936505e-06, "loss": 0.01584285795688629, "step": 67835 }, { "epoch": 0.6384941176470589, "grad_norm": 0.9530806803222241, "learning_rate": 2.5035536882371936e-06, "loss": 0.014705753326416016, "step": 67840 }, { "epoch": 0.6385411764705883, "grad_norm": 0.6741428742440834, "learning_rate": 2.5034614324807167e-06, "loss": 0.019598530232906343, "step": 67845 }, { "epoch": 0.6385882352941177, "grad_norm": 0.4469532447855049, "learning_rate": 2.5033691869223404e-06, "loss": 0.01712958812713623, "step": 67850 }, { "epoch": 0.6386352941176471, "grad_norm": 0.48434995721049223, "learning_rate": 2.503276951560186e-06, "loss": 0.012706267833709716, "step": 67855 }, { "epoch": 0.6386823529411765, "grad_norm": 0.7082614307983529, "learning_rate": 2.5031847263923748e-06, "loss": 0.01817084103822708, "step": 67860 }, { "epoch": 0.6387294117647059, "grad_norm": 0.47109066540885847, "learning_rate": 2.5030925114170297e-06, "loss": 0.016289103031158447, "step": 67865 }, { "epoch": 0.6387764705882353, "grad_norm": 0.49050102280260094, "learning_rate": 2.5030003066322733e-06, "loss": 0.016404730081558228, "step": 67870 }, { "epoch": 0.6388235294117647, "grad_norm": 0.8396645197150481, "learning_rate": 2.502908112036228e-06, "loss": 0.021182844042778017, "step": 67875 }, { "epoch": 0.6388705882352941, "grad_norm": 0.3916053588434488, "learning_rate": 2.5028159276270194e-06, "loss": 0.011724472790956498, "step": 67880 }, { "epoch": 0.6389176470588235, "grad_norm": 0.36266680754946157, "learning_rate": 2.502723753402769e-06, "loss": 0.014144065976142883, "step": 67885 }, { "epoch": 0.638964705882353, "grad_norm": 0.42086433178882227, "learning_rate": 2.502631589361604e-06, "loss": 0.014387363195419311, "step": 67890 }, { "epoch": 0.6390117647058824, "grad_norm": 0.5236573966925064, "learning_rate": 2.502539435501648e-06, "loss": 0.01544540524482727, "step": 67895 }, { "epoch": 0.6390588235294118, "grad_norm": 0.5544352747836617, "learning_rate": 2.502447291821027e-06, "loss": 0.014457334578037263, "step": 67900 }, { "epoch": 0.6391058823529412, "grad_norm": 0.4202142433159801, "learning_rate": 2.5023551583178675e-06, "loss": 0.013216787576675415, "step": 67905 }, { "epoch": 0.6391529411764706, "grad_norm": 0.3936054633966163, "learning_rate": 2.5022630349902956e-06, "loss": 0.015679803490638734, "step": 67910 }, { "epoch": 0.6392, "grad_norm": 0.7182365876410103, "learning_rate": 2.502170921836439e-06, "loss": 0.017085421085357665, "step": 67915 }, { "epoch": 0.6392470588235294, "grad_norm": 0.5445388265647263, "learning_rate": 2.5020788188544243e-06, "loss": 0.019106119871139526, "step": 67920 }, { "epoch": 0.6392941176470588, "grad_norm": 0.41668715925348243, "learning_rate": 2.5019867260423803e-06, "loss": 0.01644098460674286, "step": 67925 }, { "epoch": 0.6393411764705882, "grad_norm": 0.5878091016836524, "learning_rate": 2.5018946433984353e-06, "loss": 0.01594928503036499, "step": 67930 }, { "epoch": 0.6393882352941177, "grad_norm": 0.5255873647073868, "learning_rate": 2.501802570920719e-06, "loss": 0.018681833148002626, "step": 67935 }, { "epoch": 0.6394352941176471, "grad_norm": 0.48254552507221976, "learning_rate": 2.5017105086073596e-06, "loss": 0.01703149378299713, "step": 67940 }, { "epoch": 0.6394823529411765, "grad_norm": 0.641681935433473, "learning_rate": 2.5016184564564884e-06, "loss": 0.02232208251953125, "step": 67945 }, { "epoch": 0.6395294117647059, "grad_norm": 1.3071050734550589, "learning_rate": 2.5015264144662336e-06, "loss": 0.01667635440826416, "step": 67950 }, { "epoch": 0.6395764705882353, "grad_norm": 0.49306772101665974, "learning_rate": 2.501434382634729e-06, "loss": 0.017656755447387696, "step": 67955 }, { "epoch": 0.6396235294117647, "grad_norm": 0.5949268882190811, "learning_rate": 2.501342360960105e-06, "loss": 0.015389065444469451, "step": 67960 }, { "epoch": 0.6396705882352941, "grad_norm": 0.5233336611269819, "learning_rate": 2.5012503494404926e-06, "loss": 0.018346071243286133, "step": 67965 }, { "epoch": 0.6397176470588235, "grad_norm": 0.7562381241553494, "learning_rate": 2.501158348074025e-06, "loss": 0.014874076843261719, "step": 67970 }, { "epoch": 0.6397647058823529, "grad_norm": 0.5349895059409225, "learning_rate": 2.5010663568588355e-06, "loss": 0.01635967195034027, "step": 67975 }, { "epoch": 0.6398117647058823, "grad_norm": 0.5350605546145176, "learning_rate": 2.5009743757930564e-06, "loss": 0.018656036257743834, "step": 67980 }, { "epoch": 0.6398588235294118, "grad_norm": 0.5241824369571735, "learning_rate": 2.500882404874822e-06, "loss": 0.015587541460990905, "step": 67985 }, { "epoch": 0.6399058823529412, "grad_norm": 0.5820547328692073, "learning_rate": 2.500790444102267e-06, "loss": 0.015303751826286316, "step": 67990 }, { "epoch": 0.6399529411764706, "grad_norm": 0.5101268841527535, "learning_rate": 2.5006984934735253e-06, "loss": 0.0166045218706131, "step": 67995 }, { "epoch": 0.64, "grad_norm": 0.7823434730797099, "learning_rate": 2.500606552986733e-06, "loss": 0.019297210872173308, "step": 68000 }, { "epoch": 0.6400470588235294, "grad_norm": 0.45161493332783864, "learning_rate": 2.5005146226400252e-06, "loss": 0.016891396045684813, "step": 68005 }, { "epoch": 0.6400941176470588, "grad_norm": 0.34363685645141046, "learning_rate": 2.5004227024315385e-06, "loss": 0.01523224413394928, "step": 68010 }, { "epoch": 0.6401411764705882, "grad_norm": 0.4864461730072374, "learning_rate": 2.5003307923594095e-06, "loss": 0.014156383275985718, "step": 68015 }, { "epoch": 0.6401882352941176, "grad_norm": 0.36589658639620304, "learning_rate": 2.5002388924217752e-06, "loss": 0.018095463514328003, "step": 68020 }, { "epoch": 0.640235294117647, "grad_norm": 0.566118391273646, "learning_rate": 2.500147002616774e-06, "loss": 0.016662216186523436, "step": 68025 }, { "epoch": 0.6402823529411765, "grad_norm": 0.7798753168859688, "learning_rate": 2.5000551229425427e-06, "loss": 0.024108946323394775, "step": 68030 }, { "epoch": 0.6403294117647059, "grad_norm": 0.48643102336039634, "learning_rate": 2.4999632533972214e-06, "loss": 0.015324310958385467, "step": 68035 }, { "epoch": 0.6403764705882353, "grad_norm": 0.5574483849603427, "learning_rate": 2.499871393978949e-06, "loss": 0.01568608731031418, "step": 68040 }, { "epoch": 0.6404235294117647, "grad_norm": 0.3194023611284332, "learning_rate": 2.4997795446858634e-06, "loss": 0.01583792269229889, "step": 68045 }, { "epoch": 0.6404705882352941, "grad_norm": 0.4646416184344626, "learning_rate": 2.4996877055161066e-06, "loss": 0.01714780479669571, "step": 68050 }, { "epoch": 0.6405176470588235, "grad_norm": 0.39191287995743596, "learning_rate": 2.4995958764678185e-06, "loss": 0.02004273533821106, "step": 68055 }, { "epoch": 0.6405647058823529, "grad_norm": 0.5206690727240905, "learning_rate": 2.49950405753914e-06, "loss": 0.015086367726325989, "step": 68060 }, { "epoch": 0.6406117647058823, "grad_norm": 0.5949796852566658, "learning_rate": 2.4994122487282126e-06, "loss": 0.016120384633541106, "step": 68065 }, { "epoch": 0.6406588235294117, "grad_norm": 0.4424521477669585, "learning_rate": 2.4993204500331785e-06, "loss": 0.015890626609325408, "step": 68070 }, { "epoch": 0.6407058823529411, "grad_norm": 0.3528277676319864, "learning_rate": 2.4992286614521786e-06, "loss": 0.015078075230121613, "step": 68075 }, { "epoch": 0.6407529411764706, "grad_norm": 0.5815608147594952, "learning_rate": 2.499136882983359e-06, "loss": 0.01607494056224823, "step": 68080 }, { "epoch": 0.6408, "grad_norm": 0.49085959308415833, "learning_rate": 2.4990451146248605e-06, "loss": 0.014347033202648162, "step": 68085 }, { "epoch": 0.6408470588235294, "grad_norm": 0.5087863511086128, "learning_rate": 2.4989533563748277e-06, "loss": 0.013440683484077454, "step": 68090 }, { "epoch": 0.6408941176470588, "grad_norm": 0.40356101233018177, "learning_rate": 2.498861608231405e-06, "loss": 0.01642594486474991, "step": 68095 }, { "epoch": 0.6409411764705882, "grad_norm": 0.4353497896803773, "learning_rate": 2.498769870192737e-06, "loss": 0.014648231863975524, "step": 68100 }, { "epoch": 0.6409882352941176, "grad_norm": 0.6868855168344916, "learning_rate": 2.49867814225697e-06, "loss": 0.015667256712913514, "step": 68105 }, { "epoch": 0.641035294117647, "grad_norm": 0.3764532924555104, "learning_rate": 2.4985864244222484e-06, "loss": 0.01076534390449524, "step": 68110 }, { "epoch": 0.6410823529411764, "grad_norm": 0.7861942663993522, "learning_rate": 2.498494716686719e-06, "loss": 0.01903299242258072, "step": 68115 }, { "epoch": 0.6411294117647058, "grad_norm": 0.4184919892858904, "learning_rate": 2.498403019048529e-06, "loss": 0.011123557388782502, "step": 68120 }, { "epoch": 0.6411764705882353, "grad_norm": 0.8375245892929236, "learning_rate": 2.498311331505825e-06, "loss": 0.01642434298992157, "step": 68125 }, { "epoch": 0.6412235294117647, "grad_norm": 0.6004845269595622, "learning_rate": 2.498219654056755e-06, "loss": 0.016738590598106385, "step": 68130 }, { "epoch": 0.6412705882352941, "grad_norm": 0.4137092396095166, "learning_rate": 2.498127986699468e-06, "loss": 0.01681262403726578, "step": 68135 }, { "epoch": 0.6413176470588235, "grad_norm": 0.43778272761990045, "learning_rate": 2.498036329432111e-06, "loss": 0.010828649252653122, "step": 68140 }, { "epoch": 0.641364705882353, "grad_norm": 0.5043619743096521, "learning_rate": 2.497944682252834e-06, "loss": 0.013569161295890808, "step": 68145 }, { "epoch": 0.6414117647058823, "grad_norm": 0.4715693763199559, "learning_rate": 2.497853045159786e-06, "loss": 0.019355261325836183, "step": 68150 }, { "epoch": 0.6414588235294117, "grad_norm": 0.38769018836003905, "learning_rate": 2.497761418151118e-06, "loss": 0.016585896909236907, "step": 68155 }, { "epoch": 0.6415058823529411, "grad_norm": 0.6078939011483739, "learning_rate": 2.4976698012249803e-06, "loss": 0.019352757930755617, "step": 68160 }, { "epoch": 0.6415529411764705, "grad_norm": 0.5671103329909544, "learning_rate": 2.4975781943795236e-06, "loss": 0.013026827573776245, "step": 68165 }, { "epoch": 0.6416, "grad_norm": 0.4153856041401147, "learning_rate": 2.497486597612899e-06, "loss": 0.013796643912792205, "step": 68170 }, { "epoch": 0.6416470588235295, "grad_norm": 0.393617305577428, "learning_rate": 2.4973950109232595e-06, "loss": 0.013565155863761901, "step": 68175 }, { "epoch": 0.6416941176470589, "grad_norm": 0.43828523795452173, "learning_rate": 2.4973034343087572e-06, "loss": 0.016149359941482543, "step": 68180 }, { "epoch": 0.6417411764705883, "grad_norm": 0.6461215315541484, "learning_rate": 2.4972118677675444e-06, "loss": 0.013893228769302369, "step": 68185 }, { "epoch": 0.6417882352941177, "grad_norm": 0.6547305258787924, "learning_rate": 2.497120311297775e-06, "loss": 0.01741919368505478, "step": 68190 }, { "epoch": 0.6418352941176471, "grad_norm": 0.3591748164228084, "learning_rate": 2.497028764897603e-06, "loss": 0.018064385652542113, "step": 68195 }, { "epoch": 0.6418823529411765, "grad_norm": 0.26081482192269645, "learning_rate": 2.496937228565182e-06, "loss": 0.015647590160369873, "step": 68200 }, { "epoch": 0.6419294117647059, "grad_norm": 0.3864499055359272, "learning_rate": 2.4968457022986676e-06, "loss": 0.013657647371292114, "step": 68205 }, { "epoch": 0.6419764705882353, "grad_norm": 0.5560189310754066, "learning_rate": 2.496754186096215e-06, "loss": 0.013180720806121825, "step": 68210 }, { "epoch": 0.6420235294117647, "grad_norm": 0.6682971878376993, "learning_rate": 2.4966626799559796e-06, "loss": 0.017751058936119078, "step": 68215 }, { "epoch": 0.6420705882352942, "grad_norm": 0.4412654382209055, "learning_rate": 2.496571183876118e-06, "loss": 0.01430448740720749, "step": 68220 }, { "epoch": 0.6421176470588236, "grad_norm": 0.46951742792268064, "learning_rate": 2.4964796978547864e-06, "loss": 0.017039002478122713, "step": 68225 }, { "epoch": 0.642164705882353, "grad_norm": 0.7470048043484783, "learning_rate": 2.496388221890143e-06, "loss": 0.01418779194355011, "step": 68230 }, { "epoch": 0.6422117647058824, "grad_norm": 0.6893700596024122, "learning_rate": 2.496296755980344e-06, "loss": 0.014321428537368775, "step": 68235 }, { "epoch": 0.6422588235294118, "grad_norm": 0.3959807046099023, "learning_rate": 2.4962053001235487e-06, "loss": 0.016191744804382326, "step": 68240 }, { "epoch": 0.6423058823529412, "grad_norm": 0.5054601068792567, "learning_rate": 2.496113854317915e-06, "loss": 0.01571417450904846, "step": 68245 }, { "epoch": 0.6423529411764706, "grad_norm": 0.3973430668350555, "learning_rate": 2.496022418561602e-06, "loss": 0.013512729108333588, "step": 68250 }, { "epoch": 0.6424, "grad_norm": 0.43015308398181873, "learning_rate": 2.49593099285277e-06, "loss": 0.016918173432350157, "step": 68255 }, { "epoch": 0.6424470588235294, "grad_norm": 0.6581582562198122, "learning_rate": 2.4958395771895785e-06, "loss": 0.01920093595981598, "step": 68260 }, { "epoch": 0.6424941176470588, "grad_norm": 0.5750403016343778, "learning_rate": 2.4957481715701875e-06, "loss": 0.016710996627807617, "step": 68265 }, { "epoch": 0.6425411764705883, "grad_norm": 0.5206096073844688, "learning_rate": 2.495656775992759e-06, "loss": 0.013737133145332337, "step": 68270 }, { "epoch": 0.6425882352941177, "grad_norm": 0.5157929828720064, "learning_rate": 2.4955653904554533e-06, "loss": 0.015333715081214904, "step": 68275 }, { "epoch": 0.6426352941176471, "grad_norm": 0.5199906669369327, "learning_rate": 2.495474014956433e-06, "loss": 0.02237723916769028, "step": 68280 }, { "epoch": 0.6426823529411765, "grad_norm": 0.4384523016047082, "learning_rate": 2.4953826494938604e-06, "loss": 0.014332957565784454, "step": 68285 }, { "epoch": 0.6427294117647059, "grad_norm": 0.33452747936409866, "learning_rate": 2.495291294065898e-06, "loss": 0.015214657783508301, "step": 68290 }, { "epoch": 0.6427764705882353, "grad_norm": 0.5738058100926601, "learning_rate": 2.49519994867071e-06, "loss": 0.015789388120174407, "step": 68295 }, { "epoch": 0.6428235294117647, "grad_norm": 0.8253475671741523, "learning_rate": 2.4951086133064585e-06, "loss": 0.0159515380859375, "step": 68300 }, { "epoch": 0.6428705882352941, "grad_norm": 0.48245911006232084, "learning_rate": 2.4950172879713096e-06, "loss": 0.01672048568725586, "step": 68305 }, { "epoch": 0.6429176470588235, "grad_norm": 0.36793537817209554, "learning_rate": 2.4949259726634273e-06, "loss": 0.013178013265132904, "step": 68310 }, { "epoch": 0.642964705882353, "grad_norm": 0.4588389540330952, "learning_rate": 2.4948346673809763e-06, "loss": 0.016670215129852294, "step": 68315 }, { "epoch": 0.6430117647058824, "grad_norm": 0.3205610413725985, "learning_rate": 2.4947433721221225e-06, "loss": 0.01513855904340744, "step": 68320 }, { "epoch": 0.6430588235294118, "grad_norm": 0.5861647661440526, "learning_rate": 2.4946520868850326e-06, "loss": 0.01653616726398468, "step": 68325 }, { "epoch": 0.6431058823529412, "grad_norm": 0.42678484821544177, "learning_rate": 2.494560811667873e-06, "loss": 0.017734333872795105, "step": 68330 }, { "epoch": 0.6431529411764706, "grad_norm": 0.7201733648394081, "learning_rate": 2.4944695464688106e-06, "loss": 0.016043029725551605, "step": 68335 }, { "epoch": 0.6432, "grad_norm": 2.840689828254709, "learning_rate": 2.494378291286012e-06, "loss": 0.021943095326423644, "step": 68340 }, { "epoch": 0.6432470588235294, "grad_norm": 0.6404851493311232, "learning_rate": 2.4942870461176467e-06, "loss": 0.016835394501686095, "step": 68345 }, { "epoch": 0.6432941176470588, "grad_norm": 0.45412001925625156, "learning_rate": 2.4941958109618826e-06, "loss": 0.013161948323249817, "step": 68350 }, { "epoch": 0.6433411764705882, "grad_norm": 1.1361320440032392, "learning_rate": 2.4941045858168887e-06, "loss": 0.014165228605270386, "step": 68355 }, { "epoch": 0.6433882352941176, "grad_norm": 0.5728271662948128, "learning_rate": 2.494013370680834e-06, "loss": 0.02087644636631012, "step": 68360 }, { "epoch": 0.6434352941176471, "grad_norm": 0.3498283290415587, "learning_rate": 2.4939221655518888e-06, "loss": 0.011192011088132859, "step": 68365 }, { "epoch": 0.6434823529411765, "grad_norm": 0.3495971070253775, "learning_rate": 2.493830970428224e-06, "loss": 0.01776318848133087, "step": 68370 }, { "epoch": 0.6435294117647059, "grad_norm": 0.676807177861653, "learning_rate": 2.493739785308009e-06, "loss": 0.022314462065696716, "step": 68375 }, { "epoch": 0.6435764705882353, "grad_norm": 0.45037864705209346, "learning_rate": 2.4936486101894166e-06, "loss": 0.01611417531967163, "step": 68380 }, { "epoch": 0.6436235294117647, "grad_norm": 0.720148746696562, "learning_rate": 2.493557445070618e-06, "loss": 0.019587452709674835, "step": 68385 }, { "epoch": 0.6436705882352941, "grad_norm": 0.6884541003398529, "learning_rate": 2.4934662899497843e-06, "loss": 0.02099027782678604, "step": 68390 }, { "epoch": 0.6437176470588235, "grad_norm": 0.44709161160024774, "learning_rate": 2.4933751448250895e-06, "loss": 0.012604208290576934, "step": 68395 }, { "epoch": 0.6437647058823529, "grad_norm": 0.4040705385164178, "learning_rate": 2.4932840096947066e-06, "loss": 0.017207412421703337, "step": 68400 }, { "epoch": 0.6438117647058823, "grad_norm": 0.7095364265052264, "learning_rate": 2.493192884556809e-06, "loss": 0.014678649604320526, "step": 68405 }, { "epoch": 0.6438588235294118, "grad_norm": 0.47738553267368833, "learning_rate": 2.493101769409571e-06, "loss": 0.01458928883075714, "step": 68410 }, { "epoch": 0.6439058823529412, "grad_norm": 0.4903625071887845, "learning_rate": 2.4930106642511667e-06, "loss": 0.01608712375164032, "step": 68415 }, { "epoch": 0.6439529411764706, "grad_norm": 0.5613514059458419, "learning_rate": 2.4929195690797715e-06, "loss": 0.020324194431304933, "step": 68420 }, { "epoch": 0.644, "grad_norm": 0.49435574498080126, "learning_rate": 2.4928284838935615e-06, "loss": 0.016007100045681, "step": 68425 }, { "epoch": 0.6440470588235294, "grad_norm": 0.8618321673347831, "learning_rate": 2.4927374086907118e-06, "loss": 0.014270073175430298, "step": 68430 }, { "epoch": 0.6440941176470588, "grad_norm": 0.34277654241075445, "learning_rate": 2.4926463434693983e-06, "loss": 0.015251742303371429, "step": 68435 }, { "epoch": 0.6441411764705882, "grad_norm": 0.4858094502142704, "learning_rate": 2.4925552882278e-06, "loss": 0.01637928932905197, "step": 68440 }, { "epoch": 0.6441882352941176, "grad_norm": 0.7513107168541026, "learning_rate": 2.4924642429640916e-06, "loss": 0.016479316353797912, "step": 68445 }, { "epoch": 0.644235294117647, "grad_norm": 0.5427007385765802, "learning_rate": 2.4923732076764528e-06, "loss": 0.01771807074546814, "step": 68450 }, { "epoch": 0.6442823529411764, "grad_norm": 0.4763012575788406, "learning_rate": 2.492282182363061e-06, "loss": 0.01449013501405716, "step": 68455 }, { "epoch": 0.6443294117647059, "grad_norm": 0.4977160194357246, "learning_rate": 2.4921911670220956e-06, "loss": 0.01669327765703201, "step": 68460 }, { "epoch": 0.6443764705882353, "grad_norm": 0.780151703981716, "learning_rate": 2.492100161651736e-06, "loss": 0.016781729459762574, "step": 68465 }, { "epoch": 0.6444235294117647, "grad_norm": 0.6268041677885854, "learning_rate": 2.4920091662501598e-06, "loss": 0.01488904058933258, "step": 68470 }, { "epoch": 0.6444705882352941, "grad_norm": 0.4323149007253731, "learning_rate": 2.4919181808155497e-06, "loss": 0.0139775812625885, "step": 68475 }, { "epoch": 0.6445176470588235, "grad_norm": 0.4520031924702858, "learning_rate": 2.4918272053460853e-06, "loss": 0.016789041459560394, "step": 68480 }, { "epoch": 0.6445647058823529, "grad_norm": 0.49519142340132427, "learning_rate": 2.491736239839948e-06, "loss": 0.01453588604927063, "step": 68485 }, { "epoch": 0.6446117647058823, "grad_norm": 0.7565961185462076, "learning_rate": 2.491645284295319e-06, "loss": 0.016594094038009644, "step": 68490 }, { "epoch": 0.6446588235294117, "grad_norm": 0.49456488158794676, "learning_rate": 2.49155433871038e-06, "loss": 0.018274377286434173, "step": 68495 }, { "epoch": 0.6447058823529411, "grad_norm": 0.41843718892011156, "learning_rate": 2.491463403083314e-06, "loss": 0.020718376338481902, "step": 68500 }, { "epoch": 0.6447529411764706, "grad_norm": 0.49462229938542857, "learning_rate": 2.4913724774123043e-06, "loss": 0.018277353048324584, "step": 68505 }, { "epoch": 0.6448, "grad_norm": 0.4444112799739603, "learning_rate": 2.491281561695533e-06, "loss": 0.013550464808940888, "step": 68510 }, { "epoch": 0.6448470588235294, "grad_norm": 0.3628531345259397, "learning_rate": 2.4911906559311857e-06, "loss": 0.015171051025390625, "step": 68515 }, { "epoch": 0.6448941176470588, "grad_norm": 0.4488616895306647, "learning_rate": 2.491099760117445e-06, "loss": 0.013139745593070984, "step": 68520 }, { "epoch": 0.6449411764705882, "grad_norm": 0.33928469271643086, "learning_rate": 2.4910088742524973e-06, "loss": 0.015329939126968384, "step": 68525 }, { "epoch": 0.6449882352941176, "grad_norm": 0.6290582585221913, "learning_rate": 2.4909179983345268e-06, "loss": 0.018440037965774536, "step": 68530 }, { "epoch": 0.645035294117647, "grad_norm": 0.6104389664662186, "learning_rate": 2.4908271323617196e-06, "loss": 0.018725313246250153, "step": 68535 }, { "epoch": 0.6450823529411764, "grad_norm": 0.48748304401304654, "learning_rate": 2.4907362763322622e-06, "loss": 0.016384053230285644, "step": 68540 }, { "epoch": 0.6451294117647058, "grad_norm": 0.49067785867872143, "learning_rate": 2.4906454302443395e-06, "loss": 0.017794832587242126, "step": 68545 }, { "epoch": 0.6451764705882352, "grad_norm": 0.5927255009976635, "learning_rate": 2.4905545940961414e-06, "loss": 0.011264504492282867, "step": 68550 }, { "epoch": 0.6452235294117648, "grad_norm": 0.49461694298673786, "learning_rate": 2.4904637678858534e-06, "loss": 0.017281216382980347, "step": 68555 }, { "epoch": 0.6452705882352942, "grad_norm": 0.4927753121500863, "learning_rate": 2.4903729516116647e-06, "loss": 0.016789671778678895, "step": 68560 }, { "epoch": 0.6453176470588236, "grad_norm": 0.3280943619602769, "learning_rate": 2.490282145271763e-06, "loss": 0.01636936664581299, "step": 68565 }, { "epoch": 0.645364705882353, "grad_norm": 0.5877776947376036, "learning_rate": 2.490191348864337e-06, "loss": 0.016408777236938475, "step": 68570 }, { "epoch": 0.6454117647058824, "grad_norm": 0.4242687171002609, "learning_rate": 2.4901005623875775e-06, "loss": 0.014846307039260865, "step": 68575 }, { "epoch": 0.6454588235294118, "grad_norm": 0.5104069733988639, "learning_rate": 2.4900097858396736e-06, "loss": 0.015400247275829315, "step": 68580 }, { "epoch": 0.6455058823529412, "grad_norm": 0.44924592068968794, "learning_rate": 2.489919019218815e-06, "loss": 0.016697824001312256, "step": 68585 }, { "epoch": 0.6455529411764706, "grad_norm": 0.48719543485693534, "learning_rate": 2.4898282625231933e-06, "loss": 0.015626530349254607, "step": 68590 }, { "epoch": 0.6456, "grad_norm": 0.42931468752454743, "learning_rate": 2.4897375157510005e-06, "loss": 0.014711052179336548, "step": 68595 }, { "epoch": 0.6456470588235295, "grad_norm": 0.42903776966188606, "learning_rate": 2.489646778900427e-06, "loss": 0.013314062356948852, "step": 68600 }, { "epoch": 0.6456941176470589, "grad_norm": 0.5474050212779631, "learning_rate": 2.4895560519696645e-06, "loss": 0.017844481766223906, "step": 68605 }, { "epoch": 0.6457411764705883, "grad_norm": 0.39867362907797804, "learning_rate": 2.4894653349569074e-06, "loss": 0.01643500030040741, "step": 68610 }, { "epoch": 0.6457882352941177, "grad_norm": 0.43858402684510467, "learning_rate": 2.4893746278603477e-06, "loss": 0.017603963613510132, "step": 68615 }, { "epoch": 0.6458352941176471, "grad_norm": 0.6579080524127009, "learning_rate": 2.4892839306781794e-06, "loss": 0.018774425983428954, "step": 68620 }, { "epoch": 0.6458823529411765, "grad_norm": 0.5502401698488352, "learning_rate": 2.489193243408597e-06, "loss": 0.01572345197200775, "step": 68625 }, { "epoch": 0.6459294117647059, "grad_norm": 0.39236714730220607, "learning_rate": 2.4891025660497943e-06, "loss": 0.015118995308876037, "step": 68630 }, { "epoch": 0.6459764705882353, "grad_norm": 0.49058104869278674, "learning_rate": 2.4890118985999657e-06, "loss": 0.014776185154914856, "step": 68635 }, { "epoch": 0.6460235294117647, "grad_norm": 0.3303528292317966, "learning_rate": 2.488921241057308e-06, "loss": 0.013380736112594604, "step": 68640 }, { "epoch": 0.6460705882352941, "grad_norm": 0.6768410139294977, "learning_rate": 2.4888305934200167e-06, "loss": 0.011688037216663361, "step": 68645 }, { "epoch": 0.6461176470588236, "grad_norm": 0.4747796060045619, "learning_rate": 2.4887399556862875e-06, "loss": 0.014750239253044129, "step": 68650 }, { "epoch": 0.646164705882353, "grad_norm": 0.4503345689432368, "learning_rate": 2.488649327854317e-06, "loss": 0.016783666610717774, "step": 68655 }, { "epoch": 0.6462117647058824, "grad_norm": 0.33932809001222936, "learning_rate": 2.4885587099223035e-06, "loss": 0.015120419859886169, "step": 68660 }, { "epoch": 0.6462588235294118, "grad_norm": 0.4377977821915419, "learning_rate": 2.488468101888444e-06, "loss": 0.01761603355407715, "step": 68665 }, { "epoch": 0.6463058823529412, "grad_norm": 0.43165945730126987, "learning_rate": 2.4883775037509375e-06, "loss": 0.014747671782970428, "step": 68670 }, { "epoch": 0.6463529411764706, "grad_norm": 0.47119723330950325, "learning_rate": 2.4882869155079818e-06, "loss": 0.01719103306531906, "step": 68675 }, { "epoch": 0.6464, "grad_norm": 0.5224340645489587, "learning_rate": 2.4881963371577763e-06, "loss": 0.013030353188514709, "step": 68680 }, { "epoch": 0.6464470588235294, "grad_norm": 0.416718413395077, "learning_rate": 2.4881057686985202e-06, "loss": 0.014849920570850373, "step": 68685 }, { "epoch": 0.6464941176470588, "grad_norm": 0.4840453100200318, "learning_rate": 2.488015210128414e-06, "loss": 0.016202259063720702, "step": 68690 }, { "epoch": 0.6465411764705883, "grad_norm": 0.7241155302907656, "learning_rate": 2.4879246614456583e-06, "loss": 0.01907072067260742, "step": 68695 }, { "epoch": 0.6465882352941177, "grad_norm": 0.5240754664851451, "learning_rate": 2.4878341226484533e-06, "loss": 0.018335726857185364, "step": 68700 }, { "epoch": 0.6466352941176471, "grad_norm": 0.46281290532112845, "learning_rate": 2.487743593735002e-06, "loss": 0.014427180588245391, "step": 68705 }, { "epoch": 0.6466823529411765, "grad_norm": 0.5922555575130918, "learning_rate": 2.4876530747035034e-06, "loss": 0.017809364199638366, "step": 68710 }, { "epoch": 0.6467294117647059, "grad_norm": 0.6523136019068995, "learning_rate": 2.4875625655521623e-06, "loss": 0.019351667165756224, "step": 68715 }, { "epoch": 0.6467764705882353, "grad_norm": 0.43804771480364596, "learning_rate": 2.4874720662791804e-06, "loss": 0.014821273088455201, "step": 68720 }, { "epoch": 0.6468235294117647, "grad_norm": 0.4109329586488885, "learning_rate": 2.4873815768827614e-06, "loss": 0.01618637442588806, "step": 68725 }, { "epoch": 0.6468705882352941, "grad_norm": 2.771689552986502, "learning_rate": 2.4872910973611093e-06, "loss": 0.018489423394203185, "step": 68730 }, { "epoch": 0.6469176470588235, "grad_norm": 0.32373154604649657, "learning_rate": 2.4872006277124266e-06, "loss": 0.014512082934379578, "step": 68735 }, { "epoch": 0.6469647058823529, "grad_norm": 0.4752148883595429, "learning_rate": 2.48711016793492e-06, "loss": 0.014976602792739869, "step": 68740 }, { "epoch": 0.6470117647058824, "grad_norm": 0.9763167458018263, "learning_rate": 2.487019718026793e-06, "loss": 0.014227306842803955, "step": 68745 }, { "epoch": 0.6470588235294118, "grad_norm": 0.5570641847674364, "learning_rate": 2.4869292779862518e-06, "loss": 0.01807427406311035, "step": 68750 }, { "epoch": 0.6471058823529412, "grad_norm": 0.40429171517680984, "learning_rate": 2.486838847811502e-06, "loss": 0.01949595510959625, "step": 68755 }, { "epoch": 0.6471529411764706, "grad_norm": 0.6609225364764315, "learning_rate": 2.4867484275007505e-06, "loss": 0.01905106008052826, "step": 68760 }, { "epoch": 0.6472, "grad_norm": 0.6518440155819133, "learning_rate": 2.486658017052204e-06, "loss": 0.019735229015350342, "step": 68765 }, { "epoch": 0.6472470588235294, "grad_norm": 0.5773521279930326, "learning_rate": 2.4865676164640685e-06, "loss": 0.019850796461105345, "step": 68770 }, { "epoch": 0.6472941176470588, "grad_norm": 0.5978946481876084, "learning_rate": 2.4864772257345543e-06, "loss": 0.015501615405082703, "step": 68775 }, { "epoch": 0.6473411764705882, "grad_norm": 0.5184880064175069, "learning_rate": 2.4863868448618685e-06, "loss": 0.01898728609085083, "step": 68780 }, { "epoch": 0.6473882352941176, "grad_norm": 0.5719565310072875, "learning_rate": 2.4862964738442196e-06, "loss": 0.0133671373128891, "step": 68785 }, { "epoch": 0.6474352941176471, "grad_norm": 0.618112714862557, "learning_rate": 2.4862061126798165e-06, "loss": 0.014542174339294434, "step": 68790 }, { "epoch": 0.6474823529411765, "grad_norm": 0.6049440483306777, "learning_rate": 2.4861157613668687e-06, "loss": 0.019144032895565034, "step": 68795 }, { "epoch": 0.6475294117647059, "grad_norm": 0.3702105642814942, "learning_rate": 2.4860254199035876e-06, "loss": 0.015751948952674864, "step": 68800 }, { "epoch": 0.6475764705882353, "grad_norm": 0.4469207372069949, "learning_rate": 2.4859350882881826e-06, "loss": 0.013495311141014099, "step": 68805 }, { "epoch": 0.6476235294117647, "grad_norm": 0.4530086623998208, "learning_rate": 2.485844766518865e-06, "loss": 0.01305045485496521, "step": 68810 }, { "epoch": 0.6476705882352941, "grad_norm": 0.5099864541235306, "learning_rate": 2.485754454593846e-06, "loss": 0.018935507535934447, "step": 68815 }, { "epoch": 0.6477176470588235, "grad_norm": 0.4886860816769035, "learning_rate": 2.485664152511338e-06, "loss": 0.014746478199958802, "step": 68820 }, { "epoch": 0.6477647058823529, "grad_norm": 0.38698751123093794, "learning_rate": 2.485573860269552e-06, "loss": 0.018052209913730622, "step": 68825 }, { "epoch": 0.6478117647058823, "grad_norm": 0.37030287807827994, "learning_rate": 2.485483577866703e-06, "loss": 0.013822752237319946, "step": 68830 }, { "epoch": 0.6478588235294117, "grad_norm": 0.4101409261131452, "learning_rate": 2.4853933053010026e-06, "loss": 0.015369367599487305, "step": 68835 }, { "epoch": 0.6479058823529412, "grad_norm": 0.6659764752977922, "learning_rate": 2.485303042570665e-06, "loss": 0.024164266884326935, "step": 68840 }, { "epoch": 0.6479529411764706, "grad_norm": 0.5642424245569729, "learning_rate": 2.485212789673904e-06, "loss": 0.0158869668841362, "step": 68845 }, { "epoch": 0.648, "grad_norm": 0.49245638134454073, "learning_rate": 2.4851225466089345e-06, "loss": 0.014810264110565186, "step": 68850 }, { "epoch": 0.6480470588235294, "grad_norm": 0.8007238199628445, "learning_rate": 2.4850323133739724e-06, "loss": 0.01408214271068573, "step": 68855 }, { "epoch": 0.6480941176470588, "grad_norm": 0.4006721537951603, "learning_rate": 2.484942089967232e-06, "loss": 0.014639192819595337, "step": 68860 }, { "epoch": 0.6481411764705882, "grad_norm": 0.5911422495326129, "learning_rate": 2.4848518763869294e-06, "loss": 0.014921879768371582, "step": 68865 }, { "epoch": 0.6481882352941176, "grad_norm": 0.39576919772504837, "learning_rate": 2.4847616726312816e-06, "loss": 0.015556654334068299, "step": 68870 }, { "epoch": 0.648235294117647, "grad_norm": 0.6188891478394912, "learning_rate": 2.484671478698505e-06, "loss": 0.016805090010166168, "step": 68875 }, { "epoch": 0.6482823529411764, "grad_norm": 0.4249253303299684, "learning_rate": 2.4845812945868177e-06, "loss": 0.013577859103679656, "step": 68880 }, { "epoch": 0.6483294117647059, "grad_norm": 0.5060817505832307, "learning_rate": 2.4844911202944368e-06, "loss": 0.021460379660129546, "step": 68885 }, { "epoch": 0.6483764705882353, "grad_norm": 0.3381371892931614, "learning_rate": 2.4844009558195802e-06, "loss": 0.018312126398086548, "step": 68890 }, { "epoch": 0.6484235294117647, "grad_norm": 0.6705722096673843, "learning_rate": 2.4843108011604676e-06, "loss": 0.017631387710571288, "step": 68895 }, { "epoch": 0.6484705882352941, "grad_norm": 0.489839690217813, "learning_rate": 2.484220656315317e-06, "loss": 0.01655130535364151, "step": 68900 }, { "epoch": 0.6485176470588235, "grad_norm": 0.3914249452490837, "learning_rate": 2.4841305212823493e-06, "loss": 0.020529131591320037, "step": 68905 }, { "epoch": 0.6485647058823529, "grad_norm": 0.48190078988607515, "learning_rate": 2.4840403960597834e-06, "loss": 0.014976403117179871, "step": 68910 }, { "epoch": 0.6486117647058823, "grad_norm": 1.2059521786849319, "learning_rate": 2.483950280645841e-06, "loss": 0.016833963990211486, "step": 68915 }, { "epoch": 0.6486588235294117, "grad_norm": 0.5312785845315406, "learning_rate": 2.4838601750387417e-06, "loss": 0.013646471500396728, "step": 68920 }, { "epoch": 0.6487058823529411, "grad_norm": 0.6288056577315961, "learning_rate": 2.4837700792367074e-06, "loss": 0.01961127817630768, "step": 68925 }, { "epoch": 0.6487529411764706, "grad_norm": 0.42819110250843667, "learning_rate": 2.4836799932379606e-06, "loss": 0.01704607903957367, "step": 68930 }, { "epoch": 0.6488, "grad_norm": 0.5455578935378671, "learning_rate": 2.4835899170407228e-06, "loss": 0.015037363767623902, "step": 68935 }, { "epoch": 0.6488470588235294, "grad_norm": 0.4701434312450707, "learning_rate": 2.483499850643217e-06, "loss": 0.01849677413702011, "step": 68940 }, { "epoch": 0.6488941176470588, "grad_norm": 0.3903736992550558, "learning_rate": 2.483409794043667e-06, "loss": 0.016981178522109987, "step": 68945 }, { "epoch": 0.6489411764705882, "grad_norm": 0.3662994824803511, "learning_rate": 2.483319747240296e-06, "loss": 0.013915820419788361, "step": 68950 }, { "epoch": 0.6489882352941176, "grad_norm": 0.4087629936806915, "learning_rate": 2.483229710231327e-06, "loss": 0.016479307413101198, "step": 68955 }, { "epoch": 0.649035294117647, "grad_norm": 0.6926164367065845, "learning_rate": 2.4831396830149866e-06, "loss": 0.01900298148393631, "step": 68960 }, { "epoch": 0.6490823529411764, "grad_norm": 0.479439444622367, "learning_rate": 2.4830496655894983e-06, "loss": 0.015434256196022034, "step": 68965 }, { "epoch": 0.6491294117647058, "grad_norm": 0.4404548300914566, "learning_rate": 2.4829596579530885e-06, "loss": 0.015645641088485717, "step": 68970 }, { "epoch": 0.6491764705882352, "grad_norm": 0.7698157969942719, "learning_rate": 2.4828696601039826e-06, "loss": 0.017473985254764558, "step": 68975 }, { "epoch": 0.6492235294117648, "grad_norm": 0.7160672087431367, "learning_rate": 2.482779672040407e-06, "loss": 0.01674397587776184, "step": 68980 }, { "epoch": 0.6492705882352942, "grad_norm": 0.4007776176951646, "learning_rate": 2.482689693760589e-06, "loss": 0.016473081707954407, "step": 68985 }, { "epoch": 0.6493176470588236, "grad_norm": 0.2964374003676121, "learning_rate": 2.4825997252627546e-06, "loss": 0.014891666173934937, "step": 68990 }, { "epoch": 0.649364705882353, "grad_norm": 0.42260891029285896, "learning_rate": 2.4825097665451335e-06, "loss": 0.014146219193935394, "step": 68995 }, { "epoch": 0.6494117647058824, "grad_norm": 0.5759432797762795, "learning_rate": 2.4824198176059512e-06, "loss": 0.015928688645362853, "step": 69000 }, { "epoch": 0.6494588235294118, "grad_norm": 0.5180181217416608, "learning_rate": 2.4823298784434392e-06, "loss": 0.01580173075199127, "step": 69005 }, { "epoch": 0.6495058823529412, "grad_norm": 0.4772368427644476, "learning_rate": 2.482239949055825e-06, "loss": 0.012535929679870605, "step": 69010 }, { "epoch": 0.6495529411764706, "grad_norm": 0.4095035467581261, "learning_rate": 2.482150029441338e-06, "loss": 0.015478292107582092, "step": 69015 }, { "epoch": 0.6496, "grad_norm": 1.390559768168937, "learning_rate": 2.4820601195982087e-06, "loss": 0.015909427404403688, "step": 69020 }, { "epoch": 0.6496470588235295, "grad_norm": 0.6497379443598145, "learning_rate": 2.4819702195246676e-06, "loss": 0.020208793878555297, "step": 69025 }, { "epoch": 0.6496941176470589, "grad_norm": 0.45845540552482633, "learning_rate": 2.481880329218945e-06, "loss": 0.01898317039012909, "step": 69030 }, { "epoch": 0.6497411764705883, "grad_norm": 0.5351898766559142, "learning_rate": 2.481790448679272e-06, "loss": 0.01730961799621582, "step": 69035 }, { "epoch": 0.6497882352941177, "grad_norm": 0.4882555836354221, "learning_rate": 2.4817005779038817e-06, "loss": 0.020536407828330994, "step": 69040 }, { "epoch": 0.6498352941176471, "grad_norm": 0.48432989113977964, "learning_rate": 2.481610716891005e-06, "loss": 0.013371381163597106, "step": 69045 }, { "epoch": 0.6498823529411765, "grad_norm": 0.4286146627787962, "learning_rate": 2.481520865638875e-06, "loss": 0.01312715858221054, "step": 69050 }, { "epoch": 0.6499294117647059, "grad_norm": 0.5665136863766853, "learning_rate": 2.4814310241457244e-06, "loss": 0.0175200954079628, "step": 69055 }, { "epoch": 0.6499764705882353, "grad_norm": 0.4183897819431677, "learning_rate": 2.4813411924097875e-06, "loss": 0.01771961450576782, "step": 69060 }, { "epoch": 0.6500235294117647, "grad_norm": 0.6777562022664517, "learning_rate": 2.4812513704292986e-06, "loss": 0.021836304664611818, "step": 69065 }, { "epoch": 0.6500705882352941, "grad_norm": 0.5789874784298871, "learning_rate": 2.48116155820249e-06, "loss": 0.01764190346002579, "step": 69070 }, { "epoch": 0.6501176470588236, "grad_norm": 1.0768614043491913, "learning_rate": 2.481071755727599e-06, "loss": 0.021377997100353242, "step": 69075 }, { "epoch": 0.650164705882353, "grad_norm": 0.4687300240457296, "learning_rate": 2.48098196300286e-06, "loss": 0.01673029363155365, "step": 69080 }, { "epoch": 0.6502117647058824, "grad_norm": 0.39511952999341066, "learning_rate": 2.4808921800265086e-06, "loss": 0.01696470081806183, "step": 69085 }, { "epoch": 0.6502588235294118, "grad_norm": 0.5962780173003052, "learning_rate": 2.480802406796781e-06, "loss": 0.01803726553916931, "step": 69090 }, { "epoch": 0.6503058823529412, "grad_norm": 0.5161116638870226, "learning_rate": 2.480712643311915e-06, "loss": 0.015541499853134156, "step": 69095 }, { "epoch": 0.6503529411764706, "grad_norm": 0.4960945279530134, "learning_rate": 2.4806228895701454e-06, "loss": 0.01802130341529846, "step": 69100 }, { "epoch": 0.6504, "grad_norm": 0.46768369962822176, "learning_rate": 2.480533145569712e-06, "loss": 0.015024453401565552, "step": 69105 }, { "epoch": 0.6504470588235294, "grad_norm": 0.6550134336139872, "learning_rate": 2.480443411308852e-06, "loss": 0.014145109057426452, "step": 69110 }, { "epoch": 0.6504941176470588, "grad_norm": 0.4454010856977809, "learning_rate": 2.4803536867858034e-06, "loss": 0.018236520886421203, "step": 69115 }, { "epoch": 0.6505411764705883, "grad_norm": 0.541851546987494, "learning_rate": 2.4802639719988065e-06, "loss": 0.015836888551712038, "step": 69120 }, { "epoch": 0.6505882352941177, "grad_norm": 0.5907221154459169, "learning_rate": 2.480174266946099e-06, "loss": 0.020736929774284363, "step": 69125 }, { "epoch": 0.6506352941176471, "grad_norm": 0.34746941814707427, "learning_rate": 2.4800845716259216e-06, "loss": 0.015022984147071839, "step": 69130 }, { "epoch": 0.6506823529411765, "grad_norm": 0.441671479266066, "learning_rate": 2.4799948860365146e-06, "loss": 0.012361770868301392, "step": 69135 }, { "epoch": 0.6507294117647059, "grad_norm": 0.6416505247261826, "learning_rate": 2.4799052101761183e-06, "loss": 0.015591426193714142, "step": 69140 }, { "epoch": 0.6507764705882353, "grad_norm": 0.5989957789703292, "learning_rate": 2.4798155440429734e-06, "loss": 0.017040562629699708, "step": 69145 }, { "epoch": 0.6508235294117647, "grad_norm": 0.6478801839892574, "learning_rate": 2.479725887635323e-06, "loss": 0.018554002046585083, "step": 69150 }, { "epoch": 0.6508705882352941, "grad_norm": 0.45868455833558275, "learning_rate": 2.479636240951408e-06, "loss": 0.0197025865316391, "step": 69155 }, { "epoch": 0.6509176470588235, "grad_norm": 0.39747147401350597, "learning_rate": 2.4795466039894708e-06, "loss": 0.017347660660743714, "step": 69160 }, { "epoch": 0.6509647058823529, "grad_norm": 0.5821283403672546, "learning_rate": 2.479456976747755e-06, "loss": 0.016308709979057312, "step": 69165 }, { "epoch": 0.6510117647058824, "grad_norm": 0.35472156693284496, "learning_rate": 2.479367359224503e-06, "loss": 0.012793923914432525, "step": 69170 }, { "epoch": 0.6510588235294118, "grad_norm": 0.3252700712905933, "learning_rate": 2.4792777514179595e-06, "loss": 0.017529696226119995, "step": 69175 }, { "epoch": 0.6511058823529412, "grad_norm": 0.47390584512244716, "learning_rate": 2.4791881533263686e-06, "loss": 0.014535103738307954, "step": 69180 }, { "epoch": 0.6511529411764706, "grad_norm": 0.6907746827123636, "learning_rate": 2.4790985649479743e-06, "loss": 0.012984615564346314, "step": 69185 }, { "epoch": 0.6512, "grad_norm": 0.5969421521837892, "learning_rate": 2.4790089862810223e-06, "loss": 0.0196653813123703, "step": 69190 }, { "epoch": 0.6512470588235294, "grad_norm": 0.5922989658835575, "learning_rate": 2.4789194173237586e-06, "loss": 0.016674092411994933, "step": 69195 }, { "epoch": 0.6512941176470588, "grad_norm": 0.49088154660327393, "learning_rate": 2.4788298580744288e-06, "loss": 0.018030062317848206, "step": 69200 }, { "epoch": 0.6513411764705882, "grad_norm": 0.31672624093714985, "learning_rate": 2.4787403085312783e-06, "loss": 0.013875699043273926, "step": 69205 }, { "epoch": 0.6513882352941176, "grad_norm": 0.8149852439238563, "learning_rate": 2.4786507686925564e-06, "loss": 0.019567030668258666, "step": 69210 }, { "epoch": 0.6514352941176471, "grad_norm": 0.6759496286295141, "learning_rate": 2.4785612385565083e-06, "loss": 0.016647213697433473, "step": 69215 }, { "epoch": 0.6514823529411765, "grad_norm": 0.636940114716686, "learning_rate": 2.478471718121383e-06, "loss": 0.016859406232833864, "step": 69220 }, { "epoch": 0.6515294117647059, "grad_norm": 0.5623583697343778, "learning_rate": 2.4783822073854273e-06, "loss": 0.01457156240940094, "step": 69225 }, { "epoch": 0.6515764705882353, "grad_norm": 0.6086773553025174, "learning_rate": 2.478292706346892e-06, "loss": 0.017038592696189882, "step": 69230 }, { "epoch": 0.6516235294117647, "grad_norm": 0.7232059189201621, "learning_rate": 2.4782032150040253e-06, "loss": 0.015384809672832489, "step": 69235 }, { "epoch": 0.6516705882352941, "grad_norm": 0.349767407365596, "learning_rate": 2.478113733355076e-06, "loss": 0.013970659673213958, "step": 69240 }, { "epoch": 0.6517176470588235, "grad_norm": 0.5258032060463994, "learning_rate": 2.4780242613982944e-06, "loss": 0.012787216901779174, "step": 69245 }, { "epoch": 0.6517647058823529, "grad_norm": 0.368005256221061, "learning_rate": 2.477934799131932e-06, "loss": 0.011755257844924927, "step": 69250 }, { "epoch": 0.6518117647058823, "grad_norm": 0.5007264016446602, "learning_rate": 2.477845346554239e-06, "loss": 0.01282382309436798, "step": 69255 }, { "epoch": 0.6518588235294117, "grad_norm": 0.3455929374625101, "learning_rate": 2.4777559036634657e-06, "loss": 0.017573101818561553, "step": 69260 }, { "epoch": 0.6519058823529412, "grad_norm": 0.4031722005994338, "learning_rate": 2.4776664704578663e-06, "loss": 0.01598568558692932, "step": 69265 }, { "epoch": 0.6519529411764706, "grad_norm": 0.5173089709430477, "learning_rate": 2.4775770469356908e-06, "loss": 0.0167844757437706, "step": 69270 }, { "epoch": 0.652, "grad_norm": 0.6015459695280737, "learning_rate": 2.477487633095193e-06, "loss": 0.016600632667541505, "step": 69275 }, { "epoch": 0.6520470588235294, "grad_norm": 0.30396714257619833, "learning_rate": 2.477398228934625e-06, "loss": 0.013279888033866882, "step": 69280 }, { "epoch": 0.6520941176470588, "grad_norm": 0.5364643187113374, "learning_rate": 2.4773088344522415e-06, "loss": 0.01886908710002899, "step": 69285 }, { "epoch": 0.6521411764705882, "grad_norm": 0.8827064896835417, "learning_rate": 2.4772194496462966e-06, "loss": 0.011676131188869477, "step": 69290 }, { "epoch": 0.6521882352941176, "grad_norm": 0.7670404103917635, "learning_rate": 2.4771300745150435e-06, "loss": 0.0161704957485199, "step": 69295 }, { "epoch": 0.652235294117647, "grad_norm": 0.38641934992812077, "learning_rate": 2.4770407090567376e-06, "loss": 0.014950565993785858, "step": 69300 }, { "epoch": 0.6522823529411764, "grad_norm": 0.3981057871827012, "learning_rate": 2.4769513532696345e-06, "loss": 0.01468701958656311, "step": 69305 }, { "epoch": 0.6523294117647059, "grad_norm": 0.4761258153661434, "learning_rate": 2.47686200715199e-06, "loss": 0.01622915118932724, "step": 69310 }, { "epoch": 0.6523764705882353, "grad_norm": 0.5201844545334914, "learning_rate": 2.47677267070206e-06, "loss": 0.012705999612808227, "step": 69315 }, { "epoch": 0.6524235294117647, "grad_norm": 0.4190025641707579, "learning_rate": 2.476683343918101e-06, "loss": 0.014653724431991578, "step": 69320 }, { "epoch": 0.6524705882352941, "grad_norm": 0.4385006048867408, "learning_rate": 2.4765940267983703e-06, "loss": 0.015586234629154205, "step": 69325 }, { "epoch": 0.6525176470588235, "grad_norm": 0.44903186513755355, "learning_rate": 2.476504719341125e-06, "loss": 0.014428764581680298, "step": 69330 }, { "epoch": 0.6525647058823529, "grad_norm": 0.6368347296973087, "learning_rate": 2.4764154215446245e-06, "loss": 0.0125724196434021, "step": 69335 }, { "epoch": 0.6526117647058823, "grad_norm": 0.5294863035574596, "learning_rate": 2.4763261334071253e-06, "loss": 0.01429046094417572, "step": 69340 }, { "epoch": 0.6526588235294117, "grad_norm": 0.5394306665595429, "learning_rate": 2.476236854926887e-06, "loss": 0.016309323906898498, "step": 69345 }, { "epoch": 0.6527058823529411, "grad_norm": 0.7322809618622407, "learning_rate": 2.476147586102169e-06, "loss": 0.01828986406326294, "step": 69350 }, { "epoch": 0.6527529411764705, "grad_norm": 1.4449808586559765, "learning_rate": 2.476058326931231e-06, "loss": 0.014607354998588562, "step": 69355 }, { "epoch": 0.6528, "grad_norm": 0.596101253965015, "learning_rate": 2.4759690774123336e-06, "loss": 0.01865905225276947, "step": 69360 }, { "epoch": 0.6528470588235294, "grad_norm": 0.5110685904458506, "learning_rate": 2.475879837543736e-06, "loss": 0.01469617784023285, "step": 69365 }, { "epoch": 0.6528941176470588, "grad_norm": 0.3980811400757232, "learning_rate": 2.475790607323701e-06, "loss": 0.01433357447385788, "step": 69370 }, { "epoch": 0.6529411764705882, "grad_norm": 0.784850825479623, "learning_rate": 2.4757013867504886e-06, "loss": 0.014361885190010071, "step": 69375 }, { "epoch": 0.6529882352941176, "grad_norm": 0.4809303709788378, "learning_rate": 2.4756121758223613e-06, "loss": 0.01322045475244522, "step": 69380 }, { "epoch": 0.653035294117647, "grad_norm": 0.5080384902083258, "learning_rate": 2.4755229745375813e-06, "loss": 0.014780621230602264, "step": 69385 }, { "epoch": 0.6530823529411764, "grad_norm": 0.42646148236923964, "learning_rate": 2.4754337828944113e-06, "loss": 0.016444247961044312, "step": 69390 }, { "epoch": 0.6531294117647058, "grad_norm": 0.42479893067751556, "learning_rate": 2.475344600891115e-06, "loss": 0.010944335162639618, "step": 69395 }, { "epoch": 0.6531764705882352, "grad_norm": 0.3241354704598605, "learning_rate": 2.4752554285259557e-06, "loss": 0.013658693432807923, "step": 69400 }, { "epoch": 0.6532235294117648, "grad_norm": 0.5363079765311325, "learning_rate": 2.4751662657971977e-06, "loss": 0.016754601895809174, "step": 69405 }, { "epoch": 0.6532705882352942, "grad_norm": 0.5766532719374892, "learning_rate": 2.475077112703105e-06, "loss": 0.013381944596767425, "step": 69410 }, { "epoch": 0.6533176470588236, "grad_norm": 0.5836949983070803, "learning_rate": 2.474987969241943e-06, "loss": 0.016417281329631807, "step": 69415 }, { "epoch": 0.653364705882353, "grad_norm": 0.5878095193289706, "learning_rate": 2.4748988354119773e-06, "loss": 0.016438233852386474, "step": 69420 }, { "epoch": 0.6534117647058824, "grad_norm": 0.6021858061164777, "learning_rate": 2.4748097112114734e-06, "loss": 0.016278719902038573, "step": 69425 }, { "epoch": 0.6534588235294118, "grad_norm": 0.5578218179361213, "learning_rate": 2.474720596638697e-06, "loss": 0.015895479917526247, "step": 69430 }, { "epoch": 0.6535058823529412, "grad_norm": 0.8437834924453118, "learning_rate": 2.474631491691916e-06, "loss": 0.017216503620147705, "step": 69435 }, { "epoch": 0.6535529411764706, "grad_norm": 0.48065094835664574, "learning_rate": 2.474542396369398e-06, "loss": 0.018321123719215394, "step": 69440 }, { "epoch": 0.6536, "grad_norm": 0.6808171875514993, "learning_rate": 2.474453310669408e-06, "loss": 0.018511131405830383, "step": 69445 }, { "epoch": 0.6536470588235294, "grad_norm": 0.4780758654913954, "learning_rate": 2.474364234590217e-06, "loss": 0.017268523573875427, "step": 69450 }, { "epoch": 0.6536941176470589, "grad_norm": 0.5002695874004993, "learning_rate": 2.4742751681300914e-06, "loss": 0.014168348908424378, "step": 69455 }, { "epoch": 0.6537411764705883, "grad_norm": 0.4036426954084869, "learning_rate": 2.474186111287301e-06, "loss": 0.01677187383174896, "step": 69460 }, { "epoch": 0.6537882352941177, "grad_norm": 0.43658826380744237, "learning_rate": 2.474097064060115e-06, "loss": 0.01722707748413086, "step": 69465 }, { "epoch": 0.6538352941176471, "grad_norm": 0.38085286477398234, "learning_rate": 2.4740080264468027e-06, "loss": 0.017795509099960326, "step": 69470 }, { "epoch": 0.6538823529411765, "grad_norm": 0.4956153653330794, "learning_rate": 2.473918998445635e-06, "loss": 0.01835213601589203, "step": 69475 }, { "epoch": 0.6539294117647059, "grad_norm": 0.5065872653749489, "learning_rate": 2.4738299800548825e-06, "loss": 0.012820656597614288, "step": 69480 }, { "epoch": 0.6539764705882353, "grad_norm": 0.4013979180121134, "learning_rate": 2.473740971272816e-06, "loss": 0.017081835865974428, "step": 69485 }, { "epoch": 0.6540235294117647, "grad_norm": 0.4769443719490863, "learning_rate": 2.4736519720977065e-06, "loss": 0.017158475518226624, "step": 69490 }, { "epoch": 0.6540705882352941, "grad_norm": 0.4889035552731363, "learning_rate": 2.4735629825278274e-06, "loss": 0.015705063939094543, "step": 69495 }, { "epoch": 0.6541176470588236, "grad_norm": 0.9451110815610055, "learning_rate": 2.4734740025614495e-06, "loss": 0.030618947744369508, "step": 69500 }, { "epoch": 0.654164705882353, "grad_norm": 0.38300605789290115, "learning_rate": 2.473385032196847e-06, "loss": 0.014956033229827881, "step": 69505 }, { "epoch": 0.6542117647058824, "grad_norm": 0.4083265437365024, "learning_rate": 2.473296071432292e-06, "loss": 0.014933091402053834, "step": 69510 }, { "epoch": 0.6542588235294118, "grad_norm": 0.5125376506704394, "learning_rate": 2.473207120266059e-06, "loss": 0.016157828271389008, "step": 69515 }, { "epoch": 0.6543058823529412, "grad_norm": 0.4753031843019152, "learning_rate": 2.4731181786964213e-06, "loss": 0.014767992496490478, "step": 69520 }, { "epoch": 0.6543529411764706, "grad_norm": 0.4590024695894279, "learning_rate": 2.473029246721654e-06, "loss": 0.02071240544319153, "step": 69525 }, { "epoch": 0.6544, "grad_norm": 0.5949941638118137, "learning_rate": 2.4729403243400323e-06, "loss": 0.01691379100084305, "step": 69530 }, { "epoch": 0.6544470588235294, "grad_norm": 0.4917734079014116, "learning_rate": 2.4728514115498312e-06, "loss": 0.015003332495689392, "step": 69535 }, { "epoch": 0.6544941176470588, "grad_norm": 0.4249669201835398, "learning_rate": 2.472762508349327e-06, "loss": 0.014601275324821472, "step": 69540 }, { "epoch": 0.6545411764705882, "grad_norm": 0.7525591873663403, "learning_rate": 2.4726736147367953e-06, "loss": 0.018346603214740752, "step": 69545 }, { "epoch": 0.6545882352941177, "grad_norm": 0.5032525099461945, "learning_rate": 2.472584730710514e-06, "loss": 0.014167526364326477, "step": 69550 }, { "epoch": 0.6546352941176471, "grad_norm": 0.6481264079429798, "learning_rate": 2.472495856268759e-06, "loss": 0.017535603046417235, "step": 69555 }, { "epoch": 0.6546823529411765, "grad_norm": 0.5335641288274556, "learning_rate": 2.472406991409808e-06, "loss": 0.017625854909420015, "step": 69560 }, { "epoch": 0.6547294117647059, "grad_norm": 0.3220668537715157, "learning_rate": 2.47231813613194e-06, "loss": 0.01272895485162735, "step": 69565 }, { "epoch": 0.6547764705882353, "grad_norm": 0.42960591419442623, "learning_rate": 2.4722292904334327e-06, "loss": 0.01794761121273041, "step": 69570 }, { "epoch": 0.6548235294117647, "grad_norm": 0.5316972656306649, "learning_rate": 2.4721404543125658e-06, "loss": 0.015961985290050506, "step": 69575 }, { "epoch": 0.6548705882352941, "grad_norm": 0.5032564107995398, "learning_rate": 2.4720516277676172e-06, "loss": 0.015139439702033996, "step": 69580 }, { "epoch": 0.6549176470588235, "grad_norm": 0.4123786671421539, "learning_rate": 2.4719628107968676e-06, "loss": 0.01706143468618393, "step": 69585 }, { "epoch": 0.6549647058823529, "grad_norm": 0.39278629740887483, "learning_rate": 2.4718740033985974e-06, "loss": 0.014830607175827026, "step": 69590 }, { "epoch": 0.6550117647058824, "grad_norm": 0.3907505978762028, "learning_rate": 2.4717852055710868e-06, "loss": 0.016116274893283843, "step": 69595 }, { "epoch": 0.6550588235294118, "grad_norm": 0.45056552572240316, "learning_rate": 2.4716964173126166e-06, "loss": 0.011803930997848511, "step": 69600 }, { "epoch": 0.6551058823529412, "grad_norm": 0.6640940649596921, "learning_rate": 2.4716076386214695e-06, "loss": 0.015380382537841797, "step": 69605 }, { "epoch": 0.6551529411764706, "grad_norm": 0.33480955359444986, "learning_rate": 2.471518869495926e-06, "loss": 0.013177070021629333, "step": 69610 }, { "epoch": 0.6552, "grad_norm": 0.37937676736428133, "learning_rate": 2.4714301099342693e-06, "loss": 0.015501244366168976, "step": 69615 }, { "epoch": 0.6552470588235294, "grad_norm": 0.4245447206718852, "learning_rate": 2.471341359934781e-06, "loss": 0.016605183482170105, "step": 69620 }, { "epoch": 0.6552941176470588, "grad_norm": 0.6632898782250519, "learning_rate": 2.471252619495746e-06, "loss": 0.01590295433998108, "step": 69625 }, { "epoch": 0.6553411764705882, "grad_norm": 0.5761012063913296, "learning_rate": 2.471163888615448e-06, "loss": 0.017481639981269836, "step": 69630 }, { "epoch": 0.6553882352941176, "grad_norm": 0.638827372956413, "learning_rate": 2.471075167292169e-06, "loss": 0.01930815279483795, "step": 69635 }, { "epoch": 0.655435294117647, "grad_norm": 0.4822442370271779, "learning_rate": 2.470986455524195e-06, "loss": 0.012476995587348938, "step": 69640 }, { "epoch": 0.6554823529411765, "grad_norm": 0.6196689966302873, "learning_rate": 2.470897753309811e-06, "loss": 0.018333789706230164, "step": 69645 }, { "epoch": 0.6555294117647059, "grad_norm": 0.3531424310743831, "learning_rate": 2.470809060647302e-06, "loss": 0.013411593437194825, "step": 69650 }, { "epoch": 0.6555764705882353, "grad_norm": 0.3583227886585148, "learning_rate": 2.4707203775349534e-06, "loss": 0.015241852402687073, "step": 69655 }, { "epoch": 0.6556235294117647, "grad_norm": 0.47565252542736824, "learning_rate": 2.4706317039710526e-06, "loss": 0.015820810198783876, "step": 69660 }, { "epoch": 0.6556705882352941, "grad_norm": 0.5653279363058127, "learning_rate": 2.470543039953885e-06, "loss": 0.01764419674873352, "step": 69665 }, { "epoch": 0.6557176470588235, "grad_norm": 0.4577994713406998, "learning_rate": 2.4704543854817393e-06, "loss": 0.017611825466156007, "step": 69670 }, { "epoch": 0.6557647058823529, "grad_norm": 0.329806590123709, "learning_rate": 2.470365740552901e-06, "loss": 0.014742359519004822, "step": 69675 }, { "epoch": 0.6558117647058823, "grad_norm": 0.6097538332404643, "learning_rate": 2.4702771051656595e-06, "loss": 0.019529899954795836, "step": 69680 }, { "epoch": 0.6558588235294117, "grad_norm": 0.36737679346627167, "learning_rate": 2.470188479318303e-06, "loss": 0.014048659801483154, "step": 69685 }, { "epoch": 0.6559058823529412, "grad_norm": 0.4007793216358638, "learning_rate": 2.47009986300912e-06, "loss": 0.015085640549659728, "step": 69690 }, { "epoch": 0.6559529411764706, "grad_norm": 0.5988801314030575, "learning_rate": 2.470011256236399e-06, "loss": 0.013943779468536376, "step": 69695 }, { "epoch": 0.656, "grad_norm": 0.3994896005059517, "learning_rate": 2.4699226589984313e-06, "loss": 0.015672323107719422, "step": 69700 }, { "epoch": 0.6560470588235294, "grad_norm": 0.5677233696237912, "learning_rate": 2.469834071293506e-06, "loss": 0.017328280210494994, "step": 69705 }, { "epoch": 0.6560941176470588, "grad_norm": 0.2632654375808568, "learning_rate": 2.469745493119914e-06, "loss": 0.013654075562953949, "step": 69710 }, { "epoch": 0.6561411764705882, "grad_norm": 0.3917899526729438, "learning_rate": 2.469656924475946e-06, "loss": 0.019139876961708067, "step": 69715 }, { "epoch": 0.6561882352941176, "grad_norm": 0.4620616716125461, "learning_rate": 2.469568365359893e-06, "loss": 0.014705261588096619, "step": 69720 }, { "epoch": 0.656235294117647, "grad_norm": 0.45860510447052444, "learning_rate": 2.4694798157700482e-06, "loss": 0.013316980004310608, "step": 69725 }, { "epoch": 0.6562823529411764, "grad_norm": 1.2432318068419959, "learning_rate": 2.4693912757047024e-06, "loss": 0.018728169798851012, "step": 69730 }, { "epoch": 0.6563294117647058, "grad_norm": 0.6761018731201301, "learning_rate": 2.4693027451621483e-06, "loss": 0.01814371347427368, "step": 69735 }, { "epoch": 0.6563764705882353, "grad_norm": 0.5600251070897145, "learning_rate": 2.4692142241406807e-06, "loss": 0.021222805976867674, "step": 69740 }, { "epoch": 0.6564235294117647, "grad_norm": 0.4237693800308429, "learning_rate": 2.4691257126385916e-06, "loss": 0.01704893708229065, "step": 69745 }, { "epoch": 0.6564705882352941, "grad_norm": 0.5262403280118382, "learning_rate": 2.469037210654175e-06, "loss": 0.013553616404533387, "step": 69750 }, { "epoch": 0.6565176470588235, "grad_norm": 0.4412132233697527, "learning_rate": 2.4689487181857256e-06, "loss": 0.01397971659898758, "step": 69755 }, { "epoch": 0.6565647058823529, "grad_norm": 0.4767150846284452, "learning_rate": 2.468860235231539e-06, "loss": 0.014563655853271485, "step": 69760 }, { "epoch": 0.6566117647058823, "grad_norm": 0.6273164730317721, "learning_rate": 2.468771761789909e-06, "loss": 0.018990783393383025, "step": 69765 }, { "epoch": 0.6566588235294117, "grad_norm": 0.5010354091251011, "learning_rate": 2.468683297859132e-06, "loss": 0.013297465443611146, "step": 69770 }, { "epoch": 0.6567058823529411, "grad_norm": 0.3462183352599649, "learning_rate": 2.468594843437504e-06, "loss": 0.016222186386585236, "step": 69775 }, { "epoch": 0.6567529411764705, "grad_norm": 0.6833895224758705, "learning_rate": 2.468506398523322e-06, "loss": 0.014773277938365937, "step": 69780 }, { "epoch": 0.6568, "grad_norm": 0.44395271545988035, "learning_rate": 2.4684179631148826e-06, "loss": 0.01809290945529938, "step": 69785 }, { "epoch": 0.6568470588235295, "grad_norm": 0.5468459340724394, "learning_rate": 2.468329537210483e-06, "loss": 0.017083893716335296, "step": 69790 }, { "epoch": 0.6568941176470589, "grad_norm": 0.61169256991012, "learning_rate": 2.468241120808421e-06, "loss": 0.018496955931186675, "step": 69795 }, { "epoch": 0.6569411764705883, "grad_norm": 0.4122485958919502, "learning_rate": 2.468152713906995e-06, "loss": 0.016035428643226622, "step": 69800 }, { "epoch": 0.6569882352941177, "grad_norm": 0.42548926725762953, "learning_rate": 2.4680643165045037e-06, "loss": 0.014507773518562316, "step": 69805 }, { "epoch": 0.657035294117647, "grad_norm": 0.37340216629264716, "learning_rate": 2.4679759285992456e-06, "loss": 0.010492189973592757, "step": 69810 }, { "epoch": 0.6570823529411765, "grad_norm": 0.41539192022954774, "learning_rate": 2.467887550189522e-06, "loss": 0.015430618822574616, "step": 69815 }, { "epoch": 0.6571294117647059, "grad_norm": 0.42999242173880037, "learning_rate": 2.467799181273631e-06, "loss": 0.01506701111793518, "step": 69820 }, { "epoch": 0.6571764705882353, "grad_norm": 0.32676492487974407, "learning_rate": 2.4677108218498733e-06, "loss": 0.017976970970630647, "step": 69825 }, { "epoch": 0.6572235294117647, "grad_norm": 0.5333000917915114, "learning_rate": 2.4676224719165504e-06, "loss": 0.014426134526729584, "step": 69830 }, { "epoch": 0.6572705882352942, "grad_norm": 0.373651767015872, "learning_rate": 2.467534131471963e-06, "loss": 0.01121588796377182, "step": 69835 }, { "epoch": 0.6573176470588236, "grad_norm": 0.44398442944329936, "learning_rate": 2.4674458005144133e-06, "loss": 0.014314761757850647, "step": 69840 }, { "epoch": 0.657364705882353, "grad_norm": 0.3634310892403708, "learning_rate": 2.4673574790422016e-06, "loss": 0.01829867660999298, "step": 69845 }, { "epoch": 0.6574117647058824, "grad_norm": 0.6809635069254768, "learning_rate": 2.4672691670536326e-06, "loss": 0.018441349267959595, "step": 69850 }, { "epoch": 0.6574588235294118, "grad_norm": 0.5829868048372504, "learning_rate": 2.467180864547008e-06, "loss": 0.016039234399795533, "step": 69855 }, { "epoch": 0.6575058823529412, "grad_norm": 0.466778494481931, "learning_rate": 2.4670925715206317e-06, "loss": 0.01724129617214203, "step": 69860 }, { "epoch": 0.6575529411764706, "grad_norm": 0.4773281731268377, "learning_rate": 2.4670042879728066e-06, "loss": 0.01488548219203949, "step": 69865 }, { "epoch": 0.6576, "grad_norm": 0.40308364279380743, "learning_rate": 2.466916013901839e-06, "loss": 0.01457701325416565, "step": 69870 }, { "epoch": 0.6576470588235294, "grad_norm": 0.48457193404993343, "learning_rate": 2.466827749306031e-06, "loss": 0.015641313791275025, "step": 69875 }, { "epoch": 0.6576941176470589, "grad_norm": 0.4573443727531378, "learning_rate": 2.466739494183689e-06, "loss": 0.015642192959785462, "step": 69880 }, { "epoch": 0.6577411764705883, "grad_norm": 1.0205927181467203, "learning_rate": 2.466651248533118e-06, "loss": 0.01567101776599884, "step": 69885 }, { "epoch": 0.6577882352941177, "grad_norm": 0.524913901391461, "learning_rate": 2.4665630123526244e-06, "loss": 0.019323289394378662, "step": 69890 }, { "epoch": 0.6578352941176471, "grad_norm": 0.4454974459392369, "learning_rate": 2.466474785640514e-06, "loss": 0.015472790598869324, "step": 69895 }, { "epoch": 0.6578823529411765, "grad_norm": 0.4255136379002861, "learning_rate": 2.4663865683950936e-06, "loss": 0.016374874114990234, "step": 69900 }, { "epoch": 0.6579294117647059, "grad_norm": 0.4580884490177401, "learning_rate": 2.4662983606146706e-06, "loss": 0.015753042697906495, "step": 69905 }, { "epoch": 0.6579764705882353, "grad_norm": 0.35824832227953013, "learning_rate": 2.4662101622975525e-06, "loss": 0.015430237352848052, "step": 69910 }, { "epoch": 0.6580235294117647, "grad_norm": 0.6508120013464043, "learning_rate": 2.4661219734420474e-06, "loss": 0.022009533643722535, "step": 69915 }, { "epoch": 0.6580705882352941, "grad_norm": 0.5515375642334509, "learning_rate": 2.4660337940464637e-06, "loss": 0.019195224344730377, "step": 69920 }, { "epoch": 0.6581176470588235, "grad_norm": 0.44054210878564665, "learning_rate": 2.46594562410911e-06, "loss": 0.012055706977844239, "step": 69925 }, { "epoch": 0.658164705882353, "grad_norm": 0.5611032602853102, "learning_rate": 2.465857463628296e-06, "loss": 0.019471530616283417, "step": 69930 }, { "epoch": 0.6582117647058824, "grad_norm": 0.5123533392281608, "learning_rate": 2.4657693126023317e-06, "loss": 0.015664952993392944, "step": 69935 }, { "epoch": 0.6582588235294118, "grad_norm": 0.5614731594841036, "learning_rate": 2.4656811710295254e-06, "loss": 0.021294015645980834, "step": 69940 }, { "epoch": 0.6583058823529412, "grad_norm": 0.47552849018562854, "learning_rate": 2.4655930389081905e-06, "loss": 0.015051937103271485, "step": 69945 }, { "epoch": 0.6583529411764706, "grad_norm": 0.5095116428703584, "learning_rate": 2.465504916236636e-06, "loss": 0.016230258345603942, "step": 69950 }, { "epoch": 0.6584, "grad_norm": 0.44136773511956345, "learning_rate": 2.4654168030131733e-06, "loss": 0.015727239847183227, "step": 69955 }, { "epoch": 0.6584470588235294, "grad_norm": 0.394306788240075, "learning_rate": 2.465328699236115e-06, "loss": 0.01326265037059784, "step": 69960 }, { "epoch": 0.6584941176470588, "grad_norm": 0.4389256802479956, "learning_rate": 2.465240604903773e-06, "loss": 0.015500077605247497, "step": 69965 }, { "epoch": 0.6585411764705882, "grad_norm": 0.6219270298237031, "learning_rate": 2.4651525200144604e-06, "loss": 0.01909789890050888, "step": 69970 }, { "epoch": 0.6585882352941177, "grad_norm": 0.568820294998096, "learning_rate": 2.4650644445664894e-06, "loss": 0.019429942965507506, "step": 69975 }, { "epoch": 0.6586352941176471, "grad_norm": 0.6124073780653015, "learning_rate": 2.464976378558174e-06, "loss": 0.015090326964855193, "step": 69980 }, { "epoch": 0.6586823529411765, "grad_norm": 0.5688698164846494, "learning_rate": 2.4648883219878282e-06, "loss": 0.01412648856639862, "step": 69985 }, { "epoch": 0.6587294117647059, "grad_norm": 0.7934297137300276, "learning_rate": 2.4648002748537665e-06, "loss": 0.012629002332687378, "step": 69990 }, { "epoch": 0.6587764705882353, "grad_norm": 0.5938565037685406, "learning_rate": 2.4647122371543033e-06, "loss": 0.017430000007152557, "step": 69995 }, { "epoch": 0.6588235294117647, "grad_norm": 0.6164579261739993, "learning_rate": 2.464624208887754e-06, "loss": 0.016948583722114562, "step": 70000 }, { "epoch": 0.6588705882352941, "grad_norm": 0.564976380911033, "learning_rate": 2.4645361900524344e-06, "loss": 0.014109596610069275, "step": 70005 }, { "epoch": 0.6589176470588235, "grad_norm": 0.3216190871836043, "learning_rate": 2.46444818064666e-06, "loss": 0.01634444296360016, "step": 70010 }, { "epoch": 0.6589647058823529, "grad_norm": 0.513505047600403, "learning_rate": 2.4643601806687474e-06, "loss": 0.01605890691280365, "step": 70015 }, { "epoch": 0.6590117647058824, "grad_norm": 0.5782965766706245, "learning_rate": 2.464272190117014e-06, "loss": 0.023286962509155275, "step": 70020 }, { "epoch": 0.6590588235294118, "grad_norm": 0.6268146760354358, "learning_rate": 2.464184208989777e-06, "loss": 0.012027990818023682, "step": 70025 }, { "epoch": 0.6591058823529412, "grad_norm": 0.6475626442537658, "learning_rate": 2.4640962372853536e-06, "loss": 0.016596227884292603, "step": 70030 }, { "epoch": 0.6591529411764706, "grad_norm": 0.43544776071102925, "learning_rate": 2.464008275002062e-06, "loss": 0.015768691897392273, "step": 70035 }, { "epoch": 0.6592, "grad_norm": 0.43435139632454095, "learning_rate": 2.4639203221382214e-06, "loss": 0.015372577309608459, "step": 70040 }, { "epoch": 0.6592470588235294, "grad_norm": 0.5000787626419176, "learning_rate": 2.46383237869215e-06, "loss": 0.01794213056564331, "step": 70045 }, { "epoch": 0.6592941176470588, "grad_norm": 0.510569584865876, "learning_rate": 2.4637444446621677e-06, "loss": 0.016659095883369446, "step": 70050 }, { "epoch": 0.6593411764705882, "grad_norm": 0.39462389279599175, "learning_rate": 2.463656520046594e-06, "loss": 0.015064835548400879, "step": 70055 }, { "epoch": 0.6593882352941176, "grad_norm": 0.3753211995394343, "learning_rate": 2.4635686048437495e-06, "loss": 0.010823002457618714, "step": 70060 }, { "epoch": 0.659435294117647, "grad_norm": 0.6244271037450215, "learning_rate": 2.463480699051955e-06, "loss": 0.01717386543750763, "step": 70065 }, { "epoch": 0.6594823529411765, "grad_norm": 0.378316660983661, "learning_rate": 2.463392802669531e-06, "loss": 0.014941397309303283, "step": 70070 }, { "epoch": 0.6595294117647059, "grad_norm": 0.6002711862708221, "learning_rate": 2.463304915694799e-06, "loss": 0.014879068732261658, "step": 70075 }, { "epoch": 0.6595764705882353, "grad_norm": 0.7561957770308542, "learning_rate": 2.4632170381260814e-06, "loss": 0.020545050501823425, "step": 70080 }, { "epoch": 0.6596235294117647, "grad_norm": 0.5119826234249812, "learning_rate": 2.463129169961701e-06, "loss": 0.013245843350887299, "step": 70085 }, { "epoch": 0.6596705882352941, "grad_norm": 0.35083163355976404, "learning_rate": 2.4630413111999786e-06, "loss": 0.01625138372182846, "step": 70090 }, { "epoch": 0.6597176470588235, "grad_norm": 0.5180103611266939, "learning_rate": 2.46295346183924e-06, "loss": 0.017250652611255645, "step": 70095 }, { "epoch": 0.6597647058823529, "grad_norm": 0.7804117747882755, "learning_rate": 2.462865621877807e-06, "loss": 0.015248395502567291, "step": 70100 }, { "epoch": 0.6598117647058823, "grad_norm": 0.5039575538258632, "learning_rate": 2.4627777913140035e-06, "loss": 0.01491388976573944, "step": 70105 }, { "epoch": 0.6598588235294117, "grad_norm": 0.5835828090600464, "learning_rate": 2.4626899701461542e-06, "loss": 0.0184611976146698, "step": 70110 }, { "epoch": 0.6599058823529412, "grad_norm": 0.5607919536313843, "learning_rate": 2.462602158372585e-06, "loss": 0.01590956300497055, "step": 70115 }, { "epoch": 0.6599529411764706, "grad_norm": 0.471036623673285, "learning_rate": 2.4625143559916204e-06, "loss": 0.012899896502494812, "step": 70120 }, { "epoch": 0.66, "grad_norm": 0.6412325692902003, "learning_rate": 2.4624265630015863e-06, "loss": 0.015645043551921846, "step": 70125 }, { "epoch": 0.6600470588235294, "grad_norm": 0.47635920661286096, "learning_rate": 2.462338779400808e-06, "loss": 0.017330968379974367, "step": 70130 }, { "epoch": 0.6600941176470588, "grad_norm": 0.3853824196205697, "learning_rate": 2.462251005187613e-06, "loss": 0.012569908797740937, "step": 70135 }, { "epoch": 0.6601411764705882, "grad_norm": 0.5225015720674573, "learning_rate": 2.4621632403603278e-06, "loss": 0.016245946288108826, "step": 70140 }, { "epoch": 0.6601882352941176, "grad_norm": 0.3521253496168372, "learning_rate": 2.4620754849172796e-06, "loss": 0.015192097425460816, "step": 70145 }, { "epoch": 0.660235294117647, "grad_norm": 0.7551752496665494, "learning_rate": 2.4619877388567968e-06, "loss": 0.01826445758342743, "step": 70150 }, { "epoch": 0.6602823529411764, "grad_norm": 0.8094327596694776, "learning_rate": 2.4619000021772072e-06, "loss": 0.015464165806770324, "step": 70155 }, { "epoch": 0.6603294117647058, "grad_norm": 0.5893832722409509, "learning_rate": 2.461812274876839e-06, "loss": 0.014343079924583436, "step": 70160 }, { "epoch": 0.6603764705882353, "grad_norm": 0.5703610935307669, "learning_rate": 2.4617245569540215e-06, "loss": 0.017683619260787965, "step": 70165 }, { "epoch": 0.6604235294117647, "grad_norm": 0.41672560653549, "learning_rate": 2.4616368484070846e-06, "loss": 0.020538707077503205, "step": 70170 }, { "epoch": 0.6604705882352941, "grad_norm": 0.6758491734480172, "learning_rate": 2.461549149234358e-06, "loss": 0.01605212688446045, "step": 70175 }, { "epoch": 0.6605176470588235, "grad_norm": 0.5423926977370704, "learning_rate": 2.4614614594341714e-06, "loss": 0.013781645894050598, "step": 70180 }, { "epoch": 0.6605647058823529, "grad_norm": 0.5218974283757042, "learning_rate": 2.4613737790048557e-06, "loss": 0.019625452160835267, "step": 70185 }, { "epoch": 0.6606117647058823, "grad_norm": 0.23187030450465676, "learning_rate": 2.461286107944742e-06, "loss": 0.01055506020784378, "step": 70190 }, { "epoch": 0.6606588235294117, "grad_norm": 0.5165743576925261, "learning_rate": 2.461198446252163e-06, "loss": 0.019080880284309387, "step": 70195 }, { "epoch": 0.6607058823529411, "grad_norm": 0.459216798374634, "learning_rate": 2.461110793925449e-06, "loss": 0.016929128766059877, "step": 70200 }, { "epoch": 0.6607529411764705, "grad_norm": 0.5594964776812631, "learning_rate": 2.461023150962933e-06, "loss": 0.016492560505867004, "step": 70205 }, { "epoch": 0.6608, "grad_norm": 0.3575358952617717, "learning_rate": 2.460935517362948e-06, "loss": 0.014722073078155517, "step": 70210 }, { "epoch": 0.6608470588235295, "grad_norm": 0.5160870065876976, "learning_rate": 2.4608478931238263e-06, "loss": 0.013969358801841737, "step": 70215 }, { "epoch": 0.6608941176470589, "grad_norm": 1.004154776917084, "learning_rate": 2.460760278243903e-06, "loss": 0.016602784395217896, "step": 70220 }, { "epoch": 0.6609411764705883, "grad_norm": 0.46312609683173117, "learning_rate": 2.460672672721511e-06, "loss": 0.019417107105255127, "step": 70225 }, { "epoch": 0.6609882352941177, "grad_norm": 0.6453957014522881, "learning_rate": 2.4605850765549847e-06, "loss": 0.01397818624973297, "step": 70230 }, { "epoch": 0.661035294117647, "grad_norm": 0.34501437685600417, "learning_rate": 2.46049748974266e-06, "loss": 0.017608563601970672, "step": 70235 }, { "epoch": 0.6610823529411765, "grad_norm": 0.42887083742392434, "learning_rate": 2.4604099122828706e-06, "loss": 0.011348303407430649, "step": 70240 }, { "epoch": 0.6611294117647059, "grad_norm": 0.47765889823927815, "learning_rate": 2.4603223441739528e-06, "loss": 0.014166697859764099, "step": 70245 }, { "epoch": 0.6611764705882353, "grad_norm": 0.48859191485527936, "learning_rate": 2.4602347854142436e-06, "loss": 0.013063186407089233, "step": 70250 }, { "epoch": 0.6612235294117647, "grad_norm": 0.4178442925320299, "learning_rate": 2.460147236002079e-06, "loss": 0.01732386350631714, "step": 70255 }, { "epoch": 0.6612705882352942, "grad_norm": 0.6261575626186381, "learning_rate": 2.4600596959357943e-06, "loss": 0.01885659396648407, "step": 70260 }, { "epoch": 0.6613176470588236, "grad_norm": 0.43787217219906166, "learning_rate": 2.4599721652137296e-06, "loss": 0.017724215984344482, "step": 70265 }, { "epoch": 0.661364705882353, "grad_norm": 0.6674819380651441, "learning_rate": 2.459884643834221e-06, "loss": 0.01761479377746582, "step": 70270 }, { "epoch": 0.6614117647058824, "grad_norm": 0.454796186613034, "learning_rate": 2.4597971317956066e-06, "loss": 0.014781579375267029, "step": 70275 }, { "epoch": 0.6614588235294118, "grad_norm": 0.29839820221043273, "learning_rate": 2.459709629096225e-06, "loss": 0.016742146015167235, "step": 70280 }, { "epoch": 0.6615058823529412, "grad_norm": 0.5241987415722605, "learning_rate": 2.459622135734416e-06, "loss": 0.014644412696361542, "step": 70285 }, { "epoch": 0.6615529411764706, "grad_norm": 0.6051008184066554, "learning_rate": 2.4595346517085184e-06, "loss": 0.017054247856140136, "step": 70290 }, { "epoch": 0.6616, "grad_norm": 0.389807739913679, "learning_rate": 2.4594471770168725e-06, "loss": 0.010960552096366882, "step": 70295 }, { "epoch": 0.6616470588235294, "grad_norm": 0.5212120825025035, "learning_rate": 2.4593597116578174e-06, "loss": 0.01135929599404335, "step": 70300 }, { "epoch": 0.6616941176470589, "grad_norm": 0.48728536828154845, "learning_rate": 2.459272255629695e-06, "loss": 0.02070004492998123, "step": 70305 }, { "epoch": 0.6617411764705883, "grad_norm": 0.6392528998541882, "learning_rate": 2.459184808930846e-06, "loss": 0.020145714282989502, "step": 70310 }, { "epoch": 0.6617882352941177, "grad_norm": 0.3364257943013131, "learning_rate": 2.4590973715596116e-06, "loss": 0.015941262245178223, "step": 70315 }, { "epoch": 0.6618352941176471, "grad_norm": 0.42433721824299475, "learning_rate": 2.4590099435143335e-06, "loss": 0.01580870747566223, "step": 70320 }, { "epoch": 0.6618823529411765, "grad_norm": 0.3592181626264107, "learning_rate": 2.458922524793354e-06, "loss": 0.01505342274904251, "step": 70325 }, { "epoch": 0.6619294117647059, "grad_norm": 0.4486472935304662, "learning_rate": 2.4588351153950173e-06, "loss": 0.014596658945083617, "step": 70330 }, { "epoch": 0.6619764705882353, "grad_norm": 0.5386864429448587, "learning_rate": 2.4587477153176643e-06, "loss": 0.017933279275894165, "step": 70335 }, { "epoch": 0.6620235294117647, "grad_norm": 0.3484009662943556, "learning_rate": 2.4586603245596397e-06, "loss": 0.014489185810089112, "step": 70340 }, { "epoch": 0.6620705882352941, "grad_norm": 0.6049325444215842, "learning_rate": 2.4585729431192875e-06, "loss": 0.013083580136299133, "step": 70345 }, { "epoch": 0.6621176470588235, "grad_norm": 0.5242155356522806, "learning_rate": 2.4584855709949522e-06, "loss": 0.013074657320976258, "step": 70350 }, { "epoch": 0.662164705882353, "grad_norm": 0.7950500198938031, "learning_rate": 2.458398208184978e-06, "loss": 0.019116750359535216, "step": 70355 }, { "epoch": 0.6622117647058824, "grad_norm": 0.32358009875788796, "learning_rate": 2.4583108546877095e-06, "loss": 0.01138927936553955, "step": 70360 }, { "epoch": 0.6622588235294118, "grad_norm": 0.49572543113879847, "learning_rate": 2.458223510501494e-06, "loss": 0.013615231215953826, "step": 70365 }, { "epoch": 0.6623058823529412, "grad_norm": 0.43096874770051197, "learning_rate": 2.458136175624676e-06, "loss": 0.016301722824573518, "step": 70370 }, { "epoch": 0.6623529411764706, "grad_norm": 0.3594290002673762, "learning_rate": 2.4580488500556026e-06, "loss": 0.018624648451805115, "step": 70375 }, { "epoch": 0.6624, "grad_norm": 0.7564834849180613, "learning_rate": 2.4579615337926205e-06, "loss": 0.013591012358665467, "step": 70380 }, { "epoch": 0.6624470588235294, "grad_norm": 0.41076527494284737, "learning_rate": 2.4578742268340777e-06, "loss": 0.01256178468465805, "step": 70385 }, { "epoch": 0.6624941176470588, "grad_norm": 0.5713762266580793, "learning_rate": 2.4577869291783203e-06, "loss": 0.016030657291412353, "step": 70390 }, { "epoch": 0.6625411764705882, "grad_norm": 0.5312935704704465, "learning_rate": 2.4576996408236974e-06, "loss": 0.014966288208961486, "step": 70395 }, { "epoch": 0.6625882352941177, "grad_norm": 0.32988087348730843, "learning_rate": 2.457612361768557e-06, "loss": 0.016235870122909547, "step": 70400 }, { "epoch": 0.6626352941176471, "grad_norm": 0.47123094608966715, "learning_rate": 2.4575250920112488e-06, "loss": 0.015031376481056213, "step": 70405 }, { "epoch": 0.6626823529411765, "grad_norm": 0.44739532865334636, "learning_rate": 2.4574378315501205e-06, "loss": 0.0205364465713501, "step": 70410 }, { "epoch": 0.6627294117647059, "grad_norm": 0.35745359500437857, "learning_rate": 2.4573505803835237e-06, "loss": 0.012040166556835175, "step": 70415 }, { "epoch": 0.6627764705882353, "grad_norm": 0.8561417273033272, "learning_rate": 2.4572633385098075e-06, "loss": 0.015646356344223022, "step": 70420 }, { "epoch": 0.6628235294117647, "grad_norm": 0.5359843570502998, "learning_rate": 2.4571761059273223e-06, "loss": 0.01608946919441223, "step": 70425 }, { "epoch": 0.6628705882352941, "grad_norm": 0.40769793485276007, "learning_rate": 2.4570888826344186e-06, "loss": 0.016956958174705505, "step": 70430 }, { "epoch": 0.6629176470588235, "grad_norm": 0.47935569757679225, "learning_rate": 2.4570016686294493e-06, "loss": 0.013712912797927856, "step": 70435 }, { "epoch": 0.6629647058823529, "grad_norm": 0.4512456866761532, "learning_rate": 2.456914463910765e-06, "loss": 0.018415600061416626, "step": 70440 }, { "epoch": 0.6630117647058823, "grad_norm": 0.6092585238870569, "learning_rate": 2.4568272684767183e-06, "loss": 0.01840556412935257, "step": 70445 }, { "epoch": 0.6630588235294118, "grad_norm": 0.5208565365283041, "learning_rate": 2.4567400823256605e-06, "loss": 0.017076116800308228, "step": 70450 }, { "epoch": 0.6631058823529412, "grad_norm": 0.49201066917123476, "learning_rate": 2.4566529054559467e-06, "loss": 0.01620050221681595, "step": 70455 }, { "epoch": 0.6631529411764706, "grad_norm": 0.5266491019482451, "learning_rate": 2.4565657378659294e-06, "loss": 0.013818874955177307, "step": 70460 }, { "epoch": 0.6632, "grad_norm": 0.5801365343528577, "learning_rate": 2.4564785795539617e-06, "loss": 0.0206658273935318, "step": 70465 }, { "epoch": 0.6632470588235294, "grad_norm": 0.4935811815720188, "learning_rate": 2.456391430518398e-06, "loss": 0.014494068920612335, "step": 70470 }, { "epoch": 0.6632941176470588, "grad_norm": 0.4677257352810139, "learning_rate": 2.456304290757594e-06, "loss": 0.016991037130355834, "step": 70475 }, { "epoch": 0.6633411764705882, "grad_norm": 0.4354997485803037, "learning_rate": 2.4562171602699035e-06, "loss": 0.015094104409217834, "step": 70480 }, { "epoch": 0.6633882352941176, "grad_norm": 0.46615260836743155, "learning_rate": 2.4561300390536823e-06, "loss": 0.01721445322036743, "step": 70485 }, { "epoch": 0.663435294117647, "grad_norm": 0.3016407030134652, "learning_rate": 2.4560429271072863e-06, "loss": 0.013649243116378783, "step": 70490 }, { "epoch": 0.6634823529411765, "grad_norm": 0.48021092856745806, "learning_rate": 2.455955824429072e-06, "loss": 0.020481428503990172, "step": 70495 }, { "epoch": 0.6635294117647059, "grad_norm": 0.41151383114061635, "learning_rate": 2.4558687310173958e-06, "loss": 0.01851119101047516, "step": 70500 }, { "epoch": 0.6635764705882353, "grad_norm": 0.7068160272725004, "learning_rate": 2.4557816468706143e-06, "loss": 0.014940895140171051, "step": 70505 }, { "epoch": 0.6636235294117647, "grad_norm": 0.5084366971622574, "learning_rate": 2.455694571987086e-06, "loss": 0.015683893859386445, "step": 70510 }, { "epoch": 0.6636705882352941, "grad_norm": 0.6066964698211645, "learning_rate": 2.4556075063651684e-06, "loss": 0.01904846429824829, "step": 70515 }, { "epoch": 0.6637176470588235, "grad_norm": 0.9047044020829366, "learning_rate": 2.455520450003219e-06, "loss": 0.016491609811782836, "step": 70520 }, { "epoch": 0.6637647058823529, "grad_norm": 0.45723444625686005, "learning_rate": 2.4554334028995973e-06, "loss": 0.012134160101413726, "step": 70525 }, { "epoch": 0.6638117647058823, "grad_norm": 0.2818446118291996, "learning_rate": 2.4553463650526617e-06, "loss": 0.014133554697036744, "step": 70530 }, { "epoch": 0.6638588235294117, "grad_norm": 0.4629154213710282, "learning_rate": 2.4552593364607735e-06, "loss": 0.01542196273803711, "step": 70535 }, { "epoch": 0.6639058823529411, "grad_norm": 0.6952404476899525, "learning_rate": 2.45517231712229e-06, "loss": 0.018083781003952026, "step": 70540 }, { "epoch": 0.6639529411764706, "grad_norm": 0.3805640216231395, "learning_rate": 2.455085307035574e-06, "loss": 0.016038385033607484, "step": 70545 }, { "epoch": 0.664, "grad_norm": 0.6468634381904723, "learning_rate": 2.454998306198984e-06, "loss": 0.016214030981063842, "step": 70550 }, { "epoch": 0.6640470588235294, "grad_norm": 0.4642288975425548, "learning_rate": 2.454911314610883e-06, "loss": 0.015189799666404723, "step": 70555 }, { "epoch": 0.6640941176470588, "grad_norm": 0.414753557586035, "learning_rate": 2.454824332269631e-06, "loss": 0.017783334851264952, "step": 70560 }, { "epoch": 0.6641411764705882, "grad_norm": 0.6243000502828073, "learning_rate": 2.4547373591735906e-06, "loss": 0.015411871671676635, "step": 70565 }, { "epoch": 0.6641882352941176, "grad_norm": 0.4302881008483343, "learning_rate": 2.4546503953211247e-06, "loss": 0.01839785873889923, "step": 70570 }, { "epoch": 0.664235294117647, "grad_norm": 0.6685257961936506, "learning_rate": 2.4545634407105955e-06, "loss": 0.018862040340900423, "step": 70575 }, { "epoch": 0.6642823529411764, "grad_norm": 0.5193476416044078, "learning_rate": 2.4544764953403657e-06, "loss": 0.020086154341697693, "step": 70580 }, { "epoch": 0.6643294117647058, "grad_norm": 0.5701326380229086, "learning_rate": 2.4543895592088e-06, "loss": 0.018381762504577636, "step": 70585 }, { "epoch": 0.6643764705882353, "grad_norm": 0.4539319102376944, "learning_rate": 2.454302632314262e-06, "loss": 0.01567460298538208, "step": 70590 }, { "epoch": 0.6644235294117647, "grad_norm": 0.5344425084474069, "learning_rate": 2.4542157146551154e-06, "loss": 0.016605773568153383, "step": 70595 }, { "epoch": 0.6644705882352941, "grad_norm": 0.4801855259539546, "learning_rate": 2.454128806229726e-06, "loss": 0.014797799289226532, "step": 70600 }, { "epoch": 0.6645176470588235, "grad_norm": 0.43608685750327697, "learning_rate": 2.4540419070364578e-06, "loss": 0.018022799491882326, "step": 70605 }, { "epoch": 0.664564705882353, "grad_norm": 0.3623548142145857, "learning_rate": 2.4539550170736774e-06, "loss": 0.010862163454294204, "step": 70610 }, { "epoch": 0.6646117647058823, "grad_norm": 0.5455041435693575, "learning_rate": 2.4538681363397504e-06, "loss": 0.014298605918884277, "step": 70615 }, { "epoch": 0.6646588235294117, "grad_norm": 0.5543881161219189, "learning_rate": 2.4537812648330433e-06, "loss": 0.014546239376068115, "step": 70620 }, { "epoch": 0.6647058823529411, "grad_norm": 0.5409680998730384, "learning_rate": 2.4536944025519226e-06, "loss": 0.014201691746711731, "step": 70625 }, { "epoch": 0.6647529411764705, "grad_norm": 0.2975020101863791, "learning_rate": 2.453607549494756e-06, "loss": 0.012755095958709717, "step": 70630 }, { "epoch": 0.6648, "grad_norm": 0.762524473251883, "learning_rate": 2.453520705659911e-06, "loss": 0.016789579391479494, "step": 70635 }, { "epoch": 0.6648470588235295, "grad_norm": 0.49501557481199854, "learning_rate": 2.453433871045755e-06, "loss": 0.014128361642360688, "step": 70640 }, { "epoch": 0.6648941176470589, "grad_norm": 0.34574321748186826, "learning_rate": 2.4533470456506574e-06, "loss": 0.01852744519710541, "step": 70645 }, { "epoch": 0.6649411764705883, "grad_norm": 0.35048561847611676, "learning_rate": 2.453260229472987e-06, "loss": 0.014168930053710938, "step": 70650 }, { "epoch": 0.6649882352941177, "grad_norm": 0.49128866596388915, "learning_rate": 2.4531734225111116e-06, "loss": 0.013599225878715515, "step": 70655 }, { "epoch": 0.6650352941176471, "grad_norm": 0.6467126704473297, "learning_rate": 2.453086624763403e-06, "loss": 0.017569658160209656, "step": 70660 }, { "epoch": 0.6650823529411765, "grad_norm": 0.48062294656625143, "learning_rate": 2.452999836228229e-06, "loss": 0.013905274868011474, "step": 70665 }, { "epoch": 0.6651294117647059, "grad_norm": 0.3564758203122636, "learning_rate": 2.4529130569039622e-06, "loss": 0.013554894924163818, "step": 70670 }, { "epoch": 0.6651764705882353, "grad_norm": 0.3036090256740239, "learning_rate": 2.4528262867889717e-06, "loss": 0.021809542179107667, "step": 70675 }, { "epoch": 0.6652235294117647, "grad_norm": 0.4609000278877575, "learning_rate": 2.4527395258816295e-06, "loss": 0.01421312391757965, "step": 70680 }, { "epoch": 0.6652705882352942, "grad_norm": 0.3204731988687866, "learning_rate": 2.4526527741803073e-06, "loss": 0.014125972986221313, "step": 70685 }, { "epoch": 0.6653176470588236, "grad_norm": 0.4776066683301872, "learning_rate": 2.452566031683378e-06, "loss": 0.012133991718292237, "step": 70690 }, { "epoch": 0.665364705882353, "grad_norm": 0.5880330009778774, "learning_rate": 2.452479298389212e-06, "loss": 0.015483120083808899, "step": 70695 }, { "epoch": 0.6654117647058824, "grad_norm": 0.3609481079660633, "learning_rate": 2.4523925742961837e-06, "loss": 0.012780357897281647, "step": 70700 }, { "epoch": 0.6654588235294118, "grad_norm": 0.609278419521974, "learning_rate": 2.452305859402666e-06, "loss": 0.01423681378364563, "step": 70705 }, { "epoch": 0.6655058823529412, "grad_norm": 0.6139933676626326, "learning_rate": 2.4522191537070327e-06, "loss": 0.011875248700380325, "step": 70710 }, { "epoch": 0.6655529411764706, "grad_norm": 0.6439563411950666, "learning_rate": 2.452132457207658e-06, "loss": 0.018863858282566072, "step": 70715 }, { "epoch": 0.6656, "grad_norm": 0.43569433937545393, "learning_rate": 2.4520457699029154e-06, "loss": 0.01671544313430786, "step": 70720 }, { "epoch": 0.6656470588235294, "grad_norm": 0.5467116763649672, "learning_rate": 2.451959091791181e-06, "loss": 0.014895768463611602, "step": 70725 }, { "epoch": 0.6656941176470588, "grad_norm": 0.6459414327207298, "learning_rate": 2.45187242287083e-06, "loss": 0.018591921031475066, "step": 70730 }, { "epoch": 0.6657411764705883, "grad_norm": 0.3534574130548704, "learning_rate": 2.451785763140236e-06, "loss": 0.015837696194648743, "step": 70735 }, { "epoch": 0.6657882352941177, "grad_norm": 0.45120382747025317, "learning_rate": 2.451699112597778e-06, "loss": 0.015786106884479522, "step": 70740 }, { "epoch": 0.6658352941176471, "grad_norm": 0.5599652077686438, "learning_rate": 2.4516124712418317e-06, "loss": 0.017527519166469573, "step": 70745 }, { "epoch": 0.6658823529411765, "grad_norm": 0.3928456799815476, "learning_rate": 2.451525839070773e-06, "loss": 0.011163868010044098, "step": 70750 }, { "epoch": 0.6659294117647059, "grad_norm": 0.45653225453944907, "learning_rate": 2.4514392160829795e-06, "loss": 0.017659948766231538, "step": 70755 }, { "epoch": 0.6659764705882353, "grad_norm": 0.5008753706538868, "learning_rate": 2.451352602276829e-06, "loss": 0.014019906520843506, "step": 70760 }, { "epoch": 0.6660235294117647, "grad_norm": 0.3916378160527202, "learning_rate": 2.451265997650701e-06, "loss": 0.015494911372661591, "step": 70765 }, { "epoch": 0.6660705882352941, "grad_norm": 0.4776516109342839, "learning_rate": 2.4511794022029707e-06, "loss": 0.015056310594081879, "step": 70770 }, { "epoch": 0.6661176470588235, "grad_norm": 0.3007782657982975, "learning_rate": 2.4510928159320204e-06, "loss": 0.013550287485122681, "step": 70775 }, { "epoch": 0.666164705882353, "grad_norm": 0.6216250628878843, "learning_rate": 2.451006238836228e-06, "loss": 0.01541612446308136, "step": 70780 }, { "epoch": 0.6662117647058824, "grad_norm": 0.4372059751275995, "learning_rate": 2.450919670913973e-06, "loss": 0.013487198948860168, "step": 70785 }, { "epoch": 0.6662588235294118, "grad_norm": 0.4896446613468493, "learning_rate": 2.4508331121636355e-06, "loss": 0.015370900928974151, "step": 70790 }, { "epoch": 0.6663058823529412, "grad_norm": 0.5271582329587898, "learning_rate": 2.450746562583596e-06, "loss": 0.02116861045360565, "step": 70795 }, { "epoch": 0.6663529411764706, "grad_norm": 0.35156982668072645, "learning_rate": 2.4506600221722367e-06, "loss": 0.013671427965164185, "step": 70800 }, { "epoch": 0.6664, "grad_norm": 0.2982529391909682, "learning_rate": 2.4505734909279365e-06, "loss": 0.010951033234596253, "step": 70805 }, { "epoch": 0.6664470588235294, "grad_norm": 0.3541477997437156, "learning_rate": 2.450486968849079e-06, "loss": 0.015803396701812744, "step": 70810 }, { "epoch": 0.6664941176470588, "grad_norm": 0.6157289352745565, "learning_rate": 2.4504004559340456e-06, "loss": 0.01340537965297699, "step": 70815 }, { "epoch": 0.6665411764705882, "grad_norm": 0.4498006054915366, "learning_rate": 2.4503139521812187e-06, "loss": 0.014781682193279267, "step": 70820 }, { "epoch": 0.6665882352941176, "grad_norm": 0.5384560649595145, "learning_rate": 2.4502274575889816e-06, "loss": 0.014878797531127929, "step": 70825 }, { "epoch": 0.6666352941176471, "grad_norm": 0.4405418845335629, "learning_rate": 2.4501409721557173e-06, "loss": 0.01586560010910034, "step": 70830 }, { "epoch": 0.6666823529411765, "grad_norm": 0.39825115488326096, "learning_rate": 2.45005449587981e-06, "loss": 0.018126025795936584, "step": 70835 }, { "epoch": 0.6667294117647059, "grad_norm": 0.4391737849294683, "learning_rate": 2.4499680287596428e-06, "loss": 0.017397186160087584, "step": 70840 }, { "epoch": 0.6667764705882353, "grad_norm": 0.4658523238302237, "learning_rate": 2.4498815707936007e-06, "loss": 0.014897669851779937, "step": 70845 }, { "epoch": 0.6668235294117647, "grad_norm": 0.5630946841368323, "learning_rate": 2.4497951219800694e-06, "loss": 0.013803264498710633, "step": 70850 }, { "epoch": 0.6668705882352941, "grad_norm": 0.3788694178955036, "learning_rate": 2.4497086823174328e-06, "loss": 0.013951045274734498, "step": 70855 }, { "epoch": 0.6669176470588235, "grad_norm": 0.5103765619254308, "learning_rate": 2.449622251804078e-06, "loss": 0.018820537626743315, "step": 70860 }, { "epoch": 0.6669647058823529, "grad_norm": 0.4037436745248933, "learning_rate": 2.4495358304383897e-06, "loss": 0.014259576797485352, "step": 70865 }, { "epoch": 0.6670117647058823, "grad_norm": 0.5119937480681184, "learning_rate": 2.449449418218755e-06, "loss": 0.013118705153465271, "step": 70870 }, { "epoch": 0.6670588235294118, "grad_norm": 0.5161443347074259, "learning_rate": 2.4493630151435615e-06, "loss": 0.01646300554275513, "step": 70875 }, { "epoch": 0.6671058823529412, "grad_norm": 0.4624557333448317, "learning_rate": 2.4492766212111956e-06, "loss": 0.012347093224525452, "step": 70880 }, { "epoch": 0.6671529411764706, "grad_norm": 0.5136816383555414, "learning_rate": 2.4491902364200444e-06, "loss": 0.01681872606277466, "step": 70885 }, { "epoch": 0.6672, "grad_norm": 0.501663686647262, "learning_rate": 2.4491038607684978e-06, "loss": 0.013471613824367522, "step": 70890 }, { "epoch": 0.6672470588235294, "grad_norm": 0.47844758016227695, "learning_rate": 2.4490174942549424e-06, "loss": 0.015422467887401582, "step": 70895 }, { "epoch": 0.6672941176470588, "grad_norm": 0.5867485560940017, "learning_rate": 2.4489311368777682e-06, "loss": 0.022322556376457213, "step": 70900 }, { "epoch": 0.6673411764705882, "grad_norm": 0.4397445900464224, "learning_rate": 2.448844788635365e-06, "loss": 0.0166985422372818, "step": 70905 }, { "epoch": 0.6673882352941176, "grad_norm": 0.4159211896400515, "learning_rate": 2.4487584495261206e-06, "loss": 0.012946856021881104, "step": 70910 }, { "epoch": 0.667435294117647, "grad_norm": 0.5093801182717135, "learning_rate": 2.4486721195484264e-06, "loss": 0.014207018911838532, "step": 70915 }, { "epoch": 0.6674823529411764, "grad_norm": 0.48775364755282213, "learning_rate": 2.448585798700673e-06, "loss": 0.017081651091575622, "step": 70920 }, { "epoch": 0.6675294117647059, "grad_norm": 0.6511244930517834, "learning_rate": 2.448499486981251e-06, "loss": 0.016077950596809387, "step": 70925 }, { "epoch": 0.6675764705882353, "grad_norm": 0.5759694775935996, "learning_rate": 2.448413184388551e-06, "loss": 0.01817314773797989, "step": 70930 }, { "epoch": 0.6676235294117647, "grad_norm": 0.5250020108926827, "learning_rate": 2.448326890920966e-06, "loss": 0.01580672711133957, "step": 70935 }, { "epoch": 0.6676705882352941, "grad_norm": 0.5964665536499126, "learning_rate": 2.4482406065768864e-06, "loss": 0.014337687194347382, "step": 70940 }, { "epoch": 0.6677176470588235, "grad_norm": 0.47833828997075, "learning_rate": 2.448154331354706e-06, "loss": 0.017915531992912292, "step": 70945 }, { "epoch": 0.6677647058823529, "grad_norm": 0.5344152153440797, "learning_rate": 2.4480680652528173e-06, "loss": 0.013912369310855866, "step": 70950 }, { "epoch": 0.6678117647058823, "grad_norm": 0.40439385580907333, "learning_rate": 2.4479818082696137e-06, "loss": 0.016443508863449096, "step": 70955 }, { "epoch": 0.6678588235294117, "grad_norm": 0.41377730231109905, "learning_rate": 2.447895560403488e-06, "loss": 0.012728594243526459, "step": 70960 }, { "epoch": 0.6679058823529411, "grad_norm": 0.39855995633915525, "learning_rate": 2.4478093216528345e-06, "loss": 0.01648086905479431, "step": 70965 }, { "epoch": 0.6679529411764706, "grad_norm": 0.5106844044838702, "learning_rate": 2.447723092016048e-06, "loss": 0.012596556544303894, "step": 70970 }, { "epoch": 0.668, "grad_norm": 0.49251732977674356, "learning_rate": 2.447636871491524e-06, "loss": 0.013279411196708679, "step": 70975 }, { "epoch": 0.6680470588235294, "grad_norm": 0.4718042160378299, "learning_rate": 2.4475506600776562e-06, "loss": 0.015384426712989807, "step": 70980 }, { "epoch": 0.6680941176470588, "grad_norm": 0.6582641093040903, "learning_rate": 2.447464457772842e-06, "loss": 0.016707678139209748, "step": 70985 }, { "epoch": 0.6681411764705882, "grad_norm": 0.3872108439784984, "learning_rate": 2.447378264575476e-06, "loss": 0.013989146053791045, "step": 70990 }, { "epoch": 0.6681882352941176, "grad_norm": 0.4964061610202009, "learning_rate": 2.4472920804839543e-06, "loss": 0.016589871048927306, "step": 70995 }, { "epoch": 0.668235294117647, "grad_norm": 0.8304101437933196, "learning_rate": 2.4472059054966753e-06, "loss": 0.015528267621994019, "step": 71000 }, { "epoch": 0.6682823529411764, "grad_norm": 0.39780023625115835, "learning_rate": 2.447119739612035e-06, "loss": 0.017966051399707795, "step": 71005 }, { "epoch": 0.6683294117647058, "grad_norm": 0.5696563015756352, "learning_rate": 2.4470335828284318e-06, "loss": 0.015234588086605072, "step": 71010 }, { "epoch": 0.6683764705882353, "grad_norm": 0.4031931486937114, "learning_rate": 2.4469474351442625e-06, "loss": 0.01935533881187439, "step": 71015 }, { "epoch": 0.6684235294117647, "grad_norm": 0.45165677904380874, "learning_rate": 2.4468612965579262e-06, "loss": 0.012894442677497864, "step": 71020 }, { "epoch": 0.6684705882352941, "grad_norm": 0.4304502308170352, "learning_rate": 2.4467751670678226e-06, "loss": 0.01362471580505371, "step": 71025 }, { "epoch": 0.6685176470588235, "grad_norm": 0.5071947321710951, "learning_rate": 2.4466890466723494e-06, "loss": 0.015272940695285796, "step": 71030 }, { "epoch": 0.668564705882353, "grad_norm": 0.5098043229258955, "learning_rate": 2.446602935369907e-06, "loss": 0.016302609443664552, "step": 71035 }, { "epoch": 0.6686117647058823, "grad_norm": 0.41931388135095726, "learning_rate": 2.4465168331588947e-06, "loss": 0.013308660686016082, "step": 71040 }, { "epoch": 0.6686588235294117, "grad_norm": 0.5228793846678925, "learning_rate": 2.4464307400377143e-06, "loss": 0.017710407078266144, "step": 71045 }, { "epoch": 0.6687058823529411, "grad_norm": 0.5234164477588914, "learning_rate": 2.4463446560047645e-06, "loss": 0.016519172489643096, "step": 71050 }, { "epoch": 0.6687529411764706, "grad_norm": 0.39375971695126266, "learning_rate": 2.4462585810584476e-06, "loss": 0.015280532836914062, "step": 71055 }, { "epoch": 0.6688, "grad_norm": 0.48529705497364284, "learning_rate": 2.446172515197165e-06, "loss": 0.015871959924697875, "step": 71060 }, { "epoch": 0.6688470588235295, "grad_norm": 0.4115716205963401, "learning_rate": 2.4460864584193194e-06, "loss": 0.014598684012889862, "step": 71065 }, { "epoch": 0.6688941176470589, "grad_norm": 0.43515175728941796, "learning_rate": 2.446000410723311e-06, "loss": 0.012852835655212402, "step": 71070 }, { "epoch": 0.6689411764705883, "grad_norm": 0.43275705782539353, "learning_rate": 2.445914372107545e-06, "loss": 0.012873733043670654, "step": 71075 }, { "epoch": 0.6689882352941177, "grad_norm": 0.35871487088734444, "learning_rate": 2.4458283425704234e-06, "loss": 0.011366897821426391, "step": 71080 }, { "epoch": 0.6690352941176471, "grad_norm": 0.9502630946284406, "learning_rate": 2.4457423221103495e-06, "loss": 0.017832615971565248, "step": 71085 }, { "epoch": 0.6690823529411765, "grad_norm": 0.5784726013217667, "learning_rate": 2.4456563107257275e-06, "loss": 0.021590176224708556, "step": 71090 }, { "epoch": 0.6691294117647059, "grad_norm": 0.5924731323439095, "learning_rate": 2.4455703084149617e-06, "loss": 0.015708430111408232, "step": 71095 }, { "epoch": 0.6691764705882353, "grad_norm": 0.30147014914103093, "learning_rate": 2.4454843151764565e-06, "loss": 0.01737259030342102, "step": 71100 }, { "epoch": 0.6692235294117647, "grad_norm": 0.4681294882435976, "learning_rate": 2.4453983310086176e-06, "loss": 0.018010711669921874, "step": 71105 }, { "epoch": 0.6692705882352942, "grad_norm": 0.4745941653345792, "learning_rate": 2.4453123559098495e-06, "loss": 0.015491573512554169, "step": 71110 }, { "epoch": 0.6693176470588236, "grad_norm": 0.5977801430198356, "learning_rate": 2.445226389878559e-06, "loss": 0.01546671986579895, "step": 71115 }, { "epoch": 0.669364705882353, "grad_norm": 0.833297813553578, "learning_rate": 2.445140432913152e-06, "loss": 0.013311943411827088, "step": 71120 }, { "epoch": 0.6694117647058824, "grad_norm": 0.5524305977590122, "learning_rate": 2.4450544850120343e-06, "loss": 0.019176959991455078, "step": 71125 }, { "epoch": 0.6694588235294118, "grad_norm": 0.4118792573931641, "learning_rate": 2.4449685461736146e-06, "loss": 0.012366696447134017, "step": 71130 }, { "epoch": 0.6695058823529412, "grad_norm": 0.5070143034811035, "learning_rate": 2.4448826163962993e-06, "loss": 0.014629915356636047, "step": 71135 }, { "epoch": 0.6695529411764706, "grad_norm": 0.4671938939044187, "learning_rate": 2.4447966956784965e-06, "loss": 0.013789542019367218, "step": 71140 }, { "epoch": 0.6696, "grad_norm": 0.9764854952962545, "learning_rate": 2.444710784018614e-06, "loss": 0.015769919753074645, "step": 71145 }, { "epoch": 0.6696470588235294, "grad_norm": 0.4097588658205982, "learning_rate": 2.444624881415061e-06, "loss": 0.013331353664398193, "step": 71150 }, { "epoch": 0.6696941176470588, "grad_norm": 0.48059879939753086, "learning_rate": 2.4445389878662468e-06, "loss": 0.016667543351650237, "step": 71155 }, { "epoch": 0.6697411764705883, "grad_norm": 0.5253630616385062, "learning_rate": 2.44445310337058e-06, "loss": 0.016619768738746644, "step": 71160 }, { "epoch": 0.6697882352941177, "grad_norm": 0.6142590164216585, "learning_rate": 2.44436722792647e-06, "loss": 0.014890903234481811, "step": 71165 }, { "epoch": 0.6698352941176471, "grad_norm": 0.573083431190597, "learning_rate": 2.444281361532328e-06, "loss": 0.017475885152816773, "step": 71170 }, { "epoch": 0.6698823529411765, "grad_norm": 0.6197819756570109, "learning_rate": 2.4441955041865645e-06, "loss": 0.015664294362068176, "step": 71175 }, { "epoch": 0.6699294117647059, "grad_norm": 0.521878590109036, "learning_rate": 2.4441096558875897e-06, "loss": 0.016002967953681946, "step": 71180 }, { "epoch": 0.6699764705882353, "grad_norm": 0.7009249690179269, "learning_rate": 2.4440238166338155e-06, "loss": 0.021258246898651124, "step": 71185 }, { "epoch": 0.6700235294117647, "grad_norm": 0.45678833015057185, "learning_rate": 2.443937986423653e-06, "loss": 0.016424430906772612, "step": 71190 }, { "epoch": 0.6700705882352941, "grad_norm": 0.5458219005557776, "learning_rate": 2.4438521652555156e-06, "loss": 0.020469167828559877, "step": 71195 }, { "epoch": 0.6701176470588235, "grad_norm": 0.4679448953353219, "learning_rate": 2.4437663531278143e-06, "loss": 0.015467086434364319, "step": 71200 }, { "epoch": 0.670164705882353, "grad_norm": 0.3410647651665617, "learning_rate": 2.443680550038963e-06, "loss": 0.014251884818077088, "step": 71205 }, { "epoch": 0.6702117647058824, "grad_norm": 0.4857342268907951, "learning_rate": 2.443594755987375e-06, "loss": 0.020995134115219118, "step": 71210 }, { "epoch": 0.6702588235294118, "grad_norm": 0.9098371149446556, "learning_rate": 2.443508970971464e-06, "loss": 0.016179078817367555, "step": 71215 }, { "epoch": 0.6703058823529412, "grad_norm": 0.5414967140728848, "learning_rate": 2.4434231949896426e-06, "loss": 0.012656739354133606, "step": 71220 }, { "epoch": 0.6703529411764706, "grad_norm": 0.6304255138519798, "learning_rate": 2.4433374280403275e-06, "loss": 0.018052342534065246, "step": 71225 }, { "epoch": 0.6704, "grad_norm": 0.722757948665633, "learning_rate": 2.443251670121932e-06, "loss": 0.016513291001319885, "step": 71230 }, { "epoch": 0.6704470588235294, "grad_norm": 0.3425149767356056, "learning_rate": 2.443165921232872e-06, "loss": 0.013547512888908386, "step": 71235 }, { "epoch": 0.6704941176470588, "grad_norm": 0.4011415529564289, "learning_rate": 2.443080181371563e-06, "loss": 0.013925750553607941, "step": 71240 }, { "epoch": 0.6705411764705882, "grad_norm": 0.5760241991902184, "learning_rate": 2.442994450536421e-06, "loss": 0.017507290840148924, "step": 71245 }, { "epoch": 0.6705882352941176, "grad_norm": 0.46405616082360024, "learning_rate": 2.442908728725862e-06, "loss": 0.01296597272157669, "step": 71250 }, { "epoch": 0.6706352941176471, "grad_norm": 0.45436857576798884, "learning_rate": 2.4428230159383044e-06, "loss": 0.014862772822380067, "step": 71255 }, { "epoch": 0.6706823529411765, "grad_norm": 0.5538659482623063, "learning_rate": 2.4427373121721634e-06, "loss": 0.014906279742717743, "step": 71260 }, { "epoch": 0.6707294117647059, "grad_norm": 0.3168115048409851, "learning_rate": 2.4426516174258577e-06, "loss": 0.016732171177864075, "step": 71265 }, { "epoch": 0.6707764705882353, "grad_norm": 0.6922716379837893, "learning_rate": 2.4425659316978047e-06, "loss": 0.01951175630092621, "step": 71270 }, { "epoch": 0.6708235294117647, "grad_norm": 0.7162863711091237, "learning_rate": 2.442480254986423e-06, "loss": 0.022489508986473082, "step": 71275 }, { "epoch": 0.6708705882352941, "grad_norm": 0.4638987438468507, "learning_rate": 2.4423945872901313e-06, "loss": 0.017954525351524354, "step": 71280 }, { "epoch": 0.6709176470588235, "grad_norm": 0.5062130372248776, "learning_rate": 2.442308928607349e-06, "loss": 0.015599623322486877, "step": 71285 }, { "epoch": 0.6709647058823529, "grad_norm": 0.6374803720288788, "learning_rate": 2.4422232789364958e-06, "loss": 0.01890823394060135, "step": 71290 }, { "epoch": 0.6710117647058823, "grad_norm": 0.5373758190407517, "learning_rate": 2.4421376382759903e-06, "loss": 0.01824519634246826, "step": 71295 }, { "epoch": 0.6710588235294118, "grad_norm": 0.8138302931005655, "learning_rate": 2.4420520066242544e-06, "loss": 0.019148650765419006, "step": 71300 }, { "epoch": 0.6711058823529412, "grad_norm": 0.50620366384161, "learning_rate": 2.441966383979708e-06, "loss": 0.014348673820495605, "step": 71305 }, { "epoch": 0.6711529411764706, "grad_norm": 0.5543979369135886, "learning_rate": 2.4418807703407717e-06, "loss": 0.015759290754795076, "step": 71310 }, { "epoch": 0.6712, "grad_norm": 0.41921110528454136, "learning_rate": 2.441795165705868e-06, "loss": 0.022478339076042176, "step": 71315 }, { "epoch": 0.6712470588235294, "grad_norm": 0.4438361047365948, "learning_rate": 2.4417095700734187e-06, "loss": 0.018415626883506776, "step": 71320 }, { "epoch": 0.6712941176470588, "grad_norm": 0.7299195608061761, "learning_rate": 2.4416239834418453e-06, "loss": 0.017800046503543852, "step": 71325 }, { "epoch": 0.6713411764705882, "grad_norm": 0.5620922981603426, "learning_rate": 2.4415384058095705e-06, "loss": 0.01583596467971802, "step": 71330 }, { "epoch": 0.6713882352941176, "grad_norm": 0.44575972482405535, "learning_rate": 2.4414528371750175e-06, "loss": 0.016415421664714814, "step": 71335 }, { "epoch": 0.671435294117647, "grad_norm": 0.5698988226553454, "learning_rate": 2.44136727753661e-06, "loss": 0.018717361986637114, "step": 71340 }, { "epoch": 0.6714823529411764, "grad_norm": 0.5949038653109424, "learning_rate": 2.4412817268927717e-06, "loss": 0.015615206956863404, "step": 71345 }, { "epoch": 0.6715294117647059, "grad_norm": 0.7053778990187031, "learning_rate": 2.4411961852419262e-06, "loss": 0.013830545544624328, "step": 71350 }, { "epoch": 0.6715764705882353, "grad_norm": 0.448031220273113, "learning_rate": 2.441110652582498e-06, "loss": 0.019104155898094177, "step": 71355 }, { "epoch": 0.6716235294117647, "grad_norm": 0.6467681231846636, "learning_rate": 2.4410251289129136e-06, "loss": 0.01497586965560913, "step": 71360 }, { "epoch": 0.6716705882352941, "grad_norm": 0.3807466824067753, "learning_rate": 2.4409396142315963e-06, "loss": 0.016074928641319274, "step": 71365 }, { "epoch": 0.6717176470588235, "grad_norm": 0.6094927141912336, "learning_rate": 2.4408541085369733e-06, "loss": 0.015187956392765045, "step": 71370 }, { "epoch": 0.6717647058823529, "grad_norm": 0.4263006869660596, "learning_rate": 2.4407686118274696e-06, "loss": 0.013294118642807006, "step": 71375 }, { "epoch": 0.6718117647058823, "grad_norm": 0.39705551024538066, "learning_rate": 2.4406831241015126e-06, "loss": 0.015069393813610077, "step": 71380 }, { "epoch": 0.6718588235294117, "grad_norm": 0.34937648268943966, "learning_rate": 2.4405976453575286e-06, "loss": 0.01767147481441498, "step": 71385 }, { "epoch": 0.6719058823529411, "grad_norm": 0.3648161233218594, "learning_rate": 2.4405121755939446e-06, "loss": 0.02129364013671875, "step": 71390 }, { "epoch": 0.6719529411764706, "grad_norm": 0.42313108131137334, "learning_rate": 2.4404267148091893e-06, "loss": 0.019916167855262755, "step": 71395 }, { "epoch": 0.672, "grad_norm": 0.3959504652172179, "learning_rate": 2.44034126300169e-06, "loss": 0.019398012757301332, "step": 71400 }, { "epoch": 0.6720470588235294, "grad_norm": 0.44222639685370635, "learning_rate": 2.440255820169875e-06, "loss": 0.015454863011837006, "step": 71405 }, { "epoch": 0.6720941176470588, "grad_norm": 0.3417073262862264, "learning_rate": 2.440170386312173e-06, "loss": 0.012287583947181702, "step": 71410 }, { "epoch": 0.6721411764705882, "grad_norm": 0.41384416063114543, "learning_rate": 2.440084961427014e-06, "loss": 0.01325037181377411, "step": 71415 }, { "epoch": 0.6721882352941176, "grad_norm": 0.4590492815070545, "learning_rate": 2.439999545512827e-06, "loss": 0.012407612800598145, "step": 71420 }, { "epoch": 0.672235294117647, "grad_norm": 0.5709366207235725, "learning_rate": 2.4399141385680423e-06, "loss": 0.015695370733737946, "step": 71425 }, { "epoch": 0.6722823529411764, "grad_norm": 0.6839308721019074, "learning_rate": 2.439828740591089e-06, "loss": 0.013873034715652465, "step": 71430 }, { "epoch": 0.6723294117647058, "grad_norm": 0.6521402686499678, "learning_rate": 2.4397433515803996e-06, "loss": 0.013113388419151306, "step": 71435 }, { "epoch": 0.6723764705882352, "grad_norm": 0.5997780580596519, "learning_rate": 2.439657971534404e-06, "loss": 0.014525079727172851, "step": 71440 }, { "epoch": 0.6724235294117648, "grad_norm": 0.2844531773161714, "learning_rate": 2.4395726004515342e-06, "loss": 0.012375500053167343, "step": 71445 }, { "epoch": 0.6724705882352942, "grad_norm": 0.29360077378186694, "learning_rate": 2.4394872383302215e-06, "loss": 0.01598256826400757, "step": 71450 }, { "epoch": 0.6725176470588236, "grad_norm": 0.49342647038507575, "learning_rate": 2.439401885168899e-06, "loss": 0.012442061305046081, "step": 71455 }, { "epoch": 0.672564705882353, "grad_norm": 0.6667315591351198, "learning_rate": 2.439316540965999e-06, "loss": 0.014686849713325501, "step": 71460 }, { "epoch": 0.6726117647058824, "grad_norm": 0.4279529874537745, "learning_rate": 2.439231205719954e-06, "loss": 0.015660178661346436, "step": 71465 }, { "epoch": 0.6726588235294118, "grad_norm": 0.4327208371555002, "learning_rate": 2.4391458794291985e-06, "loss": 0.016426488757133484, "step": 71470 }, { "epoch": 0.6727058823529412, "grad_norm": 0.4847260075924296, "learning_rate": 2.439060562092165e-06, "loss": 0.014940285682678222, "step": 71475 }, { "epoch": 0.6727529411764706, "grad_norm": 0.6432585585744294, "learning_rate": 2.4389752537072892e-06, "loss": 0.017785386741161348, "step": 71480 }, { "epoch": 0.6728, "grad_norm": 0.4517465411081693, "learning_rate": 2.4388899542730036e-06, "loss": 0.015073543787002564, "step": 71485 }, { "epoch": 0.6728470588235295, "grad_norm": 0.6971999275747002, "learning_rate": 2.4388046637877452e-06, "loss": 0.01812998056411743, "step": 71490 }, { "epoch": 0.6728941176470589, "grad_norm": 0.4748030412315832, "learning_rate": 2.4387193822499482e-06, "loss": 0.015470334887504577, "step": 71495 }, { "epoch": 0.6729411764705883, "grad_norm": 0.4902113305055271, "learning_rate": 2.438634109658049e-06, "loss": 0.017544910311698914, "step": 71500 }, { "epoch": 0.6729882352941177, "grad_norm": 0.7766148211368125, "learning_rate": 2.4385488460104824e-06, "loss": 0.01488228738307953, "step": 71505 }, { "epoch": 0.6730352941176471, "grad_norm": 0.3613125180963212, "learning_rate": 2.4384635913056863e-06, "loss": 0.01426757574081421, "step": 71510 }, { "epoch": 0.6730823529411765, "grad_norm": 0.5317778441669503, "learning_rate": 2.438378345542097e-06, "loss": 0.018334272503852844, "step": 71515 }, { "epoch": 0.6731294117647059, "grad_norm": 0.5517647034282581, "learning_rate": 2.438293108718152e-06, "loss": 0.01977025866508484, "step": 71520 }, { "epoch": 0.6731764705882353, "grad_norm": 0.5915225094499729, "learning_rate": 2.438207880832288e-06, "loss": 0.014610904455184936, "step": 71525 }, { "epoch": 0.6732235294117647, "grad_norm": 0.3609127428319065, "learning_rate": 2.438122661882944e-06, "loss": 0.012702369689941406, "step": 71530 }, { "epoch": 0.6732705882352941, "grad_norm": 0.31818633120023654, "learning_rate": 2.438037451868558e-06, "loss": 0.014857414364814758, "step": 71535 }, { "epoch": 0.6733176470588236, "grad_norm": 0.36383502155200914, "learning_rate": 2.437952250787569e-06, "loss": 0.015187418460845948, "step": 71540 }, { "epoch": 0.673364705882353, "grad_norm": 0.49944674791276084, "learning_rate": 2.4378670586384156e-06, "loss": 0.017892584204673767, "step": 71545 }, { "epoch": 0.6734117647058824, "grad_norm": 0.6393006437711305, "learning_rate": 2.437781875419538e-06, "loss": 0.015116611123085022, "step": 71550 }, { "epoch": 0.6734588235294118, "grad_norm": 0.5866561633780053, "learning_rate": 2.4376967011293756e-06, "loss": 0.013846594095230102, "step": 71555 }, { "epoch": 0.6735058823529412, "grad_norm": 0.4009755660001907, "learning_rate": 2.437611535766369e-06, "loss": 0.011764968186616898, "step": 71560 }, { "epoch": 0.6735529411764706, "grad_norm": 0.4625842977746926, "learning_rate": 2.4375263793289593e-06, "loss": 0.012591959536075592, "step": 71565 }, { "epoch": 0.6736, "grad_norm": 0.6013714405125379, "learning_rate": 2.4374412318155863e-06, "loss": 0.01496361494064331, "step": 71570 }, { "epoch": 0.6736470588235294, "grad_norm": 0.29685947257125883, "learning_rate": 2.4373560932246925e-06, "loss": 0.013200661540031433, "step": 71575 }, { "epoch": 0.6736941176470588, "grad_norm": 0.3395595510029312, "learning_rate": 2.437270963554719e-06, "loss": 0.014908075332641602, "step": 71580 }, { "epoch": 0.6737411764705883, "grad_norm": 0.44726691563189025, "learning_rate": 2.437185842804109e-06, "loss": 0.01495538353919983, "step": 71585 }, { "epoch": 0.6737882352941177, "grad_norm": 0.4301565710715324, "learning_rate": 2.4371007309713048e-06, "loss": 0.01298833042383194, "step": 71590 }, { "epoch": 0.6738352941176471, "grad_norm": 0.3936230475015882, "learning_rate": 2.437015628054748e-06, "loss": 0.018551379442214966, "step": 71595 }, { "epoch": 0.6738823529411765, "grad_norm": 0.3550529506799665, "learning_rate": 2.436930534052883e-06, "loss": 0.013064560294151307, "step": 71600 }, { "epoch": 0.6739294117647059, "grad_norm": 0.42680911547533, "learning_rate": 2.4368454489641545e-06, "loss": 0.01513727307319641, "step": 71605 }, { "epoch": 0.6739764705882353, "grad_norm": 0.6777346688188604, "learning_rate": 2.436760372787005e-06, "loss": 0.014790883660316468, "step": 71610 }, { "epoch": 0.6740235294117647, "grad_norm": 0.5313807266638377, "learning_rate": 2.43667530551988e-06, "loss": 0.01869822442531586, "step": 71615 }, { "epoch": 0.6740705882352941, "grad_norm": 0.4208309932817489, "learning_rate": 2.4365902471612234e-06, "loss": 0.01545485109090805, "step": 71620 }, { "epoch": 0.6741176470588235, "grad_norm": 0.48492312204327137, "learning_rate": 2.4365051977094816e-06, "loss": 0.014967253804206848, "step": 71625 }, { "epoch": 0.6741647058823529, "grad_norm": 0.4244223311028797, "learning_rate": 2.4364201571630994e-06, "loss": 0.014530584216117859, "step": 71630 }, { "epoch": 0.6742117647058824, "grad_norm": 0.47291344946440983, "learning_rate": 2.436335125520523e-06, "loss": 0.014715522527694702, "step": 71635 }, { "epoch": 0.6742588235294118, "grad_norm": 0.6575236250508647, "learning_rate": 2.436250102780199e-06, "loss": 0.017010967433452606, "step": 71640 }, { "epoch": 0.6743058823529412, "grad_norm": 0.5025317050192079, "learning_rate": 2.436165088940574e-06, "loss": 0.016332730650901794, "step": 71645 }, { "epoch": 0.6743529411764706, "grad_norm": 0.483762735685294, "learning_rate": 2.436080084000095e-06, "loss": 0.011918462067842483, "step": 71650 }, { "epoch": 0.6744, "grad_norm": 0.575547594494753, "learning_rate": 2.435995087957209e-06, "loss": 0.019076192378997804, "step": 71655 }, { "epoch": 0.6744470588235294, "grad_norm": 0.5616014767986421, "learning_rate": 2.4359101008103654e-06, "loss": 0.014449834823608398, "step": 71660 }, { "epoch": 0.6744941176470588, "grad_norm": 0.30178855105862656, "learning_rate": 2.4358251225580117e-06, "loss": 0.018809618055820466, "step": 71665 }, { "epoch": 0.6745411764705882, "grad_norm": 0.5194783615116721, "learning_rate": 2.435740153198596e-06, "loss": 0.010343918949365616, "step": 71670 }, { "epoch": 0.6745882352941176, "grad_norm": 0.8639240143826775, "learning_rate": 2.435655192730568e-06, "loss": 0.027559465169906615, "step": 71675 }, { "epoch": 0.6746352941176471, "grad_norm": 0.516608772507236, "learning_rate": 2.435570241152377e-06, "loss": 0.0169353723526001, "step": 71680 }, { "epoch": 0.6746823529411765, "grad_norm": 0.682406012637816, "learning_rate": 2.435485298462473e-06, "loss": 0.013671237230300903, "step": 71685 }, { "epoch": 0.6747294117647059, "grad_norm": 0.46646154989966127, "learning_rate": 2.435400364659306e-06, "loss": 0.014231494069099427, "step": 71690 }, { "epoch": 0.6747764705882353, "grad_norm": 0.6068596983034806, "learning_rate": 2.4353154397413258e-06, "loss": 0.016100004315376282, "step": 71695 }, { "epoch": 0.6748235294117647, "grad_norm": 0.36347132370500307, "learning_rate": 2.4352305237069844e-06, "loss": 0.016385182738304138, "step": 71700 }, { "epoch": 0.6748705882352941, "grad_norm": 0.4447547100453227, "learning_rate": 2.4351456165547325e-06, "loss": 0.019435583055019377, "step": 71705 }, { "epoch": 0.6749176470588235, "grad_norm": 0.38585675247432866, "learning_rate": 2.4350607182830227e-06, "loss": 0.014723475277423858, "step": 71710 }, { "epoch": 0.6749647058823529, "grad_norm": 0.43178486146039086, "learning_rate": 2.4349758288903053e-06, "loss": 0.014606510102748872, "step": 71715 }, { "epoch": 0.6750117647058823, "grad_norm": 0.6476454881350071, "learning_rate": 2.4348909483750344e-06, "loss": 0.017232105135917664, "step": 71720 }, { "epoch": 0.6750588235294117, "grad_norm": 0.4331410132632995, "learning_rate": 2.434806076735662e-06, "loss": 0.01645488440990448, "step": 71725 }, { "epoch": 0.6751058823529412, "grad_norm": 0.3920661121684855, "learning_rate": 2.4347212139706417e-06, "loss": 0.01549125909805298, "step": 71730 }, { "epoch": 0.6751529411764706, "grad_norm": 0.45108187393284155, "learning_rate": 2.434636360078427e-06, "loss": 0.013454969227313995, "step": 71735 }, { "epoch": 0.6752, "grad_norm": 0.6447072090256349, "learning_rate": 2.4345515150574715e-06, "loss": 0.014863450825214387, "step": 71740 }, { "epoch": 0.6752470588235294, "grad_norm": 0.5169178479327229, "learning_rate": 2.4344666789062297e-06, "loss": 0.01556878536939621, "step": 71745 }, { "epoch": 0.6752941176470588, "grad_norm": 0.3217949182364328, "learning_rate": 2.434381851623157e-06, "loss": 0.013797834515571594, "step": 71750 }, { "epoch": 0.6753411764705882, "grad_norm": 0.5427700463005712, "learning_rate": 2.4342970332067072e-06, "loss": 0.015467536449432374, "step": 71755 }, { "epoch": 0.6753882352941176, "grad_norm": 0.5777713942185092, "learning_rate": 2.4342122236553366e-06, "loss": 0.015261578559875488, "step": 71760 }, { "epoch": 0.675435294117647, "grad_norm": 0.6071386328092897, "learning_rate": 2.4341274229675012e-06, "loss": 0.013635995984077453, "step": 71765 }, { "epoch": 0.6754823529411764, "grad_norm": 0.3959674123507592, "learning_rate": 2.434042631141656e-06, "loss": 0.013964584469795227, "step": 71770 }, { "epoch": 0.6755294117647059, "grad_norm": 0.538725934383709, "learning_rate": 2.433957848176259e-06, "loss": 0.014211863279342651, "step": 71775 }, { "epoch": 0.6755764705882353, "grad_norm": 0.7593873081931835, "learning_rate": 2.4338730740697668e-06, "loss": 0.016693729162216186, "step": 71780 }, { "epoch": 0.6756235294117647, "grad_norm": 0.42713502430607964, "learning_rate": 2.4337883088206367e-06, "loss": 0.01876755952835083, "step": 71785 }, { "epoch": 0.6756705882352941, "grad_norm": 0.602394479512441, "learning_rate": 2.4337035524273254e-06, "loss": 0.01801244169473648, "step": 71790 }, { "epoch": 0.6757176470588235, "grad_norm": 0.4526905241531513, "learning_rate": 2.4336188048882927e-06, "loss": 0.017006421089172365, "step": 71795 }, { "epoch": 0.6757647058823529, "grad_norm": 0.5516026211084618, "learning_rate": 2.4335340662019955e-06, "loss": 0.01626213490962982, "step": 71800 }, { "epoch": 0.6758117647058823, "grad_norm": 0.7059215214038911, "learning_rate": 2.4334493363668937e-06, "loss": 0.020497629046440126, "step": 71805 }, { "epoch": 0.6758588235294117, "grad_norm": 0.6753365501359108, "learning_rate": 2.433364615381447e-06, "loss": 0.016522470116615295, "step": 71810 }, { "epoch": 0.6759058823529411, "grad_norm": 0.3452817531591618, "learning_rate": 2.4332799032441133e-06, "loss": 0.01498909592628479, "step": 71815 }, { "epoch": 0.6759529411764705, "grad_norm": 0.5183056161190881, "learning_rate": 2.4331951999533533e-06, "loss": 0.014874204993247986, "step": 71820 }, { "epoch": 0.676, "grad_norm": 0.6191009593813049, "learning_rate": 2.433110505507628e-06, "loss": 0.04124631583690643, "step": 71825 }, { "epoch": 0.6760470588235294, "grad_norm": 0.3438147207438626, "learning_rate": 2.4330258199053973e-06, "loss": 0.014619770646095275, "step": 71830 }, { "epoch": 0.6760941176470588, "grad_norm": 0.5390988198940396, "learning_rate": 2.4329411431451233e-06, "loss": 0.016371898353099823, "step": 71835 }, { "epoch": 0.6761411764705882, "grad_norm": 0.6010242390875734, "learning_rate": 2.4328564752252665e-06, "loss": 0.013278990983963013, "step": 71840 }, { "epoch": 0.6761882352941176, "grad_norm": 0.5937999504705607, "learning_rate": 2.4327718161442883e-06, "loss": 0.01575402319431305, "step": 71845 }, { "epoch": 0.676235294117647, "grad_norm": 0.48883872103852505, "learning_rate": 2.432687165900652e-06, "loss": 0.017705196142196657, "step": 71850 }, { "epoch": 0.6762823529411764, "grad_norm": 0.4577669781798281, "learning_rate": 2.4326025244928208e-06, "loss": 0.013168299198150634, "step": 71855 }, { "epoch": 0.6763294117647058, "grad_norm": 0.6871851838512638, "learning_rate": 2.432517891919256e-06, "loss": 0.017486909031867982, "step": 71860 }, { "epoch": 0.6763764705882352, "grad_norm": 0.45916839937323944, "learning_rate": 2.4324332681784213e-06, "loss": 0.01186390146613121, "step": 71865 }, { "epoch": 0.6764235294117648, "grad_norm": 0.7714345818275659, "learning_rate": 2.4323486532687815e-06, "loss": 0.017152765393257143, "step": 71870 }, { "epoch": 0.6764705882352942, "grad_norm": 0.49963504298032785, "learning_rate": 2.4322640471887995e-06, "loss": 0.015283988416194915, "step": 71875 }, { "epoch": 0.6765176470588236, "grad_norm": 0.4849332162151186, "learning_rate": 2.4321794499369405e-06, "loss": 0.016826166212558745, "step": 71880 }, { "epoch": 0.676564705882353, "grad_norm": 0.6662895647086919, "learning_rate": 2.4320948615116687e-06, "loss": 0.017048278450965883, "step": 71885 }, { "epoch": 0.6766117647058824, "grad_norm": 0.5711868549094113, "learning_rate": 2.4320102819114503e-06, "loss": 0.012735161185264587, "step": 71890 }, { "epoch": 0.6766588235294118, "grad_norm": 0.29237513465454623, "learning_rate": 2.4319257111347504e-06, "loss": 0.013779032230377197, "step": 71895 }, { "epoch": 0.6767058823529412, "grad_norm": 0.36931265849181966, "learning_rate": 2.431841149180034e-06, "loss": 0.019491469860076903, "step": 71900 }, { "epoch": 0.6767529411764706, "grad_norm": 0.273462423383279, "learning_rate": 2.431756596045769e-06, "loss": 0.012676481902599335, "step": 71905 }, { "epoch": 0.6768, "grad_norm": 0.3155417900701289, "learning_rate": 2.4316720517304214e-06, "loss": 0.01426592916250229, "step": 71910 }, { "epoch": 0.6768470588235294, "grad_norm": 0.45319708628135613, "learning_rate": 2.431587516232458e-06, "loss": 0.01385856866836548, "step": 71915 }, { "epoch": 0.6768941176470589, "grad_norm": 0.42029412069128713, "learning_rate": 2.4315029895503466e-06, "loss": 0.015374885499477386, "step": 71920 }, { "epoch": 0.6769411764705883, "grad_norm": 0.5494978025774662, "learning_rate": 2.431418471682555e-06, "loss": 0.015309612452983856, "step": 71925 }, { "epoch": 0.6769882352941177, "grad_norm": 0.6349888132575862, "learning_rate": 2.4313339626275515e-06, "loss": 0.017693831026554106, "step": 71930 }, { "epoch": 0.6770352941176471, "grad_norm": 0.6379093515323779, "learning_rate": 2.431249462383804e-06, "loss": 0.019699013233184813, "step": 71935 }, { "epoch": 0.6770823529411765, "grad_norm": 0.6686565005433024, "learning_rate": 2.431164970949782e-06, "loss": 0.016225162148475646, "step": 71940 }, { "epoch": 0.6771294117647059, "grad_norm": 0.49461496502689745, "learning_rate": 2.4310804883239552e-06, "loss": 0.01774125248193741, "step": 71945 }, { "epoch": 0.6771764705882353, "grad_norm": 0.47620653550476816, "learning_rate": 2.4309960145047934e-06, "loss": 0.017412793636322022, "step": 71950 }, { "epoch": 0.6772235294117647, "grad_norm": 0.6918546053325779, "learning_rate": 2.4309115494907656e-06, "loss": 0.017894163727760315, "step": 71955 }, { "epoch": 0.6772705882352941, "grad_norm": 0.40592599488328357, "learning_rate": 2.430827093280342e-06, "loss": 0.016071194410324098, "step": 71960 }, { "epoch": 0.6773176470588236, "grad_norm": 0.6836686918164445, "learning_rate": 2.4307426458719953e-06, "loss": 0.016585573554039, "step": 71965 }, { "epoch": 0.677364705882353, "grad_norm": 0.42606169743001565, "learning_rate": 2.430658207264195e-06, "loss": 0.014701735973358155, "step": 71970 }, { "epoch": 0.6774117647058824, "grad_norm": 0.40659623642687226, "learning_rate": 2.4305737774554133e-06, "loss": 0.0168560266494751, "step": 71975 }, { "epoch": 0.6774588235294118, "grad_norm": 0.41744895618979416, "learning_rate": 2.430489356444122e-06, "loss": 0.014583709836006164, "step": 71980 }, { "epoch": 0.6775058823529412, "grad_norm": 0.5557260550636384, "learning_rate": 2.430404944228793e-06, "loss": 0.01735132485628128, "step": 71985 }, { "epoch": 0.6775529411764706, "grad_norm": 0.7635659951492564, "learning_rate": 2.4303205408079003e-06, "loss": 0.01601037085056305, "step": 71990 }, { "epoch": 0.6776, "grad_norm": 0.6276929953803312, "learning_rate": 2.430236146179915e-06, "loss": 0.0184745728969574, "step": 71995 }, { "epoch": 0.6776470588235294, "grad_norm": 0.4002091898945942, "learning_rate": 2.4301517603433126e-06, "loss": 0.014507327973842622, "step": 72000 }, { "epoch": 0.6776941176470588, "grad_norm": 0.47550581161485933, "learning_rate": 2.4300673832965646e-06, "loss": 0.017934396862983704, "step": 72005 }, { "epoch": 0.6777411764705882, "grad_norm": 0.3978187632965385, "learning_rate": 2.4299830150381478e-06, "loss": 0.012320411950349807, "step": 72010 }, { "epoch": 0.6777882352941177, "grad_norm": 0.4585741077122147, "learning_rate": 2.429898655566534e-06, "loss": 0.013126438856124878, "step": 72015 }, { "epoch": 0.6778352941176471, "grad_norm": 0.5376976536930351, "learning_rate": 2.4298143048802e-06, "loss": 0.013745243847370147, "step": 72020 }, { "epoch": 0.6778823529411765, "grad_norm": 0.3961196394510639, "learning_rate": 2.42972996297762e-06, "loss": 0.01731412410736084, "step": 72025 }, { "epoch": 0.6779294117647059, "grad_norm": 0.47894732658317146, "learning_rate": 2.4296456298572704e-06, "loss": 0.014033171534538268, "step": 72030 }, { "epoch": 0.6779764705882353, "grad_norm": 0.6683529349538461, "learning_rate": 2.429561305517626e-06, "loss": 0.015305456519126893, "step": 72035 }, { "epoch": 0.6780235294117647, "grad_norm": 0.6888072839129908, "learning_rate": 2.429476989957165e-06, "loss": 0.017706330120563506, "step": 72040 }, { "epoch": 0.6780705882352941, "grad_norm": 0.3562207851167815, "learning_rate": 2.4293926831743632e-06, "loss": 0.012556664645671844, "step": 72045 }, { "epoch": 0.6781176470588235, "grad_norm": 0.3963810351818351, "learning_rate": 2.4293083851676973e-06, "loss": 0.01258561909198761, "step": 72050 }, { "epoch": 0.6781647058823529, "grad_norm": 0.38891249346659407, "learning_rate": 2.4292240959356447e-06, "loss": 0.01658930480480194, "step": 72055 }, { "epoch": 0.6782117647058824, "grad_norm": 0.5901962550893862, "learning_rate": 2.429139815476684e-06, "loss": 0.017211544513702392, "step": 72060 }, { "epoch": 0.6782588235294118, "grad_norm": 0.49580713665156817, "learning_rate": 2.4290555437892936e-06, "loss": 0.014259126782417298, "step": 72065 }, { "epoch": 0.6783058823529412, "grad_norm": 0.4404736443912268, "learning_rate": 2.4289712808719513e-06, "loss": 0.014087656140327453, "step": 72070 }, { "epoch": 0.6783529411764706, "grad_norm": 0.4735402850546153, "learning_rate": 2.428887026723136e-06, "loss": 0.016387686133384705, "step": 72075 }, { "epoch": 0.6784, "grad_norm": 0.5806132518541924, "learning_rate": 2.4288027813413278e-06, "loss": 0.013840940594673157, "step": 72080 }, { "epoch": 0.6784470588235294, "grad_norm": 0.45468543248872334, "learning_rate": 2.428718544725006e-06, "loss": 0.015909643471241, "step": 72085 }, { "epoch": 0.6784941176470588, "grad_norm": 0.3925419054662409, "learning_rate": 2.42863431687265e-06, "loss": 0.015752813220024107, "step": 72090 }, { "epoch": 0.6785411764705882, "grad_norm": 0.7475811226763992, "learning_rate": 2.4285500977827415e-06, "loss": 0.017617705464363097, "step": 72095 }, { "epoch": 0.6785882352941176, "grad_norm": 0.4089299603095732, "learning_rate": 2.4284658874537608e-06, "loss": 0.016319169104099272, "step": 72100 }, { "epoch": 0.6786352941176471, "grad_norm": 0.31088927504690234, "learning_rate": 2.428381685884189e-06, "loss": 0.014978727698326111, "step": 72105 }, { "epoch": 0.6786823529411765, "grad_norm": 0.7690482632942947, "learning_rate": 2.428297493072507e-06, "loss": 0.020442259311676026, "step": 72110 }, { "epoch": 0.6787294117647059, "grad_norm": 0.5708885989784906, "learning_rate": 2.428213309017198e-06, "loss": 0.020205584168434144, "step": 72115 }, { "epoch": 0.6787764705882353, "grad_norm": 0.4756719854125238, "learning_rate": 2.428129133716743e-06, "loss": 0.016579103469848634, "step": 72120 }, { "epoch": 0.6788235294117647, "grad_norm": 0.5563328279718717, "learning_rate": 2.4280449671696256e-06, "loss": 0.016800656914711, "step": 72125 }, { "epoch": 0.6788705882352941, "grad_norm": 0.4858601817797482, "learning_rate": 2.4279608093743283e-06, "loss": 0.016735786199569704, "step": 72130 }, { "epoch": 0.6789176470588235, "grad_norm": 0.4526362885569555, "learning_rate": 2.427876660329335e-06, "loss": 0.01710890531539917, "step": 72135 }, { "epoch": 0.6789647058823529, "grad_norm": 0.5362305745461492, "learning_rate": 2.427792520033129e-06, "loss": 0.014733457565307617, "step": 72140 }, { "epoch": 0.6790117647058823, "grad_norm": 1.4217562509402455, "learning_rate": 2.427708388484194e-06, "loss": 0.015031617879867554, "step": 72145 }, { "epoch": 0.6790588235294117, "grad_norm": 0.4978458736559269, "learning_rate": 2.427624265681016e-06, "loss": 0.013869483768939973, "step": 72150 }, { "epoch": 0.6791058823529412, "grad_norm": 0.5161080439542937, "learning_rate": 2.427540151622078e-06, "loss": 0.017742979526519775, "step": 72155 }, { "epoch": 0.6791529411764706, "grad_norm": 0.5087671591293705, "learning_rate": 2.427456046305866e-06, "loss": 0.01650124490261078, "step": 72160 }, { "epoch": 0.6792, "grad_norm": 0.504861446101303, "learning_rate": 2.427371949730866e-06, "loss": 0.012739361822605133, "step": 72165 }, { "epoch": 0.6792470588235294, "grad_norm": 0.53928421384467, "learning_rate": 2.4272878618955632e-06, "loss": 0.014266720414161682, "step": 72170 }, { "epoch": 0.6792941176470588, "grad_norm": 0.49894184267407987, "learning_rate": 2.427203782798445e-06, "loss": 0.015096020698547364, "step": 72175 }, { "epoch": 0.6793411764705882, "grad_norm": 0.4875544573111013, "learning_rate": 2.4271197124379973e-06, "loss": 0.018624866008758546, "step": 72180 }, { "epoch": 0.6793882352941176, "grad_norm": 0.33293601305365966, "learning_rate": 2.4270356508127067e-06, "loss": 0.012799419462680817, "step": 72185 }, { "epoch": 0.679435294117647, "grad_norm": 0.4863409291564014, "learning_rate": 2.426951597921062e-06, "loss": 0.017284578084945677, "step": 72190 }, { "epoch": 0.6794823529411764, "grad_norm": 0.46771694737366926, "learning_rate": 2.42686755376155e-06, "loss": 0.016168557107448578, "step": 72195 }, { "epoch": 0.6795294117647059, "grad_norm": 0.5065733991200695, "learning_rate": 2.426783518332659e-06, "loss": 0.01409098356962204, "step": 72200 }, { "epoch": 0.6795764705882353, "grad_norm": 0.5158617608606435, "learning_rate": 2.426699491632877e-06, "loss": 0.015963870286941528, "step": 72205 }, { "epoch": 0.6796235294117647, "grad_norm": 0.4459235136142093, "learning_rate": 2.426615473660694e-06, "loss": 0.016388064622879027, "step": 72210 }, { "epoch": 0.6796705882352941, "grad_norm": 0.5347489992033984, "learning_rate": 2.4265314644145987e-06, "loss": 0.01650187224149704, "step": 72215 }, { "epoch": 0.6797176470588235, "grad_norm": 0.5759758509057354, "learning_rate": 2.426447463893081e-06, "loss": 0.017475320398807524, "step": 72220 }, { "epoch": 0.6797647058823529, "grad_norm": 0.6099935520888814, "learning_rate": 2.4263634720946303e-06, "loss": 0.014780683815479279, "step": 72225 }, { "epoch": 0.6798117647058823, "grad_norm": 0.32982631717501865, "learning_rate": 2.4262794890177373e-06, "loss": 0.015738895535469054, "step": 72230 }, { "epoch": 0.6798588235294117, "grad_norm": 0.4755178141425139, "learning_rate": 2.426195514660893e-06, "loss": 0.01853162944316864, "step": 72235 }, { "epoch": 0.6799058823529411, "grad_norm": 0.4438129266597426, "learning_rate": 2.4261115490225876e-06, "loss": 0.012125655263662338, "step": 72240 }, { "epoch": 0.6799529411764705, "grad_norm": 0.5267927376818968, "learning_rate": 2.4260275921013135e-06, "loss": 0.0181542307138443, "step": 72245 }, { "epoch": 0.68, "grad_norm": 0.6141311869372208, "learning_rate": 2.4259436438955624e-06, "loss": 0.015098540484905243, "step": 72250 }, { "epoch": 0.6800470588235294, "grad_norm": 0.5312831476348966, "learning_rate": 2.4258597044038262e-06, "loss": 0.016932286322116852, "step": 72255 }, { "epoch": 0.6800941176470588, "grad_norm": 0.3545753151001789, "learning_rate": 2.4257757736245973e-06, "loss": 0.01811288595199585, "step": 72260 }, { "epoch": 0.6801411764705882, "grad_norm": 0.4284316046871388, "learning_rate": 2.4256918515563686e-06, "loss": 0.013231401145458222, "step": 72265 }, { "epoch": 0.6801882352941176, "grad_norm": 0.3770993377185371, "learning_rate": 2.4256079381976343e-06, "loss": 0.012901897728443145, "step": 72270 }, { "epoch": 0.680235294117647, "grad_norm": 0.45881123126824275, "learning_rate": 2.425524033546887e-06, "loss": 0.013234159350395203, "step": 72275 }, { "epoch": 0.6802823529411764, "grad_norm": 0.3788967938280473, "learning_rate": 2.4254401376026202e-06, "loss": 0.015256455540657044, "step": 72280 }, { "epoch": 0.6803294117647058, "grad_norm": 0.495099354128962, "learning_rate": 2.4253562503633297e-06, "loss": 0.012997539341449737, "step": 72285 }, { "epoch": 0.6803764705882352, "grad_norm": 0.4290997531554013, "learning_rate": 2.4252723718275102e-06, "loss": 0.016890569031238555, "step": 72290 }, { "epoch": 0.6804235294117648, "grad_norm": 0.444781811830521, "learning_rate": 2.4251885019936554e-06, "loss": 0.018694210052490234, "step": 72295 }, { "epoch": 0.6804705882352942, "grad_norm": 0.22194987677152764, "learning_rate": 2.4251046408602617e-06, "loss": 0.013047230243682862, "step": 72300 }, { "epoch": 0.6805176470588236, "grad_norm": 0.485038712516463, "learning_rate": 2.4250207884258256e-06, "loss": 0.013547362387180328, "step": 72305 }, { "epoch": 0.680564705882353, "grad_norm": 0.4764987381229941, "learning_rate": 2.424936944688842e-06, "loss": 0.011870793998241425, "step": 72310 }, { "epoch": 0.6806117647058824, "grad_norm": 0.6562662538432725, "learning_rate": 2.4248531096478077e-06, "loss": 0.018715211749076845, "step": 72315 }, { "epoch": 0.6806588235294118, "grad_norm": 0.483011427979628, "learning_rate": 2.4247692833012203e-06, "loss": 0.018744750320911406, "step": 72320 }, { "epoch": 0.6807058823529412, "grad_norm": 0.5182488192350251, "learning_rate": 2.424685465647576e-06, "loss": 0.015254095196723938, "step": 72325 }, { "epoch": 0.6807529411764706, "grad_norm": 0.33879153487508606, "learning_rate": 2.4246016566853743e-06, "loss": 0.010965666174888611, "step": 72330 }, { "epoch": 0.6808, "grad_norm": 0.6322644593810455, "learning_rate": 2.424517856413111e-06, "loss": 0.01697836220264435, "step": 72335 }, { "epoch": 0.6808470588235294, "grad_norm": 0.45716627736702414, "learning_rate": 2.4244340648292863e-06, "loss": 0.013699004054069519, "step": 72340 }, { "epoch": 0.6808941176470589, "grad_norm": 0.2814954012048793, "learning_rate": 2.424350281932398e-06, "loss": 0.015156401693820954, "step": 72345 }, { "epoch": 0.6809411764705883, "grad_norm": 0.7199607216811771, "learning_rate": 2.424266507720945e-06, "loss": 0.015474662184715271, "step": 72350 }, { "epoch": 0.6809882352941177, "grad_norm": 0.5282077767825349, "learning_rate": 2.424182742193427e-06, "loss": 0.012563887238502502, "step": 72355 }, { "epoch": 0.6810352941176471, "grad_norm": 0.46836759809627787, "learning_rate": 2.4240989853483442e-06, "loss": 0.018733960390090943, "step": 72360 }, { "epoch": 0.6810823529411765, "grad_norm": 0.6228952051729251, "learning_rate": 2.4240152371841967e-06, "loss": 0.01303824782371521, "step": 72365 }, { "epoch": 0.6811294117647059, "grad_norm": 0.5182208740660649, "learning_rate": 2.4239314976994845e-06, "loss": 0.013829188048839569, "step": 72370 }, { "epoch": 0.6811764705882353, "grad_norm": 0.45126995861689134, "learning_rate": 2.4238477668927086e-06, "loss": 0.014508485794067383, "step": 72375 }, { "epoch": 0.6812235294117647, "grad_norm": 0.49009519187957956, "learning_rate": 2.4237640447623716e-06, "loss": 0.015286709368228912, "step": 72380 }, { "epoch": 0.6812705882352941, "grad_norm": 0.3552128986959431, "learning_rate": 2.4236803313069737e-06, "loss": 0.01843467354774475, "step": 72385 }, { "epoch": 0.6813176470588236, "grad_norm": 0.5895603856119451, "learning_rate": 2.4235966265250167e-06, "loss": 0.01575944721698761, "step": 72390 }, { "epoch": 0.681364705882353, "grad_norm": 0.5977390879210732, "learning_rate": 2.4235129304150045e-06, "loss": 0.019515575468540193, "step": 72395 }, { "epoch": 0.6814117647058824, "grad_norm": 0.5627208024364841, "learning_rate": 2.423429242975438e-06, "loss": 0.016165699064731597, "step": 72400 }, { "epoch": 0.6814588235294118, "grad_norm": 0.5948931346088903, "learning_rate": 2.423345564204822e-06, "loss": 0.014372697472572327, "step": 72405 }, { "epoch": 0.6815058823529412, "grad_norm": 0.3506302069517331, "learning_rate": 2.4232618941016586e-06, "loss": 0.016100142896175385, "step": 72410 }, { "epoch": 0.6815529411764706, "grad_norm": 0.3465990561067949, "learning_rate": 2.423178232664452e-06, "loss": 0.014779268205165863, "step": 72415 }, { "epoch": 0.6816, "grad_norm": 0.464580287813459, "learning_rate": 2.4230945798917072e-06, "loss": 0.015154649317264558, "step": 72420 }, { "epoch": 0.6816470588235294, "grad_norm": 0.7143690022204475, "learning_rate": 2.4230109357819278e-06, "loss": 0.015600818395614623, "step": 72425 }, { "epoch": 0.6816941176470588, "grad_norm": 0.6227438294318062, "learning_rate": 2.4229273003336184e-06, "loss": 0.020857104659080507, "step": 72430 }, { "epoch": 0.6817411764705882, "grad_norm": 0.6484782027681703, "learning_rate": 2.422843673545285e-06, "loss": 0.019240443408489228, "step": 72435 }, { "epoch": 0.6817882352941177, "grad_norm": 0.4298815777575166, "learning_rate": 2.4227600554154336e-06, "loss": 0.012272722274065017, "step": 72440 }, { "epoch": 0.6818352941176471, "grad_norm": 0.7223406701612618, "learning_rate": 2.4226764459425695e-06, "loss": 0.015037059783935547, "step": 72445 }, { "epoch": 0.6818823529411765, "grad_norm": 0.47094498198303847, "learning_rate": 2.422592845125199e-06, "loss": 0.01603062152862549, "step": 72450 }, { "epoch": 0.6819294117647059, "grad_norm": 0.5151862834786731, "learning_rate": 2.422509252961829e-06, "loss": 0.01551891714334488, "step": 72455 }, { "epoch": 0.6819764705882353, "grad_norm": 0.564874402612442, "learning_rate": 2.4224256694509664e-06, "loss": 0.014400957524776459, "step": 72460 }, { "epoch": 0.6820235294117647, "grad_norm": 0.5528875511812625, "learning_rate": 2.422342094591118e-06, "loss": 0.022650013864040374, "step": 72465 }, { "epoch": 0.6820705882352941, "grad_norm": 0.6075835495864667, "learning_rate": 2.4222585283807934e-06, "loss": 0.014020983874797822, "step": 72470 }, { "epoch": 0.6821176470588235, "grad_norm": 0.3634965278277438, "learning_rate": 2.4221749708184993e-06, "loss": 0.01579132378101349, "step": 72475 }, { "epoch": 0.6821647058823529, "grad_norm": 0.3723139960533229, "learning_rate": 2.4220914219027446e-06, "loss": 0.017992471158504487, "step": 72480 }, { "epoch": 0.6822117647058824, "grad_norm": 0.4127892504441258, "learning_rate": 2.4220078816320377e-06, "loss": 0.0166913241147995, "step": 72485 }, { "epoch": 0.6822588235294118, "grad_norm": 0.39554872437668975, "learning_rate": 2.4219243500048887e-06, "loss": 0.013591240346431731, "step": 72490 }, { "epoch": 0.6823058823529412, "grad_norm": 0.5883380264528545, "learning_rate": 2.4218408270198064e-06, "loss": 0.013094918429851532, "step": 72495 }, { "epoch": 0.6823529411764706, "grad_norm": 0.642987898977256, "learning_rate": 2.4217573126753012e-06, "loss": 0.02084055244922638, "step": 72500 }, { "epoch": 0.6824, "grad_norm": 0.6750802301927658, "learning_rate": 2.421673806969883e-06, "loss": 0.017145712673664094, "step": 72505 }, { "epoch": 0.6824470588235294, "grad_norm": 0.47405095849167234, "learning_rate": 2.4215903099020635e-06, "loss": 0.016834141314029695, "step": 72510 }, { "epoch": 0.6824941176470588, "grad_norm": 0.9353032049891151, "learning_rate": 2.4215068214703524e-06, "loss": 0.013371835649013519, "step": 72515 }, { "epoch": 0.6825411764705882, "grad_norm": 0.4071431291561113, "learning_rate": 2.421423341673262e-06, "loss": 0.018048274517059325, "step": 72520 }, { "epoch": 0.6825882352941176, "grad_norm": 0.3882854355709748, "learning_rate": 2.421339870509303e-06, "loss": 0.015835504233837127, "step": 72525 }, { "epoch": 0.682635294117647, "grad_norm": 0.3720938685188097, "learning_rate": 2.4212564079769885e-06, "loss": 0.015997448563575746, "step": 72530 }, { "epoch": 0.6826823529411765, "grad_norm": 0.5147605569141839, "learning_rate": 2.421172954074831e-06, "loss": 0.01651313155889511, "step": 72535 }, { "epoch": 0.6827294117647059, "grad_norm": 0.3124995808316034, "learning_rate": 2.4210895088013423e-06, "loss": 0.013285905122756958, "step": 72540 }, { "epoch": 0.6827764705882353, "grad_norm": 0.7071163642181905, "learning_rate": 2.4210060721550365e-06, "loss": 0.014853820204734802, "step": 72545 }, { "epoch": 0.6828235294117647, "grad_norm": 0.6157957602672786, "learning_rate": 2.4209226441344266e-06, "loss": 0.012901110947132111, "step": 72550 }, { "epoch": 0.6828705882352941, "grad_norm": 0.6124822793156854, "learning_rate": 2.420839224738027e-06, "loss": 0.012700748443603516, "step": 72555 }, { "epoch": 0.6829176470588235, "grad_norm": 0.39009222873140487, "learning_rate": 2.420755813964352e-06, "loss": 0.01747354567050934, "step": 72560 }, { "epoch": 0.6829647058823529, "grad_norm": 0.4108875192646385, "learning_rate": 2.4206724118119153e-06, "loss": 0.019915691018104552, "step": 72565 }, { "epoch": 0.6830117647058823, "grad_norm": 0.4496564673934835, "learning_rate": 2.420589018279233e-06, "loss": 0.014797386527061463, "step": 72570 }, { "epoch": 0.6830588235294117, "grad_norm": 0.4980692098185546, "learning_rate": 2.4205056333648196e-06, "loss": 0.014273086190223694, "step": 72575 }, { "epoch": 0.6831058823529412, "grad_norm": 0.5661161238502587, "learning_rate": 2.4204222570671908e-06, "loss": 0.018596732616424562, "step": 72580 }, { "epoch": 0.6831529411764706, "grad_norm": 0.45027299806356574, "learning_rate": 2.4203388893848637e-06, "loss": 0.016034623980522154, "step": 72585 }, { "epoch": 0.6832, "grad_norm": 0.4859634458209103, "learning_rate": 2.4202555303163534e-06, "loss": 0.011635634303092956, "step": 72590 }, { "epoch": 0.6832470588235294, "grad_norm": 0.40789799575267577, "learning_rate": 2.420172179860177e-06, "loss": 0.011452654004096985, "step": 72595 }, { "epoch": 0.6832941176470588, "grad_norm": 0.45388470340969644, "learning_rate": 2.420088838014852e-06, "loss": 0.018654114007949828, "step": 72600 }, { "epoch": 0.6833411764705882, "grad_norm": 0.5632588539480112, "learning_rate": 2.4200055047788953e-06, "loss": 0.01436786949634552, "step": 72605 }, { "epoch": 0.6833882352941176, "grad_norm": 0.3421631849345492, "learning_rate": 2.419922180150826e-06, "loss": 0.01559440940618515, "step": 72610 }, { "epoch": 0.683435294117647, "grad_norm": 0.5110688803172404, "learning_rate": 2.4198388641291607e-06, "loss": 0.01746225655078888, "step": 72615 }, { "epoch": 0.6834823529411764, "grad_norm": 0.6323265344678805, "learning_rate": 2.4197555567124185e-06, "loss": 0.01922789365053177, "step": 72620 }, { "epoch": 0.6835294117647058, "grad_norm": 0.463643579879004, "learning_rate": 2.4196722578991186e-06, "loss": 0.012574008107185364, "step": 72625 }, { "epoch": 0.6835764705882353, "grad_norm": 0.660439057889447, "learning_rate": 2.4195889676877806e-06, "loss": 0.017479929327964782, "step": 72630 }, { "epoch": 0.6836235294117647, "grad_norm": 0.5777637366780558, "learning_rate": 2.4195056860769224e-06, "loss": 0.015956267714500427, "step": 72635 }, { "epoch": 0.6836705882352941, "grad_norm": 0.4173604410752816, "learning_rate": 2.419422413065066e-06, "loss": 0.014365015923976899, "step": 72640 }, { "epoch": 0.6837176470588235, "grad_norm": 0.4188070634245309, "learning_rate": 2.4193391486507305e-06, "loss": 0.01438467800617218, "step": 72645 }, { "epoch": 0.6837647058823529, "grad_norm": 0.6960891054746157, "learning_rate": 2.4192558928324377e-06, "loss": 0.016857604682445525, "step": 72650 }, { "epoch": 0.6838117647058823, "grad_norm": 0.5370128208428091, "learning_rate": 2.419172645608707e-06, "loss": 0.01938346177339554, "step": 72655 }, { "epoch": 0.6838588235294117, "grad_norm": 0.4617345311801702, "learning_rate": 2.419089406978061e-06, "loss": 0.013039204478263854, "step": 72660 }, { "epoch": 0.6839058823529411, "grad_norm": 0.3951412405537272, "learning_rate": 2.4190061769390216e-06, "loss": 0.015583430230617524, "step": 72665 }, { "epoch": 0.6839529411764705, "grad_norm": 0.2944104530695273, "learning_rate": 2.41892295549011e-06, "loss": 0.0149298757314682, "step": 72670 }, { "epoch": 0.684, "grad_norm": 0.5178731960899599, "learning_rate": 2.4188397426298495e-06, "loss": 0.01742272675037384, "step": 72675 }, { "epoch": 0.6840470588235295, "grad_norm": 0.4249646900279039, "learning_rate": 2.418756538356762e-06, "loss": 0.01600489765405655, "step": 72680 }, { "epoch": 0.6840941176470589, "grad_norm": 0.3032689852503038, "learning_rate": 2.418673342669372e-06, "loss": 0.017015255987644196, "step": 72685 }, { "epoch": 0.6841411764705883, "grad_norm": 0.498151042657637, "learning_rate": 2.4185901555662017e-06, "loss": 0.021120166778564452, "step": 72690 }, { "epoch": 0.6841882352941177, "grad_norm": 0.5394233882999077, "learning_rate": 2.418506977045775e-06, "loss": 0.01474599540233612, "step": 72695 }, { "epoch": 0.684235294117647, "grad_norm": 0.5782068440101858, "learning_rate": 2.4184238071066174e-06, "loss": 0.012045849859714509, "step": 72700 }, { "epoch": 0.6842823529411765, "grad_norm": 0.3441587423285628, "learning_rate": 2.418340645747253e-06, "loss": 0.01301557719707489, "step": 72705 }, { "epoch": 0.6843294117647059, "grad_norm": 0.5761429059507557, "learning_rate": 2.418257492966206e-06, "loss": 0.01712419241666794, "step": 72710 }, { "epoch": 0.6843764705882353, "grad_norm": 0.3832804496781502, "learning_rate": 2.418174348762003e-06, "loss": 0.0140327587723732, "step": 72715 }, { "epoch": 0.6844235294117647, "grad_norm": 0.6599983687371583, "learning_rate": 2.4180912131331684e-06, "loss": 0.01661413013935089, "step": 72720 }, { "epoch": 0.6844705882352942, "grad_norm": 0.4632584005759307, "learning_rate": 2.418008086078228e-06, "loss": 0.017172828316688538, "step": 72725 }, { "epoch": 0.6845176470588236, "grad_norm": 0.6501644354457369, "learning_rate": 2.4179249675957096e-06, "loss": 0.013407181203365325, "step": 72730 }, { "epoch": 0.684564705882353, "grad_norm": 0.41426380018617265, "learning_rate": 2.4178418576841393e-06, "loss": 0.017310786247253417, "step": 72735 }, { "epoch": 0.6846117647058824, "grad_norm": 0.7904805232841848, "learning_rate": 2.417758756342044e-06, "loss": 0.012548121809959411, "step": 72740 }, { "epoch": 0.6846588235294118, "grad_norm": 0.5209759493158276, "learning_rate": 2.4176756635679513e-06, "loss": 0.016400361061096193, "step": 72745 }, { "epoch": 0.6847058823529412, "grad_norm": 0.5781397798166475, "learning_rate": 2.417592579360389e-06, "loss": 0.012442909181118011, "step": 72750 }, { "epoch": 0.6847529411764706, "grad_norm": 0.6002035333711752, "learning_rate": 2.4175095037178846e-06, "loss": 0.017203590273857115, "step": 72755 }, { "epoch": 0.6848, "grad_norm": 0.3486782429849118, "learning_rate": 2.417426436638968e-06, "loss": 0.012294486165046692, "step": 72760 }, { "epoch": 0.6848470588235294, "grad_norm": 0.589136190367426, "learning_rate": 2.417343378122166e-06, "loss": 0.018535301089286804, "step": 72765 }, { "epoch": 0.6848941176470589, "grad_norm": 0.4149631989587185, "learning_rate": 2.41726032816601e-06, "loss": 0.01848033368587494, "step": 72770 }, { "epoch": 0.6849411764705883, "grad_norm": 0.8165734159054199, "learning_rate": 2.4171772867690283e-06, "loss": 0.014915885031223297, "step": 72775 }, { "epoch": 0.6849882352941177, "grad_norm": 0.584165752953369, "learning_rate": 2.417094253929751e-06, "loss": 0.016610154509544374, "step": 72780 }, { "epoch": 0.6850352941176471, "grad_norm": 0.6680328490934537, "learning_rate": 2.4170112296467087e-06, "loss": 0.019297027587890626, "step": 72785 }, { "epoch": 0.6850823529411765, "grad_norm": 0.4175558235700339, "learning_rate": 2.4169282139184317e-06, "loss": 0.01259695142507553, "step": 72790 }, { "epoch": 0.6851294117647059, "grad_norm": 0.42176052836987904, "learning_rate": 2.4168452067434507e-06, "loss": 0.01044028177857399, "step": 72795 }, { "epoch": 0.6851764705882353, "grad_norm": 0.6439925642357696, "learning_rate": 2.416762208120298e-06, "loss": 0.017439962923526765, "step": 72800 }, { "epoch": 0.6852235294117647, "grad_norm": 0.5583922437116366, "learning_rate": 2.416679218047504e-06, "loss": 0.015711264312267305, "step": 72805 }, { "epoch": 0.6852705882352941, "grad_norm": 0.5759742531372279, "learning_rate": 2.4165962365236017e-06, "loss": 0.013536393642425537, "step": 72810 }, { "epoch": 0.6853176470588235, "grad_norm": 0.5163084621820138, "learning_rate": 2.416513263547123e-06, "loss": 0.01489802747964859, "step": 72815 }, { "epoch": 0.685364705882353, "grad_norm": 0.5387697318687468, "learning_rate": 2.4164302991166015e-06, "loss": 0.012785682082176208, "step": 72820 }, { "epoch": 0.6854117647058824, "grad_norm": 0.7721250067885047, "learning_rate": 2.416347343230569e-06, "loss": 0.01352630853652954, "step": 72825 }, { "epoch": 0.6854588235294118, "grad_norm": 0.488749409470584, "learning_rate": 2.4162643958875594e-06, "loss": 0.014678031206130981, "step": 72830 }, { "epoch": 0.6855058823529412, "grad_norm": 0.4786051569519169, "learning_rate": 2.416181457086107e-06, "loss": 0.016207385063171386, "step": 72835 }, { "epoch": 0.6855529411764706, "grad_norm": 0.46617159538654784, "learning_rate": 2.4160985268247454e-06, "loss": 0.01641368567943573, "step": 72840 }, { "epoch": 0.6856, "grad_norm": 0.5940553696275988, "learning_rate": 2.416015605102009e-06, "loss": 0.016155877709388734, "step": 72845 }, { "epoch": 0.6856470588235294, "grad_norm": 0.7125711704552276, "learning_rate": 2.4159326919164326e-06, "loss": 0.017901507019996644, "step": 72850 }, { "epoch": 0.6856941176470588, "grad_norm": 0.40282770981859917, "learning_rate": 2.415849787266553e-06, "loss": 0.014835655689239502, "step": 72855 }, { "epoch": 0.6857411764705882, "grad_norm": 0.5720608284775485, "learning_rate": 2.4157668911509037e-06, "loss": 0.016138830780982973, "step": 72860 }, { "epoch": 0.6857882352941177, "grad_norm": 0.5306394285156313, "learning_rate": 2.415684003568021e-06, "loss": 0.013806699216365815, "step": 72865 }, { "epoch": 0.6858352941176471, "grad_norm": 0.7474377878841936, "learning_rate": 2.415601124516442e-06, "loss": 0.01222369372844696, "step": 72870 }, { "epoch": 0.6858823529411765, "grad_norm": 0.6094814417013912, "learning_rate": 2.415518253994703e-06, "loss": 0.01234893947839737, "step": 72875 }, { "epoch": 0.6859294117647059, "grad_norm": 0.6000584615533215, "learning_rate": 2.4154353920013407e-06, "loss": 0.013720467686653137, "step": 72880 }, { "epoch": 0.6859764705882353, "grad_norm": 0.552682286789484, "learning_rate": 2.4153525385348926e-06, "loss": 0.017758411169052125, "step": 72885 }, { "epoch": 0.6860235294117647, "grad_norm": 0.5126147966733008, "learning_rate": 2.415269693593896e-06, "loss": 0.013231107592582702, "step": 72890 }, { "epoch": 0.6860705882352941, "grad_norm": 0.32724694726052506, "learning_rate": 2.4151868571768895e-06, "loss": 0.0114255391061306, "step": 72895 }, { "epoch": 0.6861176470588235, "grad_norm": 0.45503738725597626, "learning_rate": 2.4151040292824106e-06, "loss": 0.016008022427558898, "step": 72900 }, { "epoch": 0.6861647058823529, "grad_norm": 0.6260737444045218, "learning_rate": 2.415021209908999e-06, "loss": 0.013468015193939208, "step": 72905 }, { "epoch": 0.6862117647058823, "grad_norm": 0.31815675251905207, "learning_rate": 2.4149383990551935e-06, "loss": 0.015590746700763703, "step": 72910 }, { "epoch": 0.6862588235294118, "grad_norm": 0.5660044152085828, "learning_rate": 2.4148555967195335e-06, "loss": 0.016683760285377502, "step": 72915 }, { "epoch": 0.6863058823529412, "grad_norm": 0.6995732648014766, "learning_rate": 2.414772802900558e-06, "loss": 0.015570352971553802, "step": 72920 }, { "epoch": 0.6863529411764706, "grad_norm": 0.45773064039729505, "learning_rate": 2.4146900175968084e-06, "loss": 0.012682068347930908, "step": 72925 }, { "epoch": 0.6864, "grad_norm": 0.5950250632339978, "learning_rate": 2.414607240806824e-06, "loss": 0.015328001976013184, "step": 72930 }, { "epoch": 0.6864470588235294, "grad_norm": 0.5458037900599009, "learning_rate": 2.414524472529147e-06, "loss": 0.01466851532459259, "step": 72935 }, { "epoch": 0.6864941176470588, "grad_norm": 0.5606442731497147, "learning_rate": 2.4144417127623168e-06, "loss": 0.015930472314357756, "step": 72940 }, { "epoch": 0.6865411764705882, "grad_norm": 0.6818028372983864, "learning_rate": 2.4143589615048763e-06, "loss": 0.013269826769828796, "step": 72945 }, { "epoch": 0.6865882352941176, "grad_norm": 0.39395730024566605, "learning_rate": 2.414276218755367e-06, "loss": 0.013433113694190979, "step": 72950 }, { "epoch": 0.686635294117647, "grad_norm": 0.5467711383520166, "learning_rate": 2.41419348451233e-06, "loss": 0.01717871129512787, "step": 72955 }, { "epoch": 0.6866823529411765, "grad_norm": 0.43269250957896843, "learning_rate": 2.4141107587743103e-06, "loss": 0.01654860973358154, "step": 72960 }, { "epoch": 0.6867294117647059, "grad_norm": 0.4828894399165333, "learning_rate": 2.414028041539849e-06, "loss": 0.016937808692455293, "step": 72965 }, { "epoch": 0.6867764705882353, "grad_norm": 0.42145170416405864, "learning_rate": 2.413945332807489e-06, "loss": 0.010794932395219803, "step": 72970 }, { "epoch": 0.6868235294117647, "grad_norm": 0.8716106589613626, "learning_rate": 2.4138626325757756e-06, "loss": 0.021671250462532043, "step": 72975 }, { "epoch": 0.6868705882352941, "grad_norm": 0.45254580567933284, "learning_rate": 2.4137799408432515e-06, "loss": 0.011923380196094513, "step": 72980 }, { "epoch": 0.6869176470588235, "grad_norm": 0.38281168105763563, "learning_rate": 2.4136972576084614e-06, "loss": 0.01828780919313431, "step": 72985 }, { "epoch": 0.6869647058823529, "grad_norm": 0.5081098457843372, "learning_rate": 2.4136145828699497e-06, "loss": 0.017008158564567565, "step": 72990 }, { "epoch": 0.6870117647058823, "grad_norm": 0.6207056808114586, "learning_rate": 2.4135319166262614e-06, "loss": 0.017834541201591492, "step": 72995 }, { "epoch": 0.6870588235294117, "grad_norm": 0.3878040726836461, "learning_rate": 2.4134492588759433e-06, "loss": 0.014690592885017395, "step": 73000 }, { "epoch": 0.6871058823529411, "grad_norm": 0.39822358275158853, "learning_rate": 2.413366609617539e-06, "loss": 0.01352282464504242, "step": 73005 }, { "epoch": 0.6871529411764706, "grad_norm": 0.4283640521370385, "learning_rate": 2.4132839688495958e-06, "loss": 0.013591177761554718, "step": 73010 }, { "epoch": 0.6872, "grad_norm": 0.644015614524003, "learning_rate": 2.4132013365706594e-06, "loss": 0.0179006427526474, "step": 73015 }, { "epoch": 0.6872470588235294, "grad_norm": 0.3578637574281738, "learning_rate": 2.4131187127792774e-06, "loss": 0.014753755927085877, "step": 73020 }, { "epoch": 0.6872941176470588, "grad_norm": 0.6160629582443463, "learning_rate": 2.4130360974739962e-06, "loss": 0.020409037172794343, "step": 73025 }, { "epoch": 0.6873411764705882, "grad_norm": 0.505282289534217, "learning_rate": 2.412953490653364e-06, "loss": 0.013505500555038453, "step": 73030 }, { "epoch": 0.6873882352941176, "grad_norm": 0.4053754032606423, "learning_rate": 2.4128708923159273e-06, "loss": 0.01645611524581909, "step": 73035 }, { "epoch": 0.687435294117647, "grad_norm": 0.5088480767836936, "learning_rate": 2.412788302460236e-06, "loss": 0.015819558501243593, "step": 73040 }, { "epoch": 0.6874823529411764, "grad_norm": 0.4719317487095614, "learning_rate": 2.412705721084837e-06, "loss": 0.01604001075029373, "step": 73045 }, { "epoch": 0.6875294117647058, "grad_norm": 0.28604782106402354, "learning_rate": 2.41262314818828e-06, "loss": 0.014092804491519928, "step": 73050 }, { "epoch": 0.6875764705882353, "grad_norm": 0.389240549897776, "learning_rate": 2.4125405837691137e-06, "loss": 0.014891429245471955, "step": 73055 }, { "epoch": 0.6876235294117647, "grad_norm": 1.71462921003465, "learning_rate": 2.4124580278258877e-06, "loss": 0.017064619064331054, "step": 73060 }, { "epoch": 0.6876705882352941, "grad_norm": 0.46471581766871206, "learning_rate": 2.4123754803571527e-06, "loss": 0.01928311288356781, "step": 73065 }, { "epoch": 0.6877176470588235, "grad_norm": 0.575808787242833, "learning_rate": 2.412292941361458e-06, "loss": 0.01610337197780609, "step": 73070 }, { "epoch": 0.687764705882353, "grad_norm": 0.3551688528187013, "learning_rate": 2.412210410837355e-06, "loss": 0.013554385304450989, "step": 73075 }, { "epoch": 0.6878117647058823, "grad_norm": 0.32755971771935544, "learning_rate": 2.412127888783394e-06, "loss": 0.016031137108802794, "step": 73080 }, { "epoch": 0.6878588235294117, "grad_norm": 0.39011681527118075, "learning_rate": 2.412045375198126e-06, "loss": 0.014650100469589233, "step": 73085 }, { "epoch": 0.6879058823529411, "grad_norm": 0.49593529962536015, "learning_rate": 2.4119628700801036e-06, "loss": 0.013719463348388672, "step": 73090 }, { "epoch": 0.6879529411764705, "grad_norm": 0.368733615056022, "learning_rate": 2.411880373427878e-06, "loss": 0.016355617344379424, "step": 73095 }, { "epoch": 0.688, "grad_norm": 0.41799201329232555, "learning_rate": 2.4117978852400015e-06, "loss": 0.0188685804605484, "step": 73100 }, { "epoch": 0.6880470588235295, "grad_norm": 0.45924250433952496, "learning_rate": 2.411715405515027e-06, "loss": 0.01689416468143463, "step": 73105 }, { "epoch": 0.6880941176470589, "grad_norm": 0.5132671399385699, "learning_rate": 2.4116329342515075e-06, "loss": 0.017358700931072234, "step": 73110 }, { "epoch": 0.6881411764705883, "grad_norm": 0.6549794517726846, "learning_rate": 2.411550471447997e-06, "loss": 0.015333864092826843, "step": 73115 }, { "epoch": 0.6881882352941177, "grad_norm": 0.5879404501985799, "learning_rate": 2.4114680171030484e-06, "loss": 0.010028742998838425, "step": 73120 }, { "epoch": 0.6882352941176471, "grad_norm": 0.42534386786963246, "learning_rate": 2.4113855712152156e-06, "loss": 0.015323363244533539, "step": 73125 }, { "epoch": 0.6882823529411765, "grad_norm": 0.6091998189430567, "learning_rate": 2.4113031337830535e-06, "loss": 0.015470707416534423, "step": 73130 }, { "epoch": 0.6883294117647059, "grad_norm": 0.4517569874419785, "learning_rate": 2.4112207048051167e-06, "loss": 0.01630414426326752, "step": 73135 }, { "epoch": 0.6883764705882353, "grad_norm": 0.3304583001023307, "learning_rate": 2.41113828427996e-06, "loss": 0.018929296731948854, "step": 73140 }, { "epoch": 0.6884235294117647, "grad_norm": 0.5891299231128178, "learning_rate": 2.4110558722061387e-06, "loss": 0.0178900808095932, "step": 73145 }, { "epoch": 0.6884705882352942, "grad_norm": 0.32746178698073247, "learning_rate": 2.4109734685822096e-06, "loss": 0.01296958327293396, "step": 73150 }, { "epoch": 0.6885176470588236, "grad_norm": 0.50416658750482, "learning_rate": 2.410891073406728e-06, "loss": 0.01596542149782181, "step": 73155 }, { "epoch": 0.688564705882353, "grad_norm": 0.4226420263355766, "learning_rate": 2.410808686678251e-06, "loss": 0.016371358931064606, "step": 73160 }, { "epoch": 0.6886117647058824, "grad_norm": 0.4615205641970932, "learning_rate": 2.410726308395334e-06, "loss": 0.01722456216812134, "step": 73165 }, { "epoch": 0.6886588235294118, "grad_norm": 0.5012952206014715, "learning_rate": 2.4106439385565356e-06, "loss": 0.013902854919433594, "step": 73170 }, { "epoch": 0.6887058823529412, "grad_norm": 0.3888100065338352, "learning_rate": 2.410561577160413e-06, "loss": 0.015780678391456603, "step": 73175 }, { "epoch": 0.6887529411764706, "grad_norm": 0.4112356207820298, "learning_rate": 2.410479224205523e-06, "loss": 0.013006940484046936, "step": 73180 }, { "epoch": 0.6888, "grad_norm": 0.4964835884563695, "learning_rate": 2.4103968796904255e-06, "loss": 0.015707376599311828, "step": 73185 }, { "epoch": 0.6888470588235294, "grad_norm": 0.7349262978757899, "learning_rate": 2.410314543613678e-06, "loss": 0.018461841344833373, "step": 73190 }, { "epoch": 0.6888941176470589, "grad_norm": 0.6006394125656725, "learning_rate": 2.410232215973839e-06, "loss": 0.012288369238376617, "step": 73195 }, { "epoch": 0.6889411764705883, "grad_norm": 0.5461889469208208, "learning_rate": 2.410149896769469e-06, "loss": 0.015633025765419008, "step": 73200 }, { "epoch": 0.6889882352941177, "grad_norm": 0.7083919910169247, "learning_rate": 2.4100675859991267e-06, "loss": 0.013701820373535156, "step": 73205 }, { "epoch": 0.6890352941176471, "grad_norm": 0.8220835704771058, "learning_rate": 2.4099852836613717e-06, "loss": 0.017565572261810304, "step": 73210 }, { "epoch": 0.6890823529411765, "grad_norm": 0.4521056032531468, "learning_rate": 2.4099029897547654e-06, "loss": 0.013762524724006653, "step": 73215 }, { "epoch": 0.6891294117647059, "grad_norm": 0.6817063829023625, "learning_rate": 2.409820704277867e-06, "loss": 0.017037448287010194, "step": 73220 }, { "epoch": 0.6891764705882353, "grad_norm": 0.3722675449075396, "learning_rate": 2.409738427229239e-06, "loss": 0.01378726214170456, "step": 73225 }, { "epoch": 0.6892235294117647, "grad_norm": 0.3437059294532203, "learning_rate": 2.409656158607442e-06, "loss": 0.01660914421081543, "step": 73230 }, { "epoch": 0.6892705882352941, "grad_norm": 0.5598064588122154, "learning_rate": 2.409573898411037e-06, "loss": 0.01649535298347473, "step": 73235 }, { "epoch": 0.6893176470588235, "grad_norm": 0.8115470785241293, "learning_rate": 2.4094916466385866e-06, "loss": 0.01944529712200165, "step": 73240 }, { "epoch": 0.689364705882353, "grad_norm": 0.6501995825182454, "learning_rate": 2.4094094032886524e-06, "loss": 0.018909773230552672, "step": 73245 }, { "epoch": 0.6894117647058824, "grad_norm": 0.6813522801185621, "learning_rate": 2.4093271683597987e-06, "loss": 0.015031151473522186, "step": 73250 }, { "epoch": 0.6894588235294118, "grad_norm": 0.44583511208698456, "learning_rate": 2.409244941850587e-06, "loss": 0.015920159220695496, "step": 73255 }, { "epoch": 0.6895058823529412, "grad_norm": 0.4768824339332928, "learning_rate": 2.409162723759581e-06, "loss": 0.018420852720737457, "step": 73260 }, { "epoch": 0.6895529411764706, "grad_norm": 0.6991870576573183, "learning_rate": 2.4090805140853447e-06, "loss": 0.01486380398273468, "step": 73265 }, { "epoch": 0.6896, "grad_norm": 0.5843499355890202, "learning_rate": 2.4089983128264424e-06, "loss": 0.018394768238067627, "step": 73270 }, { "epoch": 0.6896470588235294, "grad_norm": 0.3350745804429933, "learning_rate": 2.4089161199814372e-06, "loss": 0.01570487767457962, "step": 73275 }, { "epoch": 0.6896941176470588, "grad_norm": 0.4925827937581858, "learning_rate": 2.4088339355488957e-06, "loss": 0.01609877645969391, "step": 73280 }, { "epoch": 0.6897411764705882, "grad_norm": 0.4174134517664331, "learning_rate": 2.408751759527381e-06, "loss": 0.014557990431785583, "step": 73285 }, { "epoch": 0.6897882352941177, "grad_norm": 0.47911142118038164, "learning_rate": 2.4086695919154603e-06, "loss": 0.012833766639232635, "step": 73290 }, { "epoch": 0.6898352941176471, "grad_norm": 0.23466688814113904, "learning_rate": 2.4085874327116977e-06, "loss": 0.013430353999137879, "step": 73295 }, { "epoch": 0.6898823529411765, "grad_norm": 0.4028083624526335, "learning_rate": 2.408505281914661e-06, "loss": 0.011721939593553544, "step": 73300 }, { "epoch": 0.6899294117647059, "grad_norm": 0.5189835129791368, "learning_rate": 2.408423139522915e-06, "loss": 0.015634170174598692, "step": 73305 }, { "epoch": 0.6899764705882353, "grad_norm": 0.8239131779621393, "learning_rate": 2.408341005535028e-06, "loss": 0.01440250277519226, "step": 73310 }, { "epoch": 0.6900235294117647, "grad_norm": 0.5000281089849096, "learning_rate": 2.4082588799495653e-06, "loss": 0.017544659972190856, "step": 73315 }, { "epoch": 0.6900705882352941, "grad_norm": 0.39577366095438093, "learning_rate": 2.4081767627650963e-06, "loss": 0.013636425137519836, "step": 73320 }, { "epoch": 0.6901176470588235, "grad_norm": 0.7272136045610919, "learning_rate": 2.4080946539801876e-06, "loss": 0.019328278303146363, "step": 73325 }, { "epoch": 0.6901647058823529, "grad_norm": 0.36176024539912804, "learning_rate": 2.4080125535934077e-06, "loss": 0.017090356349945067, "step": 73330 }, { "epoch": 0.6902117647058823, "grad_norm": 0.5005046493216168, "learning_rate": 2.407930461603325e-06, "loss": 0.015181399881839752, "step": 73335 }, { "epoch": 0.6902588235294118, "grad_norm": 0.4210719215424145, "learning_rate": 2.4078483780085084e-06, "loss": 0.018413010239601135, "step": 73340 }, { "epoch": 0.6903058823529412, "grad_norm": 0.42845541961470235, "learning_rate": 2.407766302807527e-06, "loss": 0.015942150354385377, "step": 73345 }, { "epoch": 0.6903529411764706, "grad_norm": 0.36127866899840994, "learning_rate": 2.4076842359989504e-06, "loss": 0.01535148024559021, "step": 73350 }, { "epoch": 0.6904, "grad_norm": 0.4722628495880562, "learning_rate": 2.407602177581349e-06, "loss": 0.016061480343341827, "step": 73355 }, { "epoch": 0.6904470588235294, "grad_norm": 0.41412494918308307, "learning_rate": 2.4075201275532918e-06, "loss": 0.016896232962608337, "step": 73360 }, { "epoch": 0.6904941176470588, "grad_norm": 0.26912971267604585, "learning_rate": 2.40743808591335e-06, "loss": 0.0164612740278244, "step": 73365 }, { "epoch": 0.6905411764705882, "grad_norm": 0.40165267258475557, "learning_rate": 2.4073560526600943e-06, "loss": 0.014829570055007934, "step": 73370 }, { "epoch": 0.6905882352941176, "grad_norm": 0.4563975003532452, "learning_rate": 2.4072740277920966e-06, "loss": 0.016192331910133362, "step": 73375 }, { "epoch": 0.690635294117647, "grad_norm": 0.5521513641554732, "learning_rate": 2.4071920113079278e-06, "loss": 0.015662573277950287, "step": 73380 }, { "epoch": 0.6906823529411765, "grad_norm": 0.6390815174642096, "learning_rate": 2.40711000320616e-06, "loss": 0.021186444163322448, "step": 73385 }, { "epoch": 0.6907294117647059, "grad_norm": 0.49554675316852487, "learning_rate": 2.4070280034853647e-06, "loss": 0.015591040253639221, "step": 73390 }, { "epoch": 0.6907764705882353, "grad_norm": 0.6967564330264386, "learning_rate": 2.406946012144116e-06, "loss": 0.017182251811027525, "step": 73395 }, { "epoch": 0.6908235294117647, "grad_norm": 0.6031292644631141, "learning_rate": 2.4068640291809856e-06, "loss": 0.015008336305618286, "step": 73400 }, { "epoch": 0.6908705882352941, "grad_norm": 0.4713050007753518, "learning_rate": 2.406782054594547e-06, "loss": 0.013839372992515564, "step": 73405 }, { "epoch": 0.6909176470588235, "grad_norm": 0.5082407567693922, "learning_rate": 2.406700088383374e-06, "loss": 0.015199568867683411, "step": 73410 }, { "epoch": 0.6909647058823529, "grad_norm": 0.493689850130518, "learning_rate": 2.4066181305460405e-06, "loss": 0.018478536605834962, "step": 73415 }, { "epoch": 0.6910117647058823, "grad_norm": 0.40494266235506277, "learning_rate": 2.4065361810811208e-06, "loss": 0.014377422630786896, "step": 73420 }, { "epoch": 0.6910588235294117, "grad_norm": 0.5654613486224898, "learning_rate": 2.4064542399871897e-06, "loss": 0.01768832802772522, "step": 73425 }, { "epoch": 0.6911058823529411, "grad_norm": 0.438889023534065, "learning_rate": 2.4063723072628216e-06, "loss": 0.010310199856758118, "step": 73430 }, { "epoch": 0.6911529411764706, "grad_norm": 0.6126577582332215, "learning_rate": 2.406290382906592e-06, "loss": 0.015650741755962372, "step": 73435 }, { "epoch": 0.6912, "grad_norm": 0.4988113156729697, "learning_rate": 2.406208466917077e-06, "loss": 0.01610867977142334, "step": 73440 }, { "epoch": 0.6912470588235294, "grad_norm": 0.4806778637028023, "learning_rate": 2.406126559292852e-06, "loss": 0.017425724864006044, "step": 73445 }, { "epoch": 0.6912941176470588, "grad_norm": 0.5771032123628094, "learning_rate": 2.406044660032494e-06, "loss": 0.017214782536029816, "step": 73450 }, { "epoch": 0.6913411764705882, "grad_norm": 0.5319774132881617, "learning_rate": 2.405962769134579e-06, "loss": 0.014635835587978364, "step": 73455 }, { "epoch": 0.6913882352941176, "grad_norm": 0.7634938349825999, "learning_rate": 2.4058808865976847e-06, "loss": 0.013593201339244843, "step": 73460 }, { "epoch": 0.691435294117647, "grad_norm": 0.5433684413541925, "learning_rate": 2.405799012420387e-06, "loss": 0.01629876494407654, "step": 73465 }, { "epoch": 0.6914823529411764, "grad_norm": 0.4627012500659583, "learning_rate": 2.4057171466012653e-06, "loss": 0.013200116157531739, "step": 73470 }, { "epoch": 0.6915294117647058, "grad_norm": 0.30217824514316133, "learning_rate": 2.4056352891388968e-06, "loss": 0.01823422908782959, "step": 73475 }, { "epoch": 0.6915764705882353, "grad_norm": 0.6038043487283593, "learning_rate": 2.40555344003186e-06, "loss": 0.01610262393951416, "step": 73480 }, { "epoch": 0.6916235294117647, "grad_norm": 0.5664181695115019, "learning_rate": 2.4054715992787332e-06, "loss": 0.013044877350330353, "step": 73485 }, { "epoch": 0.6916705882352941, "grad_norm": 0.38314162816776165, "learning_rate": 2.405389766878096e-06, "loss": 0.017526492476463318, "step": 73490 }, { "epoch": 0.6917176470588235, "grad_norm": 0.4895362872030907, "learning_rate": 2.405307942828527e-06, "loss": 0.016341060400009155, "step": 73495 }, { "epoch": 0.691764705882353, "grad_norm": 0.33724249721914623, "learning_rate": 2.405226127128607e-06, "loss": 0.012735700607299805, "step": 73500 }, { "epoch": 0.6918117647058823, "grad_norm": 0.45873188092821454, "learning_rate": 2.405144319776915e-06, "loss": 0.014158491790294648, "step": 73505 }, { "epoch": 0.6918588235294117, "grad_norm": 0.46416252107609396, "learning_rate": 2.4050625207720325e-06, "loss": 0.014121592044830322, "step": 73510 }, { "epoch": 0.6919058823529411, "grad_norm": 0.7201104110989941, "learning_rate": 2.404980730112539e-06, "loss": 0.01646312475204468, "step": 73515 }, { "epoch": 0.6919529411764705, "grad_norm": 0.6886937602153149, "learning_rate": 2.404898947797016e-06, "loss": 0.016059279441833496, "step": 73520 }, { "epoch": 0.692, "grad_norm": 0.7662316379121494, "learning_rate": 2.4048171738240455e-06, "loss": 0.022956478595733642, "step": 73525 }, { "epoch": 0.6920470588235295, "grad_norm": 0.5995900390025847, "learning_rate": 2.4047354081922084e-06, "loss": 0.015196147561073303, "step": 73530 }, { "epoch": 0.6920941176470589, "grad_norm": 0.44871034328056497, "learning_rate": 2.404653650900088e-06, "loss": 0.014336317777633667, "step": 73535 }, { "epoch": 0.6921411764705883, "grad_norm": 0.4735606506475998, "learning_rate": 2.404571901946265e-06, "loss": 0.012242404371500015, "step": 73540 }, { "epoch": 0.6921882352941177, "grad_norm": 0.44003326956812794, "learning_rate": 2.4044901613293224e-06, "loss": 0.020587708055973052, "step": 73545 }, { "epoch": 0.6922352941176471, "grad_norm": 0.3824046371718729, "learning_rate": 2.404408429047845e-06, "loss": 0.013387009501457214, "step": 73550 }, { "epoch": 0.6922823529411765, "grad_norm": 0.7111499701864423, "learning_rate": 2.4043267051004147e-06, "loss": 0.016918934881687164, "step": 73555 }, { "epoch": 0.6923294117647059, "grad_norm": 0.654904026947422, "learning_rate": 2.4042449894856157e-06, "loss": 0.015173470973968506, "step": 73560 }, { "epoch": 0.6923764705882353, "grad_norm": 0.41370812456955525, "learning_rate": 2.404163282202032e-06, "loss": 0.011242212355136871, "step": 73565 }, { "epoch": 0.6924235294117647, "grad_norm": 0.5563523810577895, "learning_rate": 2.4040815832482478e-06, "loss": 0.014632138609886169, "step": 73570 }, { "epoch": 0.6924705882352942, "grad_norm": 0.4515146879082418, "learning_rate": 2.4039998926228485e-06, "loss": 0.011652830243110656, "step": 73575 }, { "epoch": 0.6925176470588236, "grad_norm": 0.2949766535100394, "learning_rate": 2.4039182103244184e-06, "loss": 0.015095369517803192, "step": 73580 }, { "epoch": 0.692564705882353, "grad_norm": 0.5644497450685523, "learning_rate": 2.403836536351544e-06, "loss": 0.013106845319271088, "step": 73585 }, { "epoch": 0.6926117647058824, "grad_norm": 0.6034794207509779, "learning_rate": 2.40375487070281e-06, "loss": 0.018975619971752167, "step": 73590 }, { "epoch": 0.6926588235294118, "grad_norm": 0.7474732082083811, "learning_rate": 2.4036732133768027e-06, "loss": 0.01333220601081848, "step": 73595 }, { "epoch": 0.6927058823529412, "grad_norm": 0.33520450278422975, "learning_rate": 2.40359156437211e-06, "loss": 0.014430373907089233, "step": 73600 }, { "epoch": 0.6927529411764706, "grad_norm": 0.5224235898899866, "learning_rate": 2.4035099236873165e-06, "loss": 0.01420470029115677, "step": 73605 }, { "epoch": 0.6928, "grad_norm": 0.5359037146710829, "learning_rate": 2.403428291321011e-06, "loss": 0.01389366090297699, "step": 73610 }, { "epoch": 0.6928470588235294, "grad_norm": 0.42774546432267513, "learning_rate": 2.4033466672717793e-06, "loss": 0.015519315004348755, "step": 73615 }, { "epoch": 0.6928941176470588, "grad_norm": 0.4537215738983465, "learning_rate": 2.403265051538211e-06, "loss": 0.01686565428972244, "step": 73620 }, { "epoch": 0.6929411764705883, "grad_norm": 0.5808706786358545, "learning_rate": 2.403183444118893e-06, "loss": 0.014446929097175598, "step": 73625 }, { "epoch": 0.6929882352941177, "grad_norm": 0.6118234899183063, "learning_rate": 2.403101845012415e-06, "loss": 0.013642305135726928, "step": 73630 }, { "epoch": 0.6930352941176471, "grad_norm": 0.5765211276250947, "learning_rate": 2.4030202542173644e-06, "loss": 0.015926159918308258, "step": 73635 }, { "epoch": 0.6930823529411765, "grad_norm": 0.48504227713206133, "learning_rate": 2.402938671732331e-06, "loss": 0.013527807593345643, "step": 73640 }, { "epoch": 0.6931294117647059, "grad_norm": 0.5475285017162123, "learning_rate": 2.402857097555904e-06, "loss": 0.015317253768444061, "step": 73645 }, { "epoch": 0.6931764705882353, "grad_norm": 0.43608096865912926, "learning_rate": 2.4027755316866735e-06, "loss": 0.013691294193267822, "step": 73650 }, { "epoch": 0.6932235294117647, "grad_norm": 0.42029738457069843, "learning_rate": 2.4026939741232296e-06, "loss": 0.01384313851594925, "step": 73655 }, { "epoch": 0.6932705882352941, "grad_norm": 0.4663694932437721, "learning_rate": 2.4026124248641623e-06, "loss": 0.015060925483703613, "step": 73660 }, { "epoch": 0.6933176470588235, "grad_norm": 0.979915095531652, "learning_rate": 2.4025308839080636e-06, "loss": 0.01780550181865692, "step": 73665 }, { "epoch": 0.693364705882353, "grad_norm": 0.5854217018994109, "learning_rate": 2.4024493512535234e-06, "loss": 0.011883806437253952, "step": 73670 }, { "epoch": 0.6934117647058824, "grad_norm": 0.5959907963750988, "learning_rate": 2.402367826899133e-06, "loss": 0.013622555136680602, "step": 73675 }, { "epoch": 0.6934588235294118, "grad_norm": 0.45739317326683254, "learning_rate": 2.402286310843486e-06, "loss": 0.014501550793647766, "step": 73680 }, { "epoch": 0.6935058823529412, "grad_norm": 0.41184924904552594, "learning_rate": 2.402204803085173e-06, "loss": 0.018983660638332366, "step": 73685 }, { "epoch": 0.6935529411764706, "grad_norm": 0.6349895019595536, "learning_rate": 2.4021233036227867e-06, "loss": 0.01170041486620903, "step": 73690 }, { "epoch": 0.6936, "grad_norm": 0.8030826254777003, "learning_rate": 2.4020418124549203e-06, "loss": 0.016542184352874755, "step": 73695 }, { "epoch": 0.6936470588235294, "grad_norm": 0.42481241574564055, "learning_rate": 2.401960329580167e-06, "loss": 0.014648373425006866, "step": 73700 }, { "epoch": 0.6936941176470588, "grad_norm": 0.27728111816734463, "learning_rate": 2.40187885499712e-06, "loss": 0.015427285432815551, "step": 73705 }, { "epoch": 0.6937411764705882, "grad_norm": 0.3114387312482085, "learning_rate": 2.4017973887043726e-06, "loss": 0.012958920001983643, "step": 73710 }, { "epoch": 0.6937882352941176, "grad_norm": 0.6451150371675192, "learning_rate": 2.40171593070052e-06, "loss": 0.017383185029029847, "step": 73715 }, { "epoch": 0.6938352941176471, "grad_norm": 0.49583707913273406, "learning_rate": 2.4016344809841567e-06, "loss": 0.013734479248523713, "step": 73720 }, { "epoch": 0.6938823529411765, "grad_norm": 0.5036889203874358, "learning_rate": 2.4015530395538766e-06, "loss": 0.01708936393260956, "step": 73725 }, { "epoch": 0.6939294117647059, "grad_norm": 0.4812550844812108, "learning_rate": 2.4014716064082753e-06, "loss": 0.01741735339164734, "step": 73730 }, { "epoch": 0.6939764705882353, "grad_norm": 0.8304132445102002, "learning_rate": 2.401390181545948e-06, "loss": 0.014640329778194428, "step": 73735 }, { "epoch": 0.6940235294117647, "grad_norm": 0.6066950195656123, "learning_rate": 2.4013087649654907e-06, "loss": 0.018254286050796507, "step": 73740 }, { "epoch": 0.6940705882352941, "grad_norm": 0.5475268665317232, "learning_rate": 2.4012273566655003e-06, "loss": 0.014613403379917145, "step": 73745 }, { "epoch": 0.6941176470588235, "grad_norm": 0.3965045748755917, "learning_rate": 2.401145956644572e-06, "loss": 0.015786510705947877, "step": 73750 }, { "epoch": 0.6941647058823529, "grad_norm": 0.45859872199433604, "learning_rate": 2.4010645649013044e-06, "loss": 0.013949868083000184, "step": 73755 }, { "epoch": 0.6942117647058823, "grad_norm": 0.6268688852965382, "learning_rate": 2.4009831814342927e-06, "loss": 0.015128588676452637, "step": 73760 }, { "epoch": 0.6942588235294118, "grad_norm": 1.1070355781336945, "learning_rate": 2.400901806242135e-06, "loss": 0.01691747158765793, "step": 73765 }, { "epoch": 0.6943058823529412, "grad_norm": 0.3941958060196849, "learning_rate": 2.4008204393234295e-06, "loss": 0.013332483172416688, "step": 73770 }, { "epoch": 0.6943529411764706, "grad_norm": 0.6733436107822798, "learning_rate": 2.4007390806767747e-06, "loss": 0.01900712251663208, "step": 73775 }, { "epoch": 0.6944, "grad_norm": 0.673313501135173, "learning_rate": 2.400657730300768e-06, "loss": 0.0192816361784935, "step": 73780 }, { "epoch": 0.6944470588235294, "grad_norm": 0.40025016150100207, "learning_rate": 2.4005763881940085e-06, "loss": 0.013440002501010895, "step": 73785 }, { "epoch": 0.6944941176470588, "grad_norm": 0.6785537665699191, "learning_rate": 2.4004950543550963e-06, "loss": 0.01578642427921295, "step": 73790 }, { "epoch": 0.6945411764705882, "grad_norm": 0.31848717200657817, "learning_rate": 2.4004137287826302e-06, "loss": 0.014080996811389922, "step": 73795 }, { "epoch": 0.6945882352941176, "grad_norm": 0.6277500160347155, "learning_rate": 2.40033241147521e-06, "loss": 0.01939954161643982, "step": 73800 }, { "epoch": 0.694635294117647, "grad_norm": 0.5269953001506448, "learning_rate": 2.400251102431436e-06, "loss": 0.011808992177248002, "step": 73805 }, { "epoch": 0.6946823529411764, "grad_norm": 0.49226658645730464, "learning_rate": 2.400169801649908e-06, "loss": 0.015601873397827148, "step": 73810 }, { "epoch": 0.6947294117647059, "grad_norm": 0.6517687165769576, "learning_rate": 2.400088509129228e-06, "loss": 0.016054733097553252, "step": 73815 }, { "epoch": 0.6947764705882353, "grad_norm": 0.49716520431698785, "learning_rate": 2.400007224867996e-06, "loss": 0.012796121835708617, "step": 73820 }, { "epoch": 0.6948235294117647, "grad_norm": 0.46155473626540394, "learning_rate": 2.399925948864814e-06, "loss": 0.013686458766460418, "step": 73825 }, { "epoch": 0.6948705882352941, "grad_norm": 0.41923869601929, "learning_rate": 2.399844681118284e-06, "loss": 0.013068953156471252, "step": 73830 }, { "epoch": 0.6949176470588235, "grad_norm": 0.6435663646736858, "learning_rate": 2.399763421627008e-06, "loss": 0.012412520498037339, "step": 73835 }, { "epoch": 0.6949647058823529, "grad_norm": 0.6221334391193145, "learning_rate": 2.399682170389588e-06, "loss": 0.013281150162220002, "step": 73840 }, { "epoch": 0.6950117647058823, "grad_norm": 0.6265089235660323, "learning_rate": 2.3996009274046276e-06, "loss": 0.014526212215423584, "step": 73845 }, { "epoch": 0.6950588235294117, "grad_norm": 0.7230275410803353, "learning_rate": 2.3995196926707293e-06, "loss": 0.02293255031108856, "step": 73850 }, { "epoch": 0.6951058823529411, "grad_norm": 0.7840605960986676, "learning_rate": 2.3994384661864974e-06, "loss": 0.015555796027183533, "step": 73855 }, { "epoch": 0.6951529411764706, "grad_norm": 0.36463023132529215, "learning_rate": 2.3993572479505343e-06, "loss": 0.010642848163843154, "step": 73860 }, { "epoch": 0.6952, "grad_norm": 0.3724452345727058, "learning_rate": 2.399276037961445e-06, "loss": 0.015420708060264587, "step": 73865 }, { "epoch": 0.6952470588235294, "grad_norm": 0.4805796969458186, "learning_rate": 2.399194836217834e-06, "loss": 0.01634075343608856, "step": 73870 }, { "epoch": 0.6952941176470588, "grad_norm": 0.5602888460878888, "learning_rate": 2.399113642718306e-06, "loss": 0.015451157093048095, "step": 73875 }, { "epoch": 0.6953411764705882, "grad_norm": 0.42538961577368173, "learning_rate": 2.399032457461466e-06, "loss": 0.019835731387138365, "step": 73880 }, { "epoch": 0.6953882352941176, "grad_norm": 0.32237786646535976, "learning_rate": 2.3989512804459193e-06, "loss": 0.014833445847034454, "step": 73885 }, { "epoch": 0.695435294117647, "grad_norm": 0.5117211921562025, "learning_rate": 2.3988701116702725e-06, "loss": 0.012505942583084106, "step": 73890 }, { "epoch": 0.6954823529411764, "grad_norm": 0.6240906788916628, "learning_rate": 2.3987889511331303e-06, "loss": 0.015319833159446716, "step": 73895 }, { "epoch": 0.6955294117647058, "grad_norm": 0.5161477826683813, "learning_rate": 2.3987077988331002e-06, "loss": 0.012744086980819701, "step": 73900 }, { "epoch": 0.6955764705882352, "grad_norm": 0.571606588120324, "learning_rate": 2.398626654768789e-06, "loss": 0.013161607086658478, "step": 73905 }, { "epoch": 0.6956235294117648, "grad_norm": 0.527435677115158, "learning_rate": 2.3985455189388028e-06, "loss": 0.015077885985374451, "step": 73910 }, { "epoch": 0.6956705882352942, "grad_norm": 0.6484693689093567, "learning_rate": 2.3984643913417504e-06, "loss": 0.01774455010890961, "step": 73915 }, { "epoch": 0.6957176470588236, "grad_norm": 1.5936074603309658, "learning_rate": 2.3983832719762383e-06, "loss": 0.01622026562690735, "step": 73920 }, { "epoch": 0.695764705882353, "grad_norm": 0.6646578557540251, "learning_rate": 2.398302160840875e-06, "loss": 0.023338007926940917, "step": 73925 }, { "epoch": 0.6958117647058824, "grad_norm": 0.44684412497960363, "learning_rate": 2.3982210579342695e-06, "loss": 0.01755487620830536, "step": 73930 }, { "epoch": 0.6958588235294118, "grad_norm": 0.5992117341978113, "learning_rate": 2.39813996325503e-06, "loss": 0.015750864148139955, "step": 73935 }, { "epoch": 0.6959058823529412, "grad_norm": 0.6363065392764122, "learning_rate": 2.3980588768017653e-06, "loss": 0.019877238571643828, "step": 73940 }, { "epoch": 0.6959529411764706, "grad_norm": 0.4686578571930383, "learning_rate": 2.3979777985730854e-06, "loss": 0.015380898118019104, "step": 73945 }, { "epoch": 0.696, "grad_norm": 0.4011572611550475, "learning_rate": 2.3978967285676e-06, "loss": 0.01567988097667694, "step": 73950 }, { "epoch": 0.6960470588235295, "grad_norm": 0.40421255051868, "learning_rate": 2.397815666783919e-06, "loss": 0.01469741016626358, "step": 73955 }, { "epoch": 0.6960941176470589, "grad_norm": 0.4235455224338657, "learning_rate": 2.3977346132206525e-06, "loss": 0.01237092986702919, "step": 73960 }, { "epoch": 0.6961411764705883, "grad_norm": 0.75732250889099, "learning_rate": 2.3976535678764116e-06, "loss": 0.018284422159194947, "step": 73965 }, { "epoch": 0.6961882352941177, "grad_norm": 0.4224382946741391, "learning_rate": 2.397572530749807e-06, "loss": 0.018318715691566467, "step": 73970 }, { "epoch": 0.6962352941176471, "grad_norm": 0.35994628276289775, "learning_rate": 2.39749150183945e-06, "loss": 0.015124411880970001, "step": 73975 }, { "epoch": 0.6962823529411765, "grad_norm": 0.4351230733516079, "learning_rate": 2.397410481143953e-06, "loss": 0.011348386108875275, "step": 73980 }, { "epoch": 0.6963294117647059, "grad_norm": 0.5424959145758368, "learning_rate": 2.3973294686619275e-06, "loss": 0.017569419741630555, "step": 73985 }, { "epoch": 0.6963764705882353, "grad_norm": 0.3447245854832963, "learning_rate": 2.3972484643919862e-06, "loss": 0.013818091154098511, "step": 73990 }, { "epoch": 0.6964235294117647, "grad_norm": 0.484104446835625, "learning_rate": 2.397167468332741e-06, "loss": 0.017682206630706788, "step": 73995 }, { "epoch": 0.6964705882352941, "grad_norm": 0.5490031714460503, "learning_rate": 2.397086480482806e-06, "loss": 0.015013355016708373, "step": 74000 }, { "epoch": 0.6965176470588236, "grad_norm": 0.5257993940111056, "learning_rate": 2.3970055008407943e-06, "loss": 0.018358232080936433, "step": 74005 }, { "epoch": 0.696564705882353, "grad_norm": 1.211387903015519, "learning_rate": 2.396924529405318e-06, "loss": 0.016005811095237733, "step": 74010 }, { "epoch": 0.6966117647058824, "grad_norm": 0.46738049636590095, "learning_rate": 2.3968435661749935e-06, "loss": 0.014721715450286865, "step": 74015 }, { "epoch": 0.6966588235294118, "grad_norm": 0.4817037764690015, "learning_rate": 2.3967626111484337e-06, "loss": 0.021274557709693907, "step": 74020 }, { "epoch": 0.6967058823529412, "grad_norm": 0.2825672402438735, "learning_rate": 2.3966816643242534e-06, "loss": 0.012829846143722535, "step": 74025 }, { "epoch": 0.6967529411764706, "grad_norm": 0.4360500148217022, "learning_rate": 2.396600725701068e-06, "loss": 0.014161700010299682, "step": 74030 }, { "epoch": 0.6968, "grad_norm": 0.9003289587389793, "learning_rate": 2.3965197952774924e-06, "loss": 0.016913066804409026, "step": 74035 }, { "epoch": 0.6968470588235294, "grad_norm": 0.5863162849790592, "learning_rate": 2.396438873052143e-06, "loss": 0.012754914164543153, "step": 74040 }, { "epoch": 0.6968941176470588, "grad_norm": 0.4051629362119425, "learning_rate": 2.3963579590236344e-06, "loss": 0.014284469187259674, "step": 74045 }, { "epoch": 0.6969411764705883, "grad_norm": 0.5596068386345779, "learning_rate": 2.3962770531905836e-06, "loss": 0.01591634452342987, "step": 74050 }, { "epoch": 0.6969882352941177, "grad_norm": 0.42200666185063374, "learning_rate": 2.396196155551608e-06, "loss": 0.011052308976650238, "step": 74055 }, { "epoch": 0.6970352941176471, "grad_norm": 0.8247690599884672, "learning_rate": 2.396115266105323e-06, "loss": 0.01753309965133667, "step": 74060 }, { "epoch": 0.6970823529411765, "grad_norm": 0.3126821494983208, "learning_rate": 2.396034384850347e-06, "loss": 0.011663636565208435, "step": 74065 }, { "epoch": 0.6971294117647059, "grad_norm": 0.645252327866238, "learning_rate": 2.3959535117852974e-06, "loss": 0.019142884016036987, "step": 74070 }, { "epoch": 0.6971764705882353, "grad_norm": 0.41668281086225045, "learning_rate": 2.395872646908792e-06, "loss": 0.01670854389667511, "step": 74075 }, { "epoch": 0.6972235294117647, "grad_norm": 0.5743984740855249, "learning_rate": 2.3957917902194494e-06, "loss": 0.0138211190700531, "step": 74080 }, { "epoch": 0.6972705882352941, "grad_norm": 0.4826582857523303, "learning_rate": 2.395710941715887e-06, "loss": 0.015033051371574402, "step": 74085 }, { "epoch": 0.6973176470588235, "grad_norm": 0.6806147540247613, "learning_rate": 2.3956301013967246e-06, "loss": 0.019251143932342528, "step": 74090 }, { "epoch": 0.6973647058823529, "grad_norm": 0.4161671312321427, "learning_rate": 2.3955492692605826e-06, "loss": 0.016434814035892486, "step": 74095 }, { "epoch": 0.6974117647058824, "grad_norm": 0.5453136907511102, "learning_rate": 2.3954684453060783e-06, "loss": 0.014697587490081787, "step": 74100 }, { "epoch": 0.6974588235294118, "grad_norm": 0.39848145960694614, "learning_rate": 2.3953876295318324e-06, "loss": 0.013360589742660522, "step": 74105 }, { "epoch": 0.6975058823529412, "grad_norm": 0.7043038154051484, "learning_rate": 2.3953068219364663e-06, "loss": 0.017442134022712708, "step": 74110 }, { "epoch": 0.6975529411764706, "grad_norm": 0.5804438263458884, "learning_rate": 2.395226022518599e-06, "loss": 0.01837884485721588, "step": 74115 }, { "epoch": 0.6976, "grad_norm": 0.47450041111765806, "learning_rate": 2.395145231276852e-06, "loss": 0.014508689939975738, "step": 74120 }, { "epoch": 0.6976470588235294, "grad_norm": 0.5106558561460326, "learning_rate": 2.395064448209846e-06, "loss": 0.015546590089797974, "step": 74125 }, { "epoch": 0.6976941176470588, "grad_norm": 0.4936843483877655, "learning_rate": 2.394983673316203e-06, "loss": 0.013848823308944703, "step": 74130 }, { "epoch": 0.6977411764705882, "grad_norm": 0.5826248597787967, "learning_rate": 2.3949029065945454e-06, "loss": 0.016513991355895995, "step": 74135 }, { "epoch": 0.6977882352941176, "grad_norm": 0.6891061311488164, "learning_rate": 2.3948221480434945e-06, "loss": 0.014343374967575073, "step": 74140 }, { "epoch": 0.6978352941176471, "grad_norm": 0.6053957331114189, "learning_rate": 2.3947413976616723e-06, "loss": 0.013184946775436402, "step": 74145 }, { "epoch": 0.6978823529411765, "grad_norm": 0.4770036790351949, "learning_rate": 2.394660655447703e-06, "loss": 0.012036815285682678, "step": 74150 }, { "epoch": 0.6979294117647059, "grad_norm": 0.5067758895125001, "learning_rate": 2.394579921400209e-06, "loss": 0.015874883532524107, "step": 74155 }, { "epoch": 0.6979764705882353, "grad_norm": 0.7214572437681245, "learning_rate": 2.3944991955178135e-06, "loss": 0.017498621344566347, "step": 74160 }, { "epoch": 0.6980235294117647, "grad_norm": 0.5677624724457184, "learning_rate": 2.3944184777991408e-06, "loss": 0.01761171519756317, "step": 74165 }, { "epoch": 0.6980705882352941, "grad_norm": 0.5285321717763647, "learning_rate": 2.394337768242815e-06, "loss": 0.015439078211784363, "step": 74170 }, { "epoch": 0.6981176470588235, "grad_norm": 0.5666261217700314, "learning_rate": 2.39425706684746e-06, "loss": 0.02089046835899353, "step": 74175 }, { "epoch": 0.6981647058823529, "grad_norm": 1.0733134196761298, "learning_rate": 2.3941763736117006e-06, "loss": 0.01417906880378723, "step": 74180 }, { "epoch": 0.6982117647058823, "grad_norm": 0.5611953822147814, "learning_rate": 2.394095688534163e-06, "loss": 0.016243430972099304, "step": 74185 }, { "epoch": 0.6982588235294117, "grad_norm": 0.5383890458786467, "learning_rate": 2.394015011613471e-06, "loss": 0.013159808516502381, "step": 74190 }, { "epoch": 0.6983058823529412, "grad_norm": 0.4841596641396586, "learning_rate": 2.393934342848252e-06, "loss": 0.016910892724990845, "step": 74195 }, { "epoch": 0.6983529411764706, "grad_norm": 0.4265176351472915, "learning_rate": 2.3938536822371307e-06, "loss": 0.015655180811882018, "step": 74200 }, { "epoch": 0.6984, "grad_norm": 0.6702292698173106, "learning_rate": 2.3937730297787343e-06, "loss": 0.015405580401420593, "step": 74205 }, { "epoch": 0.6984470588235294, "grad_norm": 0.4721352180145024, "learning_rate": 2.3936923854716888e-06, "loss": 0.017418292164802552, "step": 74210 }, { "epoch": 0.6984941176470588, "grad_norm": 0.7900188358627145, "learning_rate": 2.393611749314622e-06, "loss": 0.011683744937181472, "step": 74215 }, { "epoch": 0.6985411764705882, "grad_norm": 0.7109654307800901, "learning_rate": 2.3935311213061602e-06, "loss": 0.01775594651699066, "step": 74220 }, { "epoch": 0.6985882352941176, "grad_norm": 0.7446396141113396, "learning_rate": 2.3934505014449324e-06, "loss": 0.022101891040802003, "step": 74225 }, { "epoch": 0.698635294117647, "grad_norm": 0.4441630270995115, "learning_rate": 2.393369889729566e-06, "loss": 0.01682237833738327, "step": 74230 }, { "epoch": 0.6986823529411764, "grad_norm": 0.6479917050969408, "learning_rate": 2.3932892861586886e-06, "loss": 0.01341448724269867, "step": 74235 }, { "epoch": 0.6987294117647059, "grad_norm": 0.7168706883295111, "learning_rate": 2.3932086907309303e-06, "loss": 0.014923371374607086, "step": 74240 }, { "epoch": 0.6987764705882353, "grad_norm": 0.5422104432128392, "learning_rate": 2.3931281034449184e-06, "loss": 0.013877281546592712, "step": 74245 }, { "epoch": 0.6988235294117647, "grad_norm": 0.5754275171861618, "learning_rate": 2.393047524299284e-06, "loss": 0.017754232883453368, "step": 74250 }, { "epoch": 0.6988705882352941, "grad_norm": 0.4554363946083146, "learning_rate": 2.392966953292655e-06, "loss": 0.012048624455928802, "step": 74255 }, { "epoch": 0.6989176470588235, "grad_norm": 0.5071521088188584, "learning_rate": 2.3928863904236625e-06, "loss": 0.012752881646156311, "step": 74260 }, { "epoch": 0.6989647058823529, "grad_norm": 0.7308993102766439, "learning_rate": 2.3928058356909365e-06, "loss": 0.023049560189247132, "step": 74265 }, { "epoch": 0.6990117647058823, "grad_norm": 0.7128366416571549, "learning_rate": 2.3927252890931077e-06, "loss": 0.01590920686721802, "step": 74270 }, { "epoch": 0.6990588235294117, "grad_norm": 0.36518173337810966, "learning_rate": 2.392644750628806e-06, "loss": 0.014486274123191834, "step": 74275 }, { "epoch": 0.6991058823529411, "grad_norm": 0.3570325644861247, "learning_rate": 2.3925642202966644e-06, "loss": 0.013889230787754059, "step": 74280 }, { "epoch": 0.6991529411764706, "grad_norm": 0.6772915589273691, "learning_rate": 2.3924836980953127e-06, "loss": 0.02049325406551361, "step": 74285 }, { "epoch": 0.6992, "grad_norm": 0.34770869400194027, "learning_rate": 2.3924031840233843e-06, "loss": 0.016409926116466522, "step": 74290 }, { "epoch": 0.6992470588235294, "grad_norm": 0.5210632090356248, "learning_rate": 2.3923226780795096e-06, "loss": 0.015240344405174255, "step": 74295 }, { "epoch": 0.6992941176470588, "grad_norm": 0.7204729181108012, "learning_rate": 2.392242180262323e-06, "loss": 0.01455613374710083, "step": 74300 }, { "epoch": 0.6993411764705882, "grad_norm": 0.6895981173243795, "learning_rate": 2.3921616905704567e-06, "loss": 0.019381393492221833, "step": 74305 }, { "epoch": 0.6993882352941176, "grad_norm": 0.5156389436612129, "learning_rate": 2.3920812090025426e-06, "loss": 0.015221568942070007, "step": 74310 }, { "epoch": 0.699435294117647, "grad_norm": 0.36384742264159403, "learning_rate": 2.3920007355572163e-06, "loss": 0.01588037461042404, "step": 74315 }, { "epoch": 0.6994823529411764, "grad_norm": 0.718664788525638, "learning_rate": 2.39192027023311e-06, "loss": 0.01841885894536972, "step": 74320 }, { "epoch": 0.6995294117647058, "grad_norm": 0.49973924005906184, "learning_rate": 2.391839813028859e-06, "loss": 0.013446265459060669, "step": 74325 }, { "epoch": 0.6995764705882352, "grad_norm": 0.4737425726271937, "learning_rate": 2.3917593639430964e-06, "loss": 0.01659211665391922, "step": 74330 }, { "epoch": 0.6996235294117648, "grad_norm": 0.6211929988665347, "learning_rate": 2.391678922974458e-06, "loss": 0.012963539361953736, "step": 74335 }, { "epoch": 0.6996705882352942, "grad_norm": 0.3602066964021629, "learning_rate": 2.3915984901215792e-06, "loss": 0.013390949368476868, "step": 74340 }, { "epoch": 0.6997176470588236, "grad_norm": 0.4028701964738046, "learning_rate": 2.3915180653830945e-06, "loss": 0.012048356980085374, "step": 74345 }, { "epoch": 0.699764705882353, "grad_norm": 0.7766311414093, "learning_rate": 2.3914376487576395e-06, "loss": 0.016739696264266968, "step": 74350 }, { "epoch": 0.6998117647058824, "grad_norm": 0.5160242642810469, "learning_rate": 2.391357240243851e-06, "loss": 0.015453603863716126, "step": 74355 }, { "epoch": 0.6998588235294118, "grad_norm": 0.43947827681462015, "learning_rate": 2.391276839840365e-06, "loss": 0.017741888761520386, "step": 74360 }, { "epoch": 0.6999058823529412, "grad_norm": 0.500413078078108, "learning_rate": 2.3911964475458187e-06, "loss": 0.015112599730491639, "step": 74365 }, { "epoch": 0.6999529411764706, "grad_norm": 0.9669580537948476, "learning_rate": 2.3911160633588483e-06, "loss": 0.021858467161655425, "step": 74370 }, { "epoch": 0.7, "grad_norm": 0.5593642729902586, "learning_rate": 2.391035687278092e-06, "loss": 0.014945638179779053, "step": 74375 }, { "epoch": 0.7000470588235295, "grad_norm": 0.7688781393688876, "learning_rate": 2.390955319302187e-06, "loss": 0.015733052790164948, "step": 74380 }, { "epoch": 0.7000941176470589, "grad_norm": 0.6321092000123676, "learning_rate": 2.390874959429771e-06, "loss": 0.01755065768957138, "step": 74385 }, { "epoch": 0.7001411764705883, "grad_norm": 0.567243105479182, "learning_rate": 2.3907946076594826e-06, "loss": 0.016739484667778016, "step": 74390 }, { "epoch": 0.7001882352941177, "grad_norm": 0.4228933584756932, "learning_rate": 2.3907142639899604e-06, "loss": 0.017465539276599884, "step": 74395 }, { "epoch": 0.7002352941176471, "grad_norm": 0.4264782342139688, "learning_rate": 2.3906339284198436e-06, "loss": 0.011258891224861145, "step": 74400 }, { "epoch": 0.7002823529411765, "grad_norm": 0.4012361520763721, "learning_rate": 2.3905536009477713e-06, "loss": 0.013835456967353821, "step": 74405 }, { "epoch": 0.7003294117647059, "grad_norm": 0.5883056789736667, "learning_rate": 2.3904732815723827e-06, "loss": 0.015896497666835784, "step": 74410 }, { "epoch": 0.7003764705882353, "grad_norm": 0.43174128863279543, "learning_rate": 2.3903929702923184e-06, "loss": 0.01567164659500122, "step": 74415 }, { "epoch": 0.7004235294117647, "grad_norm": 0.3369026667426322, "learning_rate": 2.390312667106218e-06, "loss": 0.02175171971321106, "step": 74420 }, { "epoch": 0.7004705882352941, "grad_norm": 0.5785355895163667, "learning_rate": 2.390232372012722e-06, "loss": 0.014766797423362732, "step": 74425 }, { "epoch": 0.7005176470588236, "grad_norm": 0.5339074149800889, "learning_rate": 2.3901520850104724e-06, "loss": 0.013731572031974792, "step": 74430 }, { "epoch": 0.700564705882353, "grad_norm": 0.41344416748545554, "learning_rate": 2.3900718060981095e-06, "loss": 0.014747275412082672, "step": 74435 }, { "epoch": 0.7006117647058824, "grad_norm": 0.45064458951007386, "learning_rate": 2.389991535274274e-06, "loss": 0.017103946208953856, "step": 74440 }, { "epoch": 0.7006588235294118, "grad_norm": 0.7110854688033991, "learning_rate": 2.3899112725376084e-06, "loss": 0.013892588019371033, "step": 74445 }, { "epoch": 0.7007058823529412, "grad_norm": 0.3897091781704216, "learning_rate": 2.389831017886756e-06, "loss": 0.019607263803482055, "step": 74450 }, { "epoch": 0.7007529411764706, "grad_norm": 0.5604365622407933, "learning_rate": 2.389750771320358e-06, "loss": 0.016415873169898988, "step": 74455 }, { "epoch": 0.7008, "grad_norm": 0.46885672350582047, "learning_rate": 2.3896705328370574e-06, "loss": 0.013050805032253265, "step": 74460 }, { "epoch": 0.7008470588235294, "grad_norm": 0.36767353288157456, "learning_rate": 2.3895903024354966e-06, "loss": 0.015607067942619323, "step": 74465 }, { "epoch": 0.7008941176470588, "grad_norm": 0.5078142581183814, "learning_rate": 2.3895100801143205e-06, "loss": 0.01731176972389221, "step": 74470 }, { "epoch": 0.7009411764705883, "grad_norm": 0.5845594404362952, "learning_rate": 2.3894298658721722e-06, "loss": 0.01691723018884659, "step": 74475 }, { "epoch": 0.7009882352941177, "grad_norm": 0.5732378681806386, "learning_rate": 2.389349659707695e-06, "loss": 0.016219346225261687, "step": 74480 }, { "epoch": 0.7010352941176471, "grad_norm": 0.378639090527919, "learning_rate": 2.3892694616195345e-06, "loss": 0.013585972785949706, "step": 74485 }, { "epoch": 0.7010823529411765, "grad_norm": 0.49512927913690613, "learning_rate": 2.3891892716063346e-06, "loss": 0.013010485470294953, "step": 74490 }, { "epoch": 0.7011294117647059, "grad_norm": 0.41552573086493666, "learning_rate": 2.3891090896667407e-06, "loss": 0.015684767067432402, "step": 74495 }, { "epoch": 0.7011764705882353, "grad_norm": 0.49623362618774364, "learning_rate": 2.389028915799398e-06, "loss": 0.015845850110054016, "step": 74500 }, { "epoch": 0.7012235294117647, "grad_norm": 0.5046157211876791, "learning_rate": 2.388948750002951e-06, "loss": 0.012970444560050965, "step": 74505 }, { "epoch": 0.7012705882352941, "grad_norm": 0.3712600951067375, "learning_rate": 2.388868592276048e-06, "loss": 0.011252516508102417, "step": 74510 }, { "epoch": 0.7013176470588235, "grad_norm": 0.5581016301638185, "learning_rate": 2.388788442617334e-06, "loss": 0.017113974690437316, "step": 74515 }, { "epoch": 0.7013647058823529, "grad_norm": 0.6079447988068262, "learning_rate": 2.3887083010254546e-06, "loss": 0.013514506816864013, "step": 74520 }, { "epoch": 0.7014117647058824, "grad_norm": 0.3689841574674702, "learning_rate": 2.3886281674990588e-06, "loss": 0.014741855859756469, "step": 74525 }, { "epoch": 0.7014588235294118, "grad_norm": 0.37895920545475587, "learning_rate": 2.3885480420367922e-06, "loss": 0.016028439998626708, "step": 74530 }, { "epoch": 0.7015058823529412, "grad_norm": 0.5814001158909582, "learning_rate": 2.3884679246373036e-06, "loss": 0.018597616255283354, "step": 74535 }, { "epoch": 0.7015529411764706, "grad_norm": 0.37008227290783785, "learning_rate": 2.388387815299239e-06, "loss": 0.011196206510066985, "step": 74540 }, { "epoch": 0.7016, "grad_norm": 0.801725288879699, "learning_rate": 2.388307714021249e-06, "loss": 0.016691720485687254, "step": 74545 }, { "epoch": 0.7016470588235294, "grad_norm": 0.35085294783115545, "learning_rate": 2.38822762080198e-06, "loss": 0.022675219178199767, "step": 74550 }, { "epoch": 0.7016941176470588, "grad_norm": 0.38502583891249603, "learning_rate": 2.3881475356400827e-06, "loss": 0.013989123702049255, "step": 74555 }, { "epoch": 0.7017411764705882, "grad_norm": 0.41772640069671546, "learning_rate": 2.388067458534204e-06, "loss": 0.021293634176254274, "step": 74560 }, { "epoch": 0.7017882352941176, "grad_norm": 0.6489973259321152, "learning_rate": 2.3879873894829957e-06, "loss": 0.01681924909353256, "step": 74565 }, { "epoch": 0.7018352941176471, "grad_norm": 0.5686845296914369, "learning_rate": 2.3879073284851055e-06, "loss": 0.015042075514793396, "step": 74570 }, { "epoch": 0.7018823529411765, "grad_norm": 0.4051522369588029, "learning_rate": 2.3878272755391854e-06, "loss": 0.016688254475593568, "step": 74575 }, { "epoch": 0.7019294117647059, "grad_norm": 0.4197480076154647, "learning_rate": 2.3877472306438844e-06, "loss": 0.012443823367357254, "step": 74580 }, { "epoch": 0.7019764705882353, "grad_norm": 0.9269951727411596, "learning_rate": 2.3876671937978537e-06, "loss": 0.01819193959236145, "step": 74585 }, { "epoch": 0.7020235294117647, "grad_norm": 0.6127804415278947, "learning_rate": 2.3875871649997443e-06, "loss": 0.014442460238933563, "step": 74590 }, { "epoch": 0.7020705882352941, "grad_norm": 0.5000806277457287, "learning_rate": 2.3875071442482083e-06, "loss": 0.014990389347076416, "step": 74595 }, { "epoch": 0.7021176470588235, "grad_norm": 0.5956103544501617, "learning_rate": 2.387427131541896e-06, "loss": 0.014485493302345276, "step": 74600 }, { "epoch": 0.7021647058823529, "grad_norm": 0.5909062009838394, "learning_rate": 2.3873471268794604e-06, "loss": 0.015133629739284515, "step": 74605 }, { "epoch": 0.7022117647058823, "grad_norm": 0.4503528368174492, "learning_rate": 2.3872671302595533e-06, "loss": 0.01479327231645584, "step": 74610 }, { "epoch": 0.7022588235294117, "grad_norm": 0.45511645587483474, "learning_rate": 2.3871871416808273e-06, "loss": 0.01619202792644501, "step": 74615 }, { "epoch": 0.7023058823529412, "grad_norm": 0.5625619877607563, "learning_rate": 2.3871071611419356e-06, "loss": 0.018699651956558226, "step": 74620 }, { "epoch": 0.7023529411764706, "grad_norm": 0.38532370168040436, "learning_rate": 2.387027188641532e-06, "loss": 0.01471099853515625, "step": 74625 }, { "epoch": 0.7024, "grad_norm": 0.45077198163203974, "learning_rate": 2.386947224178269e-06, "loss": 0.014016783237457276, "step": 74630 }, { "epoch": 0.7024470588235294, "grad_norm": 0.4688851475232641, "learning_rate": 2.3868672677508006e-06, "loss": 0.01686186045408249, "step": 74635 }, { "epoch": 0.7024941176470588, "grad_norm": 0.4075340325928419, "learning_rate": 2.3867873193577817e-06, "loss": 0.01535595953464508, "step": 74640 }, { "epoch": 0.7025411764705882, "grad_norm": 0.589292158628648, "learning_rate": 2.386707378997866e-06, "loss": 0.015713492035865785, "step": 74645 }, { "epoch": 0.7025882352941176, "grad_norm": 0.5728990621176179, "learning_rate": 2.3866274466697095e-06, "loss": 0.017807337641716003, "step": 74650 }, { "epoch": 0.702635294117647, "grad_norm": 0.5187409282902296, "learning_rate": 2.386547522371966e-06, "loss": 0.014640696346759796, "step": 74655 }, { "epoch": 0.7026823529411764, "grad_norm": 0.5275760479557072, "learning_rate": 2.3864676061032922e-06, "loss": 0.019759741425514222, "step": 74660 }, { "epoch": 0.7027294117647059, "grad_norm": 0.49799877075026355, "learning_rate": 2.386387697862343e-06, "loss": 0.020336171984672545, "step": 74665 }, { "epoch": 0.7027764705882353, "grad_norm": 0.4873453129079623, "learning_rate": 2.386307797647775e-06, "loss": 0.0161908358335495, "step": 74670 }, { "epoch": 0.7028235294117647, "grad_norm": 0.5160565701482122, "learning_rate": 2.386227905458244e-06, "loss": 0.017327699065208434, "step": 74675 }, { "epoch": 0.7028705882352941, "grad_norm": 0.4184808706928037, "learning_rate": 2.386148021292407e-06, "loss": 0.017427366971969605, "step": 74680 }, { "epoch": 0.7029176470588235, "grad_norm": 0.5360403051011645, "learning_rate": 2.386068145148922e-06, "loss": 0.012380579113960266, "step": 74685 }, { "epoch": 0.7029647058823529, "grad_norm": 0.4202807007279143, "learning_rate": 2.3859882770264446e-06, "loss": 0.01551862359046936, "step": 74690 }, { "epoch": 0.7030117647058823, "grad_norm": 0.5861356929490946, "learning_rate": 2.385908416923633e-06, "loss": 0.013676217198371888, "step": 74695 }, { "epoch": 0.7030588235294117, "grad_norm": 1.1522430406502382, "learning_rate": 2.3858285648391466e-06, "loss": 0.01762354075908661, "step": 74700 }, { "epoch": 0.7031058823529411, "grad_norm": 0.4520538151633373, "learning_rate": 2.385748720771642e-06, "loss": 0.011989153921604156, "step": 74705 }, { "epoch": 0.7031529411764705, "grad_norm": 0.6976162953891704, "learning_rate": 2.3856688847197786e-06, "loss": 0.019097962975502016, "step": 74710 }, { "epoch": 0.7032, "grad_norm": 0.6044859618333979, "learning_rate": 2.3855890566822154e-06, "loss": 0.014069518446922303, "step": 74715 }, { "epoch": 0.7032470588235294, "grad_norm": 0.3550952318763101, "learning_rate": 2.385509236657611e-06, "loss": 0.011040166020393372, "step": 74720 }, { "epoch": 0.7032941176470588, "grad_norm": 0.5314643441475395, "learning_rate": 2.385429424644625e-06, "loss": 0.014955747127532958, "step": 74725 }, { "epoch": 0.7033411764705882, "grad_norm": 0.5727796677387569, "learning_rate": 2.3853496206419183e-06, "loss": 0.015621255338191985, "step": 74730 }, { "epoch": 0.7033882352941176, "grad_norm": 0.544776732749713, "learning_rate": 2.3852698246481505e-06, "loss": 0.017310002446174623, "step": 74735 }, { "epoch": 0.703435294117647, "grad_norm": 0.48303177865754393, "learning_rate": 2.3851900366619816e-06, "loss": 0.013165625929832458, "step": 74740 }, { "epoch": 0.7034823529411764, "grad_norm": 0.3439389310791539, "learning_rate": 2.3851102566820726e-06, "loss": 0.01432630568742752, "step": 74745 }, { "epoch": 0.7035294117647058, "grad_norm": 0.3618270782259522, "learning_rate": 2.385030484707085e-06, "loss": 0.019534920156002045, "step": 74750 }, { "epoch": 0.7035764705882352, "grad_norm": 0.9412996415568042, "learning_rate": 2.3849507207356803e-06, "loss": 0.014792525768280029, "step": 74755 }, { "epoch": 0.7036235294117648, "grad_norm": 0.3712943166737781, "learning_rate": 2.38487096476652e-06, "loss": 0.013372428715229034, "step": 74760 }, { "epoch": 0.7036705882352942, "grad_norm": 0.7178368536031244, "learning_rate": 2.384791216798266e-06, "loss": 0.015237540006637573, "step": 74765 }, { "epoch": 0.7037176470588236, "grad_norm": 0.4476355651299588, "learning_rate": 2.3847114768295806e-06, "loss": 0.014629605412483215, "step": 74770 }, { "epoch": 0.703764705882353, "grad_norm": 0.6036742763622914, "learning_rate": 2.3846317448591273e-06, "loss": 0.014710725843906402, "step": 74775 }, { "epoch": 0.7038117647058824, "grad_norm": 0.5870745303903211, "learning_rate": 2.3845520208855683e-06, "loss": 0.013996489346027374, "step": 74780 }, { "epoch": 0.7038588235294118, "grad_norm": 0.5489342535380217, "learning_rate": 2.3844723049075667e-06, "loss": 0.015659743547439577, "step": 74785 }, { "epoch": 0.7039058823529412, "grad_norm": 0.4721241190797081, "learning_rate": 2.3843925969237867e-06, "loss": 0.02000769376754761, "step": 74790 }, { "epoch": 0.7039529411764706, "grad_norm": 0.5743676776846747, "learning_rate": 2.3843128969328924e-06, "loss": 0.014080287516117096, "step": 74795 }, { "epoch": 0.704, "grad_norm": 0.4745647643513788, "learning_rate": 2.3842332049335473e-06, "loss": 0.014262986183166505, "step": 74800 }, { "epoch": 0.7040470588235294, "grad_norm": 0.629894423295589, "learning_rate": 2.384153520924417e-06, "loss": 0.018594425916671754, "step": 74805 }, { "epoch": 0.7040941176470589, "grad_norm": 0.2711205851416717, "learning_rate": 2.384073844904165e-06, "loss": 0.013815262913703918, "step": 74810 }, { "epoch": 0.7041411764705883, "grad_norm": 0.6449590809394945, "learning_rate": 2.383994176871458e-06, "loss": 0.015229931473731995, "step": 74815 }, { "epoch": 0.7041882352941177, "grad_norm": 0.38833339898140296, "learning_rate": 2.3839145168249604e-06, "loss": 0.015521484613418578, "step": 74820 }, { "epoch": 0.7042352941176471, "grad_norm": 0.5301655296131409, "learning_rate": 2.3838348647633383e-06, "loss": 0.01243695467710495, "step": 74825 }, { "epoch": 0.7042823529411765, "grad_norm": 0.36949604844704054, "learning_rate": 2.3837552206852583e-06, "loss": 0.0156337708234787, "step": 74830 }, { "epoch": 0.7043294117647059, "grad_norm": 0.42164311358294365, "learning_rate": 2.383675584589386e-06, "loss": 0.013212290406227113, "step": 74835 }, { "epoch": 0.7043764705882353, "grad_norm": 0.5042735859732892, "learning_rate": 2.3835959564743885e-06, "loss": 0.01420755684375763, "step": 74840 }, { "epoch": 0.7044235294117647, "grad_norm": 0.8194612218440538, "learning_rate": 2.383516336338933e-06, "loss": 0.015200802683830261, "step": 74845 }, { "epoch": 0.7044705882352941, "grad_norm": 0.3855898473560443, "learning_rate": 2.383436724181687e-06, "loss": 0.010611076653003693, "step": 74850 }, { "epoch": 0.7045176470588236, "grad_norm": 0.5987164841567049, "learning_rate": 2.383357120001318e-06, "loss": 0.016228388249874114, "step": 74855 }, { "epoch": 0.704564705882353, "grad_norm": 0.5044790508557844, "learning_rate": 2.3832775237964943e-06, "loss": 0.016289059817790986, "step": 74860 }, { "epoch": 0.7046117647058824, "grad_norm": 0.535714917545679, "learning_rate": 2.383197935565884e-06, "loss": 0.013831889629364014, "step": 74865 }, { "epoch": 0.7046588235294118, "grad_norm": 0.4005532665799092, "learning_rate": 2.3831183553081547e-06, "loss": 0.01646038591861725, "step": 74870 }, { "epoch": 0.7047058823529412, "grad_norm": 0.5342259947303065, "learning_rate": 2.383038783021977e-06, "loss": 0.014144478738307953, "step": 74875 }, { "epoch": 0.7047529411764706, "grad_norm": 0.5602858349748739, "learning_rate": 2.382959218706019e-06, "loss": 0.015049472451210022, "step": 74880 }, { "epoch": 0.7048, "grad_norm": 0.3941551955220227, "learning_rate": 2.3828796623589508e-06, "loss": 0.01727443337440491, "step": 74885 }, { "epoch": 0.7048470588235294, "grad_norm": 0.5298860559749176, "learning_rate": 2.3828001139794417e-06, "loss": 0.022795030474662782, "step": 74890 }, { "epoch": 0.7048941176470588, "grad_norm": 0.5085891942171593, "learning_rate": 2.382720573566163e-06, "loss": 0.013341763615608215, "step": 74895 }, { "epoch": 0.7049411764705882, "grad_norm": 0.5481949504793636, "learning_rate": 2.3826410411177837e-06, "loss": 0.012364599853754044, "step": 74900 }, { "epoch": 0.7049882352941177, "grad_norm": 0.5790531106267159, "learning_rate": 2.382561516632976e-06, "loss": 0.01882103979587555, "step": 74905 }, { "epoch": 0.7050352941176471, "grad_norm": 0.5879973272537791, "learning_rate": 2.3824820001104095e-06, "loss": 0.017431220412254332, "step": 74910 }, { "epoch": 0.7050823529411765, "grad_norm": 0.4351895406177981, "learning_rate": 2.3824024915487576e-06, "loss": 0.016154029965400697, "step": 74915 }, { "epoch": 0.7051294117647059, "grad_norm": 0.4174327624613492, "learning_rate": 2.3823229909466904e-06, "loss": 0.013390403985977174, "step": 74920 }, { "epoch": 0.7051764705882353, "grad_norm": 0.3006986248766787, "learning_rate": 2.3822434983028798e-06, "loss": 0.01627572476863861, "step": 74925 }, { "epoch": 0.7052235294117647, "grad_norm": 0.2993098845617116, "learning_rate": 2.3821640136159997e-06, "loss": 0.014429295063018798, "step": 74930 }, { "epoch": 0.7052705882352941, "grad_norm": 0.3916583453095128, "learning_rate": 2.382084536884721e-06, "loss": 0.01848987936973572, "step": 74935 }, { "epoch": 0.7053176470588235, "grad_norm": 0.47188631491713146, "learning_rate": 2.382005068107718e-06, "loss": 0.015441891551017762, "step": 74940 }, { "epoch": 0.7053647058823529, "grad_norm": 0.48317733889928105, "learning_rate": 2.381925607283663e-06, "loss": 0.01354886293411255, "step": 74945 }, { "epoch": 0.7054117647058824, "grad_norm": 0.4300339790569133, "learning_rate": 2.381846154411231e-06, "loss": 0.016354933381080627, "step": 74950 }, { "epoch": 0.7054588235294118, "grad_norm": 0.4732382259706812, "learning_rate": 2.381766709489094e-06, "loss": 0.013927312195301056, "step": 74955 }, { "epoch": 0.7055058823529412, "grad_norm": 0.43983708145311634, "learning_rate": 2.381687272515927e-06, "loss": 0.014176176488399505, "step": 74960 }, { "epoch": 0.7055529411764706, "grad_norm": 0.6209747736695236, "learning_rate": 2.381607843490405e-06, "loss": 0.01703988015651703, "step": 74965 }, { "epoch": 0.7056, "grad_norm": 0.5004548254588717, "learning_rate": 2.3815284224112027e-06, "loss": 0.014025044441223145, "step": 74970 }, { "epoch": 0.7056470588235294, "grad_norm": 0.4840861224864363, "learning_rate": 2.381449009276995e-06, "loss": 0.01844226270914078, "step": 74975 }, { "epoch": 0.7056941176470588, "grad_norm": 0.5647280456952694, "learning_rate": 2.3813696040864567e-06, "loss": 0.017132173478603362, "step": 74980 }, { "epoch": 0.7057411764705882, "grad_norm": 0.5855268841888152, "learning_rate": 2.3812902068382647e-06, "loss": 0.01694001704454422, "step": 74985 }, { "epoch": 0.7057882352941176, "grad_norm": 0.41454837774640435, "learning_rate": 2.3812108175310947e-06, "loss": 0.017819502949714662, "step": 74990 }, { "epoch": 0.705835294117647, "grad_norm": 0.42338892336330275, "learning_rate": 2.3811314361636227e-06, "loss": 0.016883434355258943, "step": 74995 }, { "epoch": 0.7058823529411765, "grad_norm": 0.42988640692093444, "learning_rate": 2.3810520627345257e-06, "loss": 0.014769186079502106, "step": 75000 }, { "epoch": 0.7059294117647059, "grad_norm": 0.41041175940180574, "learning_rate": 2.3809726972424807e-06, "loss": 0.015345311164855957, "step": 75005 }, { "epoch": 0.7059764705882353, "grad_norm": 0.49304878297232624, "learning_rate": 2.380893339686165e-06, "loss": 0.018302562832832336, "step": 75010 }, { "epoch": 0.7060235294117647, "grad_norm": 0.5651230754810286, "learning_rate": 2.3808139900642558e-06, "loss": 0.015797901153564452, "step": 75015 }, { "epoch": 0.7060705882352941, "grad_norm": 3.7421226147445354, "learning_rate": 2.380734648375431e-06, "loss": 0.01856493502855301, "step": 75020 }, { "epoch": 0.7061176470588235, "grad_norm": 0.6144346419631166, "learning_rate": 2.38065531461837e-06, "loss": 0.016898691654205322, "step": 75025 }, { "epoch": 0.7061647058823529, "grad_norm": 0.47001016801061996, "learning_rate": 2.3805759887917495e-06, "loss": 0.01945836842060089, "step": 75030 }, { "epoch": 0.7062117647058823, "grad_norm": 0.6236843994380874, "learning_rate": 2.3804966708942494e-06, "loss": 0.016560685634613038, "step": 75035 }, { "epoch": 0.7062588235294117, "grad_norm": 0.33368808614120593, "learning_rate": 2.380417360924549e-06, "loss": 0.011302714049816132, "step": 75040 }, { "epoch": 0.7063058823529412, "grad_norm": 0.6043090467015031, "learning_rate": 2.380338058881328e-06, "loss": 0.014750984311103821, "step": 75045 }, { "epoch": 0.7063529411764706, "grad_norm": 0.6888516886418528, "learning_rate": 2.3802587647632646e-06, "loss": 0.016108447313308717, "step": 75050 }, { "epoch": 0.7064, "grad_norm": 0.3710753630366427, "learning_rate": 2.3801794785690405e-06, "loss": 0.02275414764881134, "step": 75055 }, { "epoch": 0.7064470588235294, "grad_norm": 0.5041055678901007, "learning_rate": 2.3801002002973354e-06, "loss": 0.017060546576976775, "step": 75060 }, { "epoch": 0.7064941176470588, "grad_norm": 0.4957543877050932, "learning_rate": 2.38002092994683e-06, "loss": 0.01475268006324768, "step": 75065 }, { "epoch": 0.7065411764705882, "grad_norm": 0.33682489626065126, "learning_rate": 2.379941667516205e-06, "loss": 0.014821428060531616, "step": 75070 }, { "epoch": 0.7065882352941176, "grad_norm": 0.5123728410435915, "learning_rate": 2.379862413004143e-06, "loss": 0.0123318612575531, "step": 75075 }, { "epoch": 0.706635294117647, "grad_norm": 0.38629847099282516, "learning_rate": 2.3797831664093242e-06, "loss": 0.014971587061882018, "step": 75080 }, { "epoch": 0.7066823529411764, "grad_norm": 0.4340880007183238, "learning_rate": 2.3797039277304306e-06, "loss": 0.018920333683490755, "step": 75085 }, { "epoch": 0.7067294117647058, "grad_norm": 0.8425571505321947, "learning_rate": 2.379624696966145e-06, "loss": 0.014497880637645722, "step": 75090 }, { "epoch": 0.7067764705882353, "grad_norm": 0.7152301070021334, "learning_rate": 2.3795454741151506e-06, "loss": 0.016471251845359802, "step": 75095 }, { "epoch": 0.7068235294117647, "grad_norm": 0.511839795096137, "learning_rate": 2.3794662591761286e-06, "loss": 0.016545698046684265, "step": 75100 }, { "epoch": 0.7068705882352941, "grad_norm": 0.670345408903733, "learning_rate": 2.379387052147763e-06, "loss": 0.014194345474243164, "step": 75105 }, { "epoch": 0.7069176470588235, "grad_norm": 0.6703307465347149, "learning_rate": 2.379307853028737e-06, "loss": 0.01945838928222656, "step": 75110 }, { "epoch": 0.7069647058823529, "grad_norm": 0.5180929230080926, "learning_rate": 2.379228661817734e-06, "loss": 0.01739603132009506, "step": 75115 }, { "epoch": 0.7070117647058823, "grad_norm": 0.3926355066030771, "learning_rate": 2.37914947851344e-06, "loss": 0.013480870425701142, "step": 75120 }, { "epoch": 0.7070588235294117, "grad_norm": 0.6577643761386543, "learning_rate": 2.379070303114537e-06, "loss": 0.013857240974903106, "step": 75125 }, { "epoch": 0.7071058823529411, "grad_norm": 0.6503352415897609, "learning_rate": 2.37899113561971e-06, "loss": 0.0153249591588974, "step": 75130 }, { "epoch": 0.7071529411764705, "grad_norm": 0.39467077608410434, "learning_rate": 2.378911976027646e-06, "loss": 0.01257098764181137, "step": 75135 }, { "epoch": 0.7072, "grad_norm": 0.583407223087507, "learning_rate": 2.378832824337028e-06, "loss": 0.012355352193117142, "step": 75140 }, { "epoch": 0.7072470588235295, "grad_norm": 0.6277273398065545, "learning_rate": 2.3787536805465426e-06, "loss": 0.013403655588626861, "step": 75145 }, { "epoch": 0.7072941176470589, "grad_norm": 0.8816687937626668, "learning_rate": 2.378674544654876e-06, "loss": 0.01763686090707779, "step": 75150 }, { "epoch": 0.7073411764705883, "grad_norm": 0.5070087205394398, "learning_rate": 2.3785954166607135e-06, "loss": 0.014126998186111451, "step": 75155 }, { "epoch": 0.7073882352941177, "grad_norm": 0.4414274238450152, "learning_rate": 2.3785162965627424e-06, "loss": 0.016924339532852172, "step": 75160 }, { "epoch": 0.707435294117647, "grad_norm": 0.5873451801719285, "learning_rate": 2.3784371843596487e-06, "loss": 0.023485210537910462, "step": 75165 }, { "epoch": 0.7074823529411765, "grad_norm": 0.4180359050389419, "learning_rate": 2.3783580800501206e-06, "loss": 0.013720247149467468, "step": 75170 }, { "epoch": 0.7075294117647059, "grad_norm": 0.3885271871185037, "learning_rate": 2.378278983632845e-06, "loss": 0.015426804125308991, "step": 75175 }, { "epoch": 0.7075764705882353, "grad_norm": 0.6123231842074494, "learning_rate": 2.378199895106509e-06, "loss": 0.012411563098430634, "step": 75180 }, { "epoch": 0.7076235294117647, "grad_norm": 0.6267949376779156, "learning_rate": 2.3781208144698022e-06, "loss": 0.017752477526664735, "step": 75185 }, { "epoch": 0.7076705882352942, "grad_norm": 0.45812173374767434, "learning_rate": 2.3780417417214112e-06, "loss": 0.01209891140460968, "step": 75190 }, { "epoch": 0.7077176470588236, "grad_norm": 0.546103893391584, "learning_rate": 2.3779626768600257e-06, "loss": 0.017029008269309996, "step": 75195 }, { "epoch": 0.707764705882353, "grad_norm": 0.8376917921313564, "learning_rate": 2.3778836198843343e-06, "loss": 0.017474573850631715, "step": 75200 }, { "epoch": 0.7078117647058824, "grad_norm": 0.3935696949990578, "learning_rate": 2.3778045707930267e-06, "loss": 0.015137499570846558, "step": 75205 }, { "epoch": 0.7078588235294118, "grad_norm": 0.44620193561558136, "learning_rate": 2.377725529584792e-06, "loss": 0.015795868635177613, "step": 75210 }, { "epoch": 0.7079058823529412, "grad_norm": 0.5244379397997797, "learning_rate": 2.37764649625832e-06, "loss": 0.015698403120040894, "step": 75215 }, { "epoch": 0.7079529411764706, "grad_norm": 0.3983476110037019, "learning_rate": 2.377567470812301e-06, "loss": 0.014762881398200988, "step": 75220 }, { "epoch": 0.708, "grad_norm": 0.5937271158921209, "learning_rate": 2.3774884532454257e-06, "loss": 0.015203282237052917, "step": 75225 }, { "epoch": 0.7080470588235294, "grad_norm": 0.38327381922063625, "learning_rate": 2.3774094435563845e-06, "loss": 0.014663314819335938, "step": 75230 }, { "epoch": 0.7080941176470589, "grad_norm": 0.5355728104841172, "learning_rate": 2.377330441743869e-06, "loss": 0.014535924792289734, "step": 75235 }, { "epoch": 0.7081411764705883, "grad_norm": 0.3559599098497506, "learning_rate": 2.3772514478065704e-06, "loss": 0.012308456003665924, "step": 75240 }, { "epoch": 0.7081882352941177, "grad_norm": 0.6342950817667705, "learning_rate": 2.3771724617431798e-06, "loss": 0.012714412808418275, "step": 75245 }, { "epoch": 0.7082352941176471, "grad_norm": 0.513666080697932, "learning_rate": 2.3770934835523903e-06, "loss": 0.01006360501050949, "step": 75250 }, { "epoch": 0.7082823529411765, "grad_norm": 0.45976868063267146, "learning_rate": 2.3770145132328933e-06, "loss": 0.015761181712150574, "step": 75255 }, { "epoch": 0.7083294117647059, "grad_norm": 0.4420361799616642, "learning_rate": 2.3769355507833814e-06, "loss": 0.01413264572620392, "step": 75260 }, { "epoch": 0.7083764705882353, "grad_norm": 0.5696762791466494, "learning_rate": 2.3768565962025486e-06, "loss": 0.01540677398443222, "step": 75265 }, { "epoch": 0.7084235294117647, "grad_norm": 0.5776611394266313, "learning_rate": 2.376777649489087e-06, "loss": 0.018912607431411745, "step": 75270 }, { "epoch": 0.7084705882352941, "grad_norm": 0.5441073758726378, "learning_rate": 2.3766987106416904e-06, "loss": 0.0149079829454422, "step": 75275 }, { "epoch": 0.7085176470588235, "grad_norm": 0.5909752902080418, "learning_rate": 2.376619779659053e-06, "loss": 0.013531449437141418, "step": 75280 }, { "epoch": 0.708564705882353, "grad_norm": 0.5143172941174908, "learning_rate": 2.376540856539869e-06, "loss": 0.014553724229335785, "step": 75285 }, { "epoch": 0.7086117647058824, "grad_norm": 0.7898122993603438, "learning_rate": 2.376461941282832e-06, "loss": 0.01464543342590332, "step": 75290 }, { "epoch": 0.7086588235294118, "grad_norm": 0.6666623868318349, "learning_rate": 2.376383033886637e-06, "loss": 0.017482280731201172, "step": 75295 }, { "epoch": 0.7087058823529412, "grad_norm": 0.4982233040335358, "learning_rate": 2.3763041343499796e-06, "loss": 0.019962078332901, "step": 75300 }, { "epoch": 0.7087529411764706, "grad_norm": 0.4113225825381662, "learning_rate": 2.3762252426715545e-06, "loss": 0.014691823720932006, "step": 75305 }, { "epoch": 0.7088, "grad_norm": 0.5155784321053576, "learning_rate": 2.376146358850058e-06, "loss": 0.018103229999542236, "step": 75310 }, { "epoch": 0.7088470588235294, "grad_norm": 0.47708743295855, "learning_rate": 2.3760674828841855e-06, "loss": 0.015407320857048035, "step": 75315 }, { "epoch": 0.7088941176470588, "grad_norm": 0.5739507216937265, "learning_rate": 2.375988614772633e-06, "loss": 0.018156135082244874, "step": 75320 }, { "epoch": 0.7089411764705882, "grad_norm": 0.35968553232874256, "learning_rate": 2.3759097545140986e-06, "loss": 0.014862054586410522, "step": 75325 }, { "epoch": 0.7089882352941177, "grad_norm": 0.5036751689970863, "learning_rate": 2.3758309021072774e-06, "loss": 0.016282203793525695, "step": 75330 }, { "epoch": 0.7090352941176471, "grad_norm": 0.5235559654066663, "learning_rate": 2.3757520575508673e-06, "loss": 0.015610437095165252, "step": 75335 }, { "epoch": 0.7090823529411765, "grad_norm": 0.5177651179002235, "learning_rate": 2.375673220843566e-06, "loss": 0.012839284539222718, "step": 75340 }, { "epoch": 0.7091294117647059, "grad_norm": 0.45355049681218745, "learning_rate": 2.3755943919840704e-06, "loss": 0.01420174390077591, "step": 75345 }, { "epoch": 0.7091764705882353, "grad_norm": 0.5018348382949127, "learning_rate": 2.375515570971079e-06, "loss": 0.01162453293800354, "step": 75350 }, { "epoch": 0.7092235294117647, "grad_norm": 0.6128203260678085, "learning_rate": 2.375436757803291e-06, "loss": 0.01685210466384888, "step": 75355 }, { "epoch": 0.7092705882352941, "grad_norm": 0.42054522368895925, "learning_rate": 2.375357952479404e-06, "loss": 0.014673268795013428, "step": 75360 }, { "epoch": 0.7093176470588235, "grad_norm": 0.5381173419235156, "learning_rate": 2.3752791549981173e-06, "loss": 0.01694743037223816, "step": 75365 }, { "epoch": 0.7093647058823529, "grad_norm": 0.40059565164377614, "learning_rate": 2.3752003653581303e-06, "loss": 0.012217651307582855, "step": 75370 }, { "epoch": 0.7094117647058824, "grad_norm": 0.4429982620571719, "learning_rate": 2.375121583558142e-06, "loss": 0.015130923688411712, "step": 75375 }, { "epoch": 0.7094588235294118, "grad_norm": 0.4161016241872068, "learning_rate": 2.375042809596854e-06, "loss": 0.014010335505008697, "step": 75380 }, { "epoch": 0.7095058823529412, "grad_norm": 0.6156423790511719, "learning_rate": 2.3749640434729644e-06, "loss": 0.01721930205821991, "step": 75385 }, { "epoch": 0.7095529411764706, "grad_norm": 0.5259933249216092, "learning_rate": 2.3748852851851743e-06, "loss": 0.013383117318153382, "step": 75390 }, { "epoch": 0.7096, "grad_norm": 0.5112394628526824, "learning_rate": 2.3748065347321853e-06, "loss": 0.014814136922359467, "step": 75395 }, { "epoch": 0.7096470588235294, "grad_norm": 0.3437213291771569, "learning_rate": 2.374727792112698e-06, "loss": 0.013748684525489807, "step": 75400 }, { "epoch": 0.7096941176470588, "grad_norm": 0.6411424066741561, "learning_rate": 2.3746490573254136e-06, "loss": 0.013382871448993684, "step": 75405 }, { "epoch": 0.7097411764705882, "grad_norm": 0.46375817205171743, "learning_rate": 2.374570330369034e-06, "loss": 0.013079462945461274, "step": 75410 }, { "epoch": 0.7097882352941176, "grad_norm": 0.6690856260051821, "learning_rate": 2.3744916112422604e-06, "loss": 0.018256857991218567, "step": 75415 }, { "epoch": 0.709835294117647, "grad_norm": 0.3896577982581592, "learning_rate": 2.374412899943797e-06, "loss": 0.014366063475608825, "step": 75420 }, { "epoch": 0.7098823529411765, "grad_norm": 0.4352583596371489, "learning_rate": 2.3743341964723444e-06, "loss": 0.016581706702709198, "step": 75425 }, { "epoch": 0.7099294117647059, "grad_norm": 0.5377509548768654, "learning_rate": 2.374255500826606e-06, "loss": 0.01585726737976074, "step": 75430 }, { "epoch": 0.7099764705882353, "grad_norm": 0.819693220468038, "learning_rate": 2.374176813005286e-06, "loss": 0.024554330110549926, "step": 75435 }, { "epoch": 0.7100235294117647, "grad_norm": 0.5350937416980299, "learning_rate": 2.374098133007087e-06, "loss": 0.014863508939743041, "step": 75440 }, { "epoch": 0.7100705882352941, "grad_norm": 0.7918082536571658, "learning_rate": 2.374019460830713e-06, "loss": 0.017480719089508056, "step": 75445 }, { "epoch": 0.7101176470588235, "grad_norm": 0.3781040872061302, "learning_rate": 2.373940796474868e-06, "loss": 0.015774717926979064, "step": 75450 }, { "epoch": 0.7101647058823529, "grad_norm": 0.6822457070322422, "learning_rate": 2.3738621399382565e-06, "loss": 0.016640575230121614, "step": 75455 }, { "epoch": 0.7102117647058823, "grad_norm": 0.45629159093860533, "learning_rate": 2.373783491219583e-06, "loss": 0.014898781478405, "step": 75460 }, { "epoch": 0.7102588235294117, "grad_norm": 0.648581561171735, "learning_rate": 2.373704850317553e-06, "loss": 0.012777747213840484, "step": 75465 }, { "epoch": 0.7103058823529412, "grad_norm": 0.46542814907946956, "learning_rate": 2.3736262172308713e-06, "loss": 0.015370050072669983, "step": 75470 }, { "epoch": 0.7103529411764706, "grad_norm": 0.554576169581713, "learning_rate": 2.373547591958244e-06, "loss": 0.018188172578811647, "step": 75475 }, { "epoch": 0.7104, "grad_norm": 0.5028831181450254, "learning_rate": 2.3734689744983757e-06, "loss": 0.01580570638179779, "step": 75480 }, { "epoch": 0.7104470588235294, "grad_norm": 0.5766870835241176, "learning_rate": 2.3733903648499745e-06, "loss": 0.017876413464546204, "step": 75485 }, { "epoch": 0.7104941176470588, "grad_norm": 0.45167515360476307, "learning_rate": 2.3733117630117456e-06, "loss": 0.015446211397647857, "step": 75490 }, { "epoch": 0.7105411764705882, "grad_norm": 0.5652896277395416, "learning_rate": 2.3732331689823965e-06, "loss": 0.019590306282043456, "step": 75495 }, { "epoch": 0.7105882352941176, "grad_norm": 0.7492368766668135, "learning_rate": 2.3731545827606332e-06, "loss": 0.014866474270820617, "step": 75500 }, { "epoch": 0.710635294117647, "grad_norm": 0.48846147305459603, "learning_rate": 2.3730760043451655e-06, "loss": 0.012872803211212158, "step": 75505 }, { "epoch": 0.7106823529411764, "grad_norm": 0.5700448921016703, "learning_rate": 2.372997433734698e-06, "loss": 0.013992170989513397, "step": 75510 }, { "epoch": 0.7107294117647058, "grad_norm": 0.48240698116589764, "learning_rate": 2.372918870927941e-06, "loss": 0.017855712771415712, "step": 75515 }, { "epoch": 0.7107764705882353, "grad_norm": 0.2808769209046877, "learning_rate": 2.372840315923601e-06, "loss": 0.013080567121505737, "step": 75520 }, { "epoch": 0.7108235294117647, "grad_norm": 0.47937241654244006, "learning_rate": 2.3727617687203886e-06, "loss": 0.018135789036750793, "step": 75525 }, { "epoch": 0.7108705882352941, "grad_norm": 0.4442706051924598, "learning_rate": 2.3726832293170116e-06, "loss": 0.01189824640750885, "step": 75530 }, { "epoch": 0.7109176470588235, "grad_norm": 0.49293919330347896, "learning_rate": 2.3726046977121793e-06, "loss": 0.015284892916679383, "step": 75535 }, { "epoch": 0.7109647058823529, "grad_norm": 0.5290844887731031, "learning_rate": 2.3725261739046004e-06, "loss": 0.019164732098579405, "step": 75540 }, { "epoch": 0.7110117647058823, "grad_norm": 0.6518794248701087, "learning_rate": 2.3724476578929865e-06, "loss": 0.015393459796905517, "step": 75545 }, { "epoch": 0.7110588235294117, "grad_norm": 0.5976958534736978, "learning_rate": 2.3723691496760455e-06, "loss": 0.016264474391937254, "step": 75550 }, { "epoch": 0.7111058823529411, "grad_norm": 0.6493074802619581, "learning_rate": 2.37229064925249e-06, "loss": 0.01562855690717697, "step": 75555 }, { "epoch": 0.7111529411764705, "grad_norm": 0.4775195368305797, "learning_rate": 2.372212156621029e-06, "loss": 0.01767842471599579, "step": 75560 }, { "epoch": 0.7112, "grad_norm": 0.3615473222873764, "learning_rate": 2.3721336717803744e-06, "loss": 0.015250599384307862, "step": 75565 }, { "epoch": 0.7112470588235295, "grad_norm": 0.35087658472943195, "learning_rate": 2.3720551947292374e-06, "loss": 0.013268885016441346, "step": 75570 }, { "epoch": 0.7112941176470589, "grad_norm": 0.5133223577060878, "learning_rate": 2.3719767254663286e-06, "loss": 0.012574338912963867, "step": 75575 }, { "epoch": 0.7113411764705883, "grad_norm": 0.5772889341201319, "learning_rate": 2.3718982639903614e-06, "loss": 0.013227342069149018, "step": 75580 }, { "epoch": 0.7113882352941177, "grad_norm": 0.4896669067079214, "learning_rate": 2.3718198103000474e-06, "loss": 0.015310122072696686, "step": 75585 }, { "epoch": 0.711435294117647, "grad_norm": 0.3780688623899546, "learning_rate": 2.371741364394099e-06, "loss": 0.012273424118757249, "step": 75590 }, { "epoch": 0.7114823529411765, "grad_norm": 0.5553049259201254, "learning_rate": 2.371662926271229e-06, "loss": 0.016610485315322877, "step": 75595 }, { "epoch": 0.7115294117647059, "grad_norm": 0.7129104004653922, "learning_rate": 2.3715844959301494e-06, "loss": 0.018004533648490906, "step": 75600 }, { "epoch": 0.7115764705882353, "grad_norm": 0.4366723296236372, "learning_rate": 2.3715060733695757e-06, "loss": 0.014293625950813293, "step": 75605 }, { "epoch": 0.7116235294117647, "grad_norm": 0.3369940426187329, "learning_rate": 2.37142765858822e-06, "loss": 0.013305461406707764, "step": 75610 }, { "epoch": 0.7116705882352942, "grad_norm": 0.4145754068039877, "learning_rate": 2.3713492515847967e-06, "loss": 0.015987949073314668, "step": 75615 }, { "epoch": 0.7117176470588236, "grad_norm": 0.4338756702787431, "learning_rate": 2.37127085235802e-06, "loss": 0.013824549317359925, "step": 75620 }, { "epoch": 0.711764705882353, "grad_norm": 0.4522907235438738, "learning_rate": 2.3711924609066044e-06, "loss": 0.011254566162824631, "step": 75625 }, { "epoch": 0.7118117647058824, "grad_norm": 0.3987247973231021, "learning_rate": 2.3711140772292657e-06, "loss": 0.012082722783088685, "step": 75630 }, { "epoch": 0.7118588235294118, "grad_norm": 0.5434587708155425, "learning_rate": 2.3710357013247178e-06, "loss": 0.015173456072807312, "step": 75635 }, { "epoch": 0.7119058823529412, "grad_norm": 0.39286175570304427, "learning_rate": 2.3709573331916767e-06, "loss": 0.015259715914726257, "step": 75640 }, { "epoch": 0.7119529411764706, "grad_norm": 0.5476490347920293, "learning_rate": 2.370878972828858e-06, "loss": 0.011426056921482085, "step": 75645 }, { "epoch": 0.712, "grad_norm": 0.5506696253842183, "learning_rate": 2.3708006202349775e-06, "loss": 0.013697388768196105, "step": 75650 }, { "epoch": 0.7120470588235294, "grad_norm": 0.5463896804383163, "learning_rate": 2.3707222754087523e-06, "loss": 0.015208952128887177, "step": 75655 }, { "epoch": 0.7120941176470589, "grad_norm": 0.4981421969713632, "learning_rate": 2.3706439383488994e-06, "loss": 0.012800846993923188, "step": 75660 }, { "epoch": 0.7121411764705883, "grad_norm": 0.8237974778689358, "learning_rate": 2.370565609054134e-06, "loss": 0.018155859410762788, "step": 75665 }, { "epoch": 0.7121882352941177, "grad_norm": 0.49567319631566703, "learning_rate": 2.3704872875231743e-06, "loss": 0.022918945550918578, "step": 75670 }, { "epoch": 0.7122352941176471, "grad_norm": 0.4527218898319777, "learning_rate": 2.3704089737547386e-06, "loss": 0.015828409790992738, "step": 75675 }, { "epoch": 0.7122823529411765, "grad_norm": 0.47051581804106835, "learning_rate": 2.370330667747544e-06, "loss": 0.012817886471748353, "step": 75680 }, { "epoch": 0.7123294117647059, "grad_norm": 0.4983511667746667, "learning_rate": 2.3702523695003075e-06, "loss": 0.013499313592910766, "step": 75685 }, { "epoch": 0.7123764705882353, "grad_norm": 0.39394911959588425, "learning_rate": 2.370174079011749e-06, "loss": 0.016368719935417175, "step": 75690 }, { "epoch": 0.7124235294117647, "grad_norm": 0.46786428126273427, "learning_rate": 2.3700957962805877e-06, "loss": 0.014000038802623748, "step": 75695 }, { "epoch": 0.7124705882352941, "grad_norm": 0.4187582928040519, "learning_rate": 2.3700175213055414e-06, "loss": 0.016837182641029357, "step": 75700 }, { "epoch": 0.7125176470588235, "grad_norm": 0.6826444617678404, "learning_rate": 2.3699392540853295e-06, "loss": 0.016947641968727112, "step": 75705 }, { "epoch": 0.712564705882353, "grad_norm": 0.37042254179421297, "learning_rate": 2.3698609946186726e-06, "loss": 0.014784915745258332, "step": 75710 }, { "epoch": 0.7126117647058824, "grad_norm": 0.5101117878457966, "learning_rate": 2.3697827429042894e-06, "loss": 0.016250801086425782, "step": 75715 }, { "epoch": 0.7126588235294118, "grad_norm": 0.4680152493050725, "learning_rate": 2.3697044989409002e-06, "loss": 0.014199385046958923, "step": 75720 }, { "epoch": 0.7127058823529412, "grad_norm": 0.44248158880311056, "learning_rate": 2.369626262727227e-06, "loss": 0.01571963131427765, "step": 75725 }, { "epoch": 0.7127529411764706, "grad_norm": 0.49465039743975114, "learning_rate": 2.3695480342619885e-06, "loss": 0.01544736921787262, "step": 75730 }, { "epoch": 0.7128, "grad_norm": 0.5273436969407702, "learning_rate": 2.3694698135439074e-06, "loss": 0.014135763049125671, "step": 75735 }, { "epoch": 0.7128470588235294, "grad_norm": 0.40902295466446154, "learning_rate": 2.369391600571704e-06, "loss": 0.01270386278629303, "step": 75740 }, { "epoch": 0.7128941176470588, "grad_norm": 0.3543687384130079, "learning_rate": 2.369313395344101e-06, "loss": 0.015156346559524535, "step": 75745 }, { "epoch": 0.7129411764705882, "grad_norm": 0.43037192053874573, "learning_rate": 2.3692351978598195e-06, "loss": 0.021788351237773895, "step": 75750 }, { "epoch": 0.7129882352941177, "grad_norm": 0.30928777577783273, "learning_rate": 2.3691570081175826e-06, "loss": 0.013360612094402313, "step": 75755 }, { "epoch": 0.7130352941176471, "grad_norm": 0.4489065245950093, "learning_rate": 2.369078826116112e-06, "loss": 0.013498303294181824, "step": 75760 }, { "epoch": 0.7130823529411765, "grad_norm": 0.31042908970501226, "learning_rate": 2.3690006518541307e-06, "loss": 0.016444182395935057, "step": 75765 }, { "epoch": 0.7131294117647059, "grad_norm": 0.5104057718076759, "learning_rate": 2.3689224853303628e-06, "loss": 0.014260539412498474, "step": 75770 }, { "epoch": 0.7131764705882353, "grad_norm": 0.3474087645063178, "learning_rate": 2.3688443265435306e-06, "loss": 0.01126481145620346, "step": 75775 }, { "epoch": 0.7132235294117647, "grad_norm": 0.3235620577227008, "learning_rate": 2.368766175492358e-06, "loss": 0.013740625977516175, "step": 75780 }, { "epoch": 0.7132705882352941, "grad_norm": 0.5543739210318985, "learning_rate": 2.3686880321755697e-06, "loss": 0.013454322516918183, "step": 75785 }, { "epoch": 0.7133176470588235, "grad_norm": 0.5478600349389359, "learning_rate": 2.3686098965918897e-06, "loss": 0.01945016235113144, "step": 75790 }, { "epoch": 0.7133647058823529, "grad_norm": 0.40233236337999634, "learning_rate": 2.368531768740043e-06, "loss": 0.015584355592727661, "step": 75795 }, { "epoch": 0.7134117647058823, "grad_norm": 0.17874390520142694, "learning_rate": 2.3684536486187536e-06, "loss": 0.013923631608486175, "step": 75800 }, { "epoch": 0.7134588235294118, "grad_norm": 0.39350280865173654, "learning_rate": 2.3683755362267467e-06, "loss": 0.014654968678951264, "step": 75805 }, { "epoch": 0.7135058823529412, "grad_norm": 0.5489814028962191, "learning_rate": 2.3682974315627487e-06, "loss": 0.014940157532691956, "step": 75810 }, { "epoch": 0.7135529411764706, "grad_norm": 0.33051614727692175, "learning_rate": 2.368219334625486e-06, "loss": 0.011230215430259705, "step": 75815 }, { "epoch": 0.7136, "grad_norm": 0.43538350345996873, "learning_rate": 2.3681412454136823e-06, "loss": 0.02445513606071472, "step": 75820 }, { "epoch": 0.7136470588235294, "grad_norm": 0.782688298187035, "learning_rate": 2.3680631639260663e-06, "loss": 0.0134004145860672, "step": 75825 }, { "epoch": 0.7136941176470588, "grad_norm": 0.7256815756098006, "learning_rate": 2.367985090161364e-06, "loss": 0.015341462194919586, "step": 75830 }, { "epoch": 0.7137411764705882, "grad_norm": 0.6083756975141534, "learning_rate": 2.367907024118302e-06, "loss": 0.018511706590652467, "step": 75835 }, { "epoch": 0.7137882352941176, "grad_norm": 0.33997962319089725, "learning_rate": 2.367828965795608e-06, "loss": 0.015186689794063568, "step": 75840 }, { "epoch": 0.713835294117647, "grad_norm": 0.3705269915454143, "learning_rate": 2.3677509151920088e-06, "loss": 0.014604626595973969, "step": 75845 }, { "epoch": 0.7138823529411765, "grad_norm": 0.9244360846963052, "learning_rate": 2.367672872306233e-06, "loss": 0.011121708899736404, "step": 75850 }, { "epoch": 0.7139294117647059, "grad_norm": 0.480725518250864, "learning_rate": 2.367594837137009e-06, "loss": 0.01892722398042679, "step": 75855 }, { "epoch": 0.7139764705882353, "grad_norm": 0.45662013419392433, "learning_rate": 2.367516809683065e-06, "loss": 0.01929626315832138, "step": 75860 }, { "epoch": 0.7140235294117647, "grad_norm": 0.5046135194261208, "learning_rate": 2.3674387899431292e-06, "loss": 0.015764543414115907, "step": 75865 }, { "epoch": 0.7140705882352941, "grad_norm": 0.2951162180068203, "learning_rate": 2.367360777915931e-06, "loss": 0.019527295231819154, "step": 75870 }, { "epoch": 0.7141176470588235, "grad_norm": 0.5471103351848479, "learning_rate": 2.3672827736001998e-06, "loss": 0.01753508150577545, "step": 75875 }, { "epoch": 0.7141647058823529, "grad_norm": 0.3402928705117429, "learning_rate": 2.367204776994665e-06, "loss": 0.01324986070394516, "step": 75880 }, { "epoch": 0.7142117647058823, "grad_norm": 0.31153545277252215, "learning_rate": 2.3671267880980567e-06, "loss": 0.01254696249961853, "step": 75885 }, { "epoch": 0.7142588235294117, "grad_norm": 0.5237779217504278, "learning_rate": 2.3670488069091054e-06, "loss": 0.013319088518619538, "step": 75890 }, { "epoch": 0.7143058823529411, "grad_norm": 0.301634061466354, "learning_rate": 2.366970833426541e-06, "loss": 0.015062466263771057, "step": 75895 }, { "epoch": 0.7143529411764706, "grad_norm": 0.4837462997519706, "learning_rate": 2.3668928676490947e-06, "loss": 0.012890870869159698, "step": 75900 }, { "epoch": 0.7144, "grad_norm": 0.5610611652174733, "learning_rate": 2.3668149095754976e-06, "loss": 0.014811943471431731, "step": 75905 }, { "epoch": 0.7144470588235294, "grad_norm": 0.5048198039048737, "learning_rate": 2.366736959204481e-06, "loss": 0.01424955427646637, "step": 75910 }, { "epoch": 0.7144941176470588, "grad_norm": 0.6858736978984838, "learning_rate": 2.3666590165347765e-06, "loss": 0.018842554092407225, "step": 75915 }, { "epoch": 0.7145411764705882, "grad_norm": 0.8322996025113337, "learning_rate": 2.3665810815651156e-06, "loss": 0.02997828722000122, "step": 75920 }, { "epoch": 0.7145882352941176, "grad_norm": 0.5506703865142776, "learning_rate": 2.3665031542942316e-06, "loss": 0.016134078800678252, "step": 75925 }, { "epoch": 0.714635294117647, "grad_norm": 0.5005105756459913, "learning_rate": 2.366425234720857e-06, "loss": 0.014090408384799958, "step": 75930 }, { "epoch": 0.7146823529411764, "grad_norm": 0.5301962476044947, "learning_rate": 2.366347322843723e-06, "loss": 0.014380064606666566, "step": 75935 }, { "epoch": 0.7147294117647058, "grad_norm": 0.4239143956203204, "learning_rate": 2.3662694186615644e-06, "loss": 0.01615835726261139, "step": 75940 }, { "epoch": 0.7147764705882353, "grad_norm": 0.7678553568640732, "learning_rate": 2.366191522173114e-06, "loss": 0.02078622281551361, "step": 75945 }, { "epoch": 0.7148235294117647, "grad_norm": 0.47034680573748666, "learning_rate": 2.3661136333771066e-06, "loss": 0.017304812371730805, "step": 75950 }, { "epoch": 0.7148705882352941, "grad_norm": 0.6408086755181045, "learning_rate": 2.3660357522722736e-06, "loss": 0.018999400734901428, "step": 75955 }, { "epoch": 0.7149176470588235, "grad_norm": 0.4070205125768844, "learning_rate": 2.365957878857352e-06, "loss": 0.015654455125331878, "step": 75960 }, { "epoch": 0.714964705882353, "grad_norm": 0.7625048774990749, "learning_rate": 2.3658800131310753e-06, "loss": 0.015087887644767761, "step": 75965 }, { "epoch": 0.7150117647058823, "grad_norm": 0.5389434370131747, "learning_rate": 2.3658021550921777e-06, "loss": 0.01515050083398819, "step": 75970 }, { "epoch": 0.7150588235294117, "grad_norm": 0.3657631397844696, "learning_rate": 2.3657243047393955e-06, "loss": 0.019073909521102904, "step": 75975 }, { "epoch": 0.7151058823529411, "grad_norm": 0.5829974483302609, "learning_rate": 2.3656464620714638e-06, "loss": 0.015923865139484406, "step": 75980 }, { "epoch": 0.7151529411764705, "grad_norm": 0.33235623041971213, "learning_rate": 2.3655686270871183e-06, "loss": 0.012524423003196717, "step": 75985 }, { "epoch": 0.7152, "grad_norm": 0.6644214081735659, "learning_rate": 2.365490799785095e-06, "loss": 0.012551026046276092, "step": 75990 }, { "epoch": 0.7152470588235295, "grad_norm": 0.5985207509551335, "learning_rate": 2.3654129801641303e-06, "loss": 0.016495963931083678, "step": 75995 }, { "epoch": 0.7152941176470589, "grad_norm": 0.524269572650651, "learning_rate": 2.3653351682229612e-06, "loss": 0.016390621662139893, "step": 76000 }, { "epoch": 0.7153411764705883, "grad_norm": 0.4934704218891493, "learning_rate": 2.365257363960324e-06, "loss": 0.015246812999248505, "step": 76005 }, { "epoch": 0.7153882352941177, "grad_norm": 0.5631413619334391, "learning_rate": 2.365179567374956e-06, "loss": 0.015968939661979674, "step": 76010 }, { "epoch": 0.7154352941176471, "grad_norm": 0.4619020464777826, "learning_rate": 2.365101778465595e-06, "loss": 0.016235385835170747, "step": 76015 }, { "epoch": 0.7154823529411765, "grad_norm": 0.489271443753207, "learning_rate": 2.3650239972309786e-06, "loss": 0.013137036561965942, "step": 76020 }, { "epoch": 0.7155294117647059, "grad_norm": 0.61855004035829, "learning_rate": 2.3649462236698447e-06, "loss": 0.0156917929649353, "step": 76025 }, { "epoch": 0.7155764705882353, "grad_norm": 0.3899714335226109, "learning_rate": 2.364868457780932e-06, "loss": 0.014747534692287446, "step": 76030 }, { "epoch": 0.7156235294117647, "grad_norm": 0.49561789860018235, "learning_rate": 2.3647906995629792e-06, "loss": 0.019304129481315612, "step": 76035 }, { "epoch": 0.7156705882352942, "grad_norm": 0.6505560617894846, "learning_rate": 2.364712949014725e-06, "loss": 0.01945669800043106, "step": 76040 }, { "epoch": 0.7157176470588236, "grad_norm": 0.6894476085430454, "learning_rate": 2.3646352061349097e-06, "loss": 0.012792731821537017, "step": 76045 }, { "epoch": 0.715764705882353, "grad_norm": 0.7642410803715716, "learning_rate": 2.3645574709222705e-06, "loss": 0.01960398256778717, "step": 76050 }, { "epoch": 0.7158117647058824, "grad_norm": 0.42308795597148446, "learning_rate": 2.3644797433755493e-06, "loss": 0.01794877052307129, "step": 76055 }, { "epoch": 0.7158588235294118, "grad_norm": 0.541432913904417, "learning_rate": 2.3644020234934856e-06, "loss": 0.015794716775417328, "step": 76060 }, { "epoch": 0.7159058823529412, "grad_norm": 0.5269359244031838, "learning_rate": 2.3643243112748196e-06, "loss": 0.019204112887382507, "step": 76065 }, { "epoch": 0.7159529411764706, "grad_norm": 0.3759752643984715, "learning_rate": 2.364246606718292e-06, "loss": 0.01790654957294464, "step": 76070 }, { "epoch": 0.716, "grad_norm": 0.5150333012319924, "learning_rate": 2.3641689098226443e-06, "loss": 0.013760063052177429, "step": 76075 }, { "epoch": 0.7160470588235294, "grad_norm": 0.8074155101769724, "learning_rate": 2.364091220586617e-06, "loss": 0.017223671078681946, "step": 76080 }, { "epoch": 0.7160941176470588, "grad_norm": 0.42425533447524805, "learning_rate": 2.364013539008953e-06, "loss": 0.01861345171928406, "step": 76085 }, { "epoch": 0.7161411764705883, "grad_norm": 0.5015204209175943, "learning_rate": 2.3639358650883925e-06, "loss": 0.018934577703475952, "step": 76090 }, { "epoch": 0.7161882352941177, "grad_norm": 0.41110271503273205, "learning_rate": 2.3638581988236787e-06, "loss": 0.01582668423652649, "step": 76095 }, { "epoch": 0.7162352941176471, "grad_norm": 0.5014960644297267, "learning_rate": 2.3637805402135534e-06, "loss": 0.020316848158836366, "step": 76100 }, { "epoch": 0.7162823529411765, "grad_norm": 0.417065378350599, "learning_rate": 2.3637028892567595e-06, "loss": 0.013513705134391785, "step": 76105 }, { "epoch": 0.7163294117647059, "grad_norm": 0.5057277136999119, "learning_rate": 2.3636252459520406e-06, "loss": 0.015265101194381714, "step": 76110 }, { "epoch": 0.7163764705882353, "grad_norm": 0.5094163306684234, "learning_rate": 2.363547610298139e-06, "loss": 0.012208539247512817, "step": 76115 }, { "epoch": 0.7164235294117647, "grad_norm": 0.41560298959406533, "learning_rate": 2.3634699822938e-06, "loss": 0.01825603246688843, "step": 76120 }, { "epoch": 0.7164705882352941, "grad_norm": 0.5995715418922182, "learning_rate": 2.363392361937765e-06, "loss": 0.01801655888557434, "step": 76125 }, { "epoch": 0.7165176470588235, "grad_norm": 0.3821255775739109, "learning_rate": 2.3633147492287797e-06, "loss": 0.01470598578453064, "step": 76130 }, { "epoch": 0.716564705882353, "grad_norm": 0.4289963137111052, "learning_rate": 2.363237144165589e-06, "loss": 0.01673557609319687, "step": 76135 }, { "epoch": 0.7166117647058824, "grad_norm": 0.5062934391265864, "learning_rate": 2.363159546746936e-06, "loss": 0.017149247229099274, "step": 76140 }, { "epoch": 0.7166588235294118, "grad_norm": 0.5269611114423594, "learning_rate": 2.363081956971567e-06, "loss": 0.014630797505378722, "step": 76145 }, { "epoch": 0.7167058823529412, "grad_norm": 0.4772294304644294, "learning_rate": 2.363004374838228e-06, "loss": 0.01729484498500824, "step": 76150 }, { "epoch": 0.7167529411764706, "grad_norm": 0.5155512485725265, "learning_rate": 2.362926800345663e-06, "loss": 0.012853877246379852, "step": 76155 }, { "epoch": 0.7168, "grad_norm": 0.7770194013954458, "learning_rate": 2.362849233492618e-06, "loss": 0.013787499070167542, "step": 76160 }, { "epoch": 0.7168470588235294, "grad_norm": 0.3894076013004775, "learning_rate": 2.36277167427784e-06, "loss": 0.012813866138458252, "step": 76165 }, { "epoch": 0.7168941176470588, "grad_norm": 0.6485510614722012, "learning_rate": 2.3626941227000756e-06, "loss": 0.01538873165845871, "step": 76170 }, { "epoch": 0.7169411764705882, "grad_norm": 0.6422231286685106, "learning_rate": 2.3626165787580706e-06, "loss": 0.016098056733608247, "step": 76175 }, { "epoch": 0.7169882352941176, "grad_norm": 0.4916486033003392, "learning_rate": 2.362539042450573e-06, "loss": 0.015412138402462005, "step": 76180 }, { "epoch": 0.7170352941176471, "grad_norm": 0.5646097812865394, "learning_rate": 2.3624615137763297e-06, "loss": 0.013988512754440307, "step": 76185 }, { "epoch": 0.7170823529411765, "grad_norm": 0.6467977629538418, "learning_rate": 2.362383992734088e-06, "loss": 0.017625151574611662, "step": 76190 }, { "epoch": 0.7171294117647059, "grad_norm": 0.5403170001360503, "learning_rate": 2.362306479322597e-06, "loss": 0.015027454495429993, "step": 76195 }, { "epoch": 0.7171764705882353, "grad_norm": 0.46251730838425603, "learning_rate": 2.362228973540603e-06, "loss": 0.014845067262649536, "step": 76200 }, { "epoch": 0.7172235294117647, "grad_norm": 0.5994737995833296, "learning_rate": 2.362151475386856e-06, "loss": 0.016535697877407073, "step": 76205 }, { "epoch": 0.7172705882352941, "grad_norm": 0.5153109285198919, "learning_rate": 2.3620739848601047e-06, "loss": 0.01743185818195343, "step": 76210 }, { "epoch": 0.7173176470588235, "grad_norm": 0.701707993925501, "learning_rate": 2.3619965019590974e-06, "loss": 0.016001537442207336, "step": 76215 }, { "epoch": 0.7173647058823529, "grad_norm": 0.643275666017531, "learning_rate": 2.3619190266825836e-06, "loss": 0.0156881183385849, "step": 76220 }, { "epoch": 0.7174117647058823, "grad_norm": 0.45036453277281724, "learning_rate": 2.3618415590293138e-06, "loss": 0.01670180559158325, "step": 76225 }, { "epoch": 0.7174588235294118, "grad_norm": 0.4983740174577069, "learning_rate": 2.361764098998037e-06, "loss": 0.016535264253616334, "step": 76230 }, { "epoch": 0.7175058823529412, "grad_norm": 0.35699411152024574, "learning_rate": 2.3616866465875036e-06, "loss": 0.01616959571838379, "step": 76235 }, { "epoch": 0.7175529411764706, "grad_norm": 0.6402043902934099, "learning_rate": 2.3616092017964644e-06, "loss": 0.014098703861236572, "step": 76240 }, { "epoch": 0.7176, "grad_norm": 0.5131341997409948, "learning_rate": 2.3615317646236697e-06, "loss": 0.015019720792770386, "step": 76245 }, { "epoch": 0.7176470588235294, "grad_norm": 0.30089026162036525, "learning_rate": 2.3614543350678713e-06, "loss": 0.013263365626335144, "step": 76250 }, { "epoch": 0.7176941176470588, "grad_norm": 0.32523187876931514, "learning_rate": 2.3613769131278198e-06, "loss": 0.013325144350528718, "step": 76255 }, { "epoch": 0.7177411764705882, "grad_norm": 0.5751714304399457, "learning_rate": 2.361299498802267e-06, "loss": 0.01666800379753113, "step": 76260 }, { "epoch": 0.7177882352941176, "grad_norm": 0.5926469926860697, "learning_rate": 2.3612220920899655e-06, "loss": 0.01804048866033554, "step": 76265 }, { "epoch": 0.717835294117647, "grad_norm": 0.35853728091951476, "learning_rate": 2.3611446929896666e-06, "loss": 0.01846521645784378, "step": 76270 }, { "epoch": 0.7178823529411764, "grad_norm": 0.38612940613312907, "learning_rate": 2.3610673015001234e-06, "loss": 0.01957162469625473, "step": 76275 }, { "epoch": 0.7179294117647059, "grad_norm": 0.44908581415689214, "learning_rate": 2.3609899176200883e-06, "loss": 0.015047374367713928, "step": 76280 }, { "epoch": 0.7179764705882353, "grad_norm": 0.5639349767989776, "learning_rate": 2.360912541348315e-06, "loss": 0.015021073818206786, "step": 76285 }, { "epoch": 0.7180235294117647, "grad_norm": 0.3730383311948673, "learning_rate": 2.3608351726835565e-06, "loss": 0.017535318434238435, "step": 76290 }, { "epoch": 0.7180705882352941, "grad_norm": 0.43947917686319526, "learning_rate": 2.360757811624566e-06, "loss": 0.01410050094127655, "step": 76295 }, { "epoch": 0.7181176470588235, "grad_norm": 0.3297823142745317, "learning_rate": 2.3606804581700975e-06, "loss": 0.015511815249919892, "step": 76300 }, { "epoch": 0.7181647058823529, "grad_norm": 0.36738239324286953, "learning_rate": 2.360603112318906e-06, "loss": 0.01643776297569275, "step": 76305 }, { "epoch": 0.7182117647058823, "grad_norm": 0.44046472650630497, "learning_rate": 2.3605257740697453e-06, "loss": 0.014647218585014343, "step": 76310 }, { "epoch": 0.7182588235294117, "grad_norm": 0.3385361179452584, "learning_rate": 2.3604484434213705e-06, "loss": 0.0150750532746315, "step": 76315 }, { "epoch": 0.7183058823529411, "grad_norm": 0.43190507958512153, "learning_rate": 2.3603711203725367e-06, "loss": 0.018986910581588745, "step": 76320 }, { "epoch": 0.7183529411764706, "grad_norm": 0.5704715922444796, "learning_rate": 2.3602938049219997e-06, "loss": 0.014816805720329285, "step": 76325 }, { "epoch": 0.7184, "grad_norm": 0.6173180333655534, "learning_rate": 2.360216497068514e-06, "loss": 0.017831350862979888, "step": 76330 }, { "epoch": 0.7184470588235294, "grad_norm": 0.3856990625342202, "learning_rate": 2.360139196810836e-06, "loss": 0.01793069839477539, "step": 76335 }, { "epoch": 0.7184941176470588, "grad_norm": 0.2951197996402534, "learning_rate": 2.3600619041477225e-06, "loss": 0.014578036963939667, "step": 76340 }, { "epoch": 0.7185411764705882, "grad_norm": 0.41367368685732786, "learning_rate": 2.359984619077929e-06, "loss": 0.016157159209251405, "step": 76345 }, { "epoch": 0.7185882352941176, "grad_norm": 0.4539795690203111, "learning_rate": 2.359907341600213e-06, "loss": 0.014955246448516845, "step": 76350 }, { "epoch": 0.718635294117647, "grad_norm": 0.2909602858179016, "learning_rate": 2.3598300717133315e-06, "loss": 0.016802436113357543, "step": 76355 }, { "epoch": 0.7186823529411764, "grad_norm": 0.3338815396994053, "learning_rate": 2.3597528094160423e-06, "loss": 0.01864044964313507, "step": 76360 }, { "epoch": 0.7187294117647058, "grad_norm": 1.687045730556923, "learning_rate": 2.359675554707102e-06, "loss": 0.015780109167099, "step": 76365 }, { "epoch": 0.7187764705882352, "grad_norm": 0.5402406496340909, "learning_rate": 2.359598307585269e-06, "loss": 0.016635498404502867, "step": 76370 }, { "epoch": 0.7188235294117648, "grad_norm": 0.5520356666590324, "learning_rate": 2.359521068049302e-06, "loss": 0.014056015014648437, "step": 76375 }, { "epoch": 0.7188705882352942, "grad_norm": 0.3370931716365211, "learning_rate": 2.359443836097959e-06, "loss": 0.01371997743844986, "step": 76380 }, { "epoch": 0.7189176470588236, "grad_norm": 0.49622724326238987, "learning_rate": 2.3593666117299983e-06, "loss": 0.018716903030872346, "step": 76385 }, { "epoch": 0.718964705882353, "grad_norm": 0.5060890606128015, "learning_rate": 2.35928939494418e-06, "loss": 0.018106773495674133, "step": 76390 }, { "epoch": 0.7190117647058824, "grad_norm": 0.42277049774759334, "learning_rate": 2.3592121857392627e-06, "loss": 0.012154991924762725, "step": 76395 }, { "epoch": 0.7190588235294118, "grad_norm": 0.5825339802887319, "learning_rate": 2.3591349841140067e-06, "loss": 0.013384997844696045, "step": 76400 }, { "epoch": 0.7191058823529412, "grad_norm": 0.43618481014052907, "learning_rate": 2.359057790067171e-06, "loss": 0.017701554298400878, "step": 76405 }, { "epoch": 0.7191529411764706, "grad_norm": 0.6396462643427705, "learning_rate": 2.3589806035975163e-06, "loss": 0.019304236769676207, "step": 76410 }, { "epoch": 0.7192, "grad_norm": 0.31829338027876514, "learning_rate": 2.3589034247038036e-06, "loss": 0.009700141847133636, "step": 76415 }, { "epoch": 0.7192470588235295, "grad_norm": 0.5539499366542205, "learning_rate": 2.3588262533847927e-06, "loss": 0.01477048546075821, "step": 76420 }, { "epoch": 0.7192941176470589, "grad_norm": 0.41331405111749653, "learning_rate": 2.358749089639245e-06, "loss": 0.015175233781337737, "step": 76425 }, { "epoch": 0.7193411764705883, "grad_norm": 0.29417049536775003, "learning_rate": 2.358671933465922e-06, "loss": 0.013089214265346528, "step": 76430 }, { "epoch": 0.7193882352941177, "grad_norm": 0.6903272707957476, "learning_rate": 2.3585947848635855e-06, "loss": 0.014065222442150116, "step": 76435 }, { "epoch": 0.7194352941176471, "grad_norm": 0.3713434554357189, "learning_rate": 2.3585176438309974e-06, "loss": 0.01581951081752777, "step": 76440 }, { "epoch": 0.7194823529411765, "grad_norm": 1.1936589267982314, "learning_rate": 2.358440510366919e-06, "loss": 0.01483590602874756, "step": 76445 }, { "epoch": 0.7195294117647059, "grad_norm": 3.6241895245016162, "learning_rate": 2.358363384470114e-06, "loss": 0.015565672516822815, "step": 76450 }, { "epoch": 0.7195764705882353, "grad_norm": 0.7103877409323534, "learning_rate": 2.3582862661393444e-06, "loss": 0.016249257326126098, "step": 76455 }, { "epoch": 0.7196235294117647, "grad_norm": 0.35948259103968755, "learning_rate": 2.358209155373373e-06, "loss": 0.017654460668563843, "step": 76460 }, { "epoch": 0.7196705882352942, "grad_norm": 0.46914679831011846, "learning_rate": 2.3581320521709636e-06, "loss": 0.012971657514572143, "step": 76465 }, { "epoch": 0.7197176470588236, "grad_norm": 0.4989705484246742, "learning_rate": 2.3580549565308804e-06, "loss": 0.011937204003334045, "step": 76470 }, { "epoch": 0.719764705882353, "grad_norm": 0.4496109619607771, "learning_rate": 2.3579778684518865e-06, "loss": 0.016687488555908202, "step": 76475 }, { "epoch": 0.7198117647058824, "grad_norm": 0.3975173034797255, "learning_rate": 2.3579007879327454e-06, "loss": 0.018161213397979735, "step": 76480 }, { "epoch": 0.7198588235294118, "grad_norm": 0.3505859501803923, "learning_rate": 2.357823714972223e-06, "loss": 0.015205606818199158, "step": 76485 }, { "epoch": 0.7199058823529412, "grad_norm": 0.33551769822649274, "learning_rate": 2.3577466495690833e-06, "loss": 0.014261049032211304, "step": 76490 }, { "epoch": 0.7199529411764706, "grad_norm": 0.38219981860076957, "learning_rate": 2.357669591722091e-06, "loss": 0.01510760337114334, "step": 76495 }, { "epoch": 0.72, "grad_norm": 0.5959282306879962, "learning_rate": 2.357592541430012e-06, "loss": 0.016711360216140746, "step": 76500 }, { "epoch": 0.7200470588235294, "grad_norm": 0.3132303465315538, "learning_rate": 2.357515498691612e-06, "loss": 0.017362138628959654, "step": 76505 }, { "epoch": 0.7200941176470588, "grad_norm": 0.5598370180294894, "learning_rate": 2.357438463505656e-06, "loss": 0.014162012934684753, "step": 76510 }, { "epoch": 0.7201411764705883, "grad_norm": 0.6435774870922761, "learning_rate": 2.3573614358709113e-06, "loss": 0.015109953284263612, "step": 76515 }, { "epoch": 0.7201882352941177, "grad_norm": 0.4978072578122095, "learning_rate": 2.3572844157861433e-06, "loss": 0.015067625045776366, "step": 76520 }, { "epoch": 0.7202352941176471, "grad_norm": 0.45053947783401177, "learning_rate": 2.35720740325012e-06, "loss": 0.01476643979549408, "step": 76525 }, { "epoch": 0.7202823529411765, "grad_norm": 0.3613635266114804, "learning_rate": 2.3571303982616063e-06, "loss": 0.01542128026485443, "step": 76530 }, { "epoch": 0.7203294117647059, "grad_norm": 0.27647364548270925, "learning_rate": 2.357053400819371e-06, "loss": 0.012753540277481079, "step": 76535 }, { "epoch": 0.7203764705882353, "grad_norm": 0.47209528305899184, "learning_rate": 2.356976410922181e-06, "loss": 0.01756037175655365, "step": 76540 }, { "epoch": 0.7204235294117647, "grad_norm": 0.39747711020549614, "learning_rate": 2.3568994285688054e-06, "loss": 0.0123406782746315, "step": 76545 }, { "epoch": 0.7204705882352941, "grad_norm": 0.6132666561864248, "learning_rate": 2.3568224537580107e-06, "loss": 0.01376519799232483, "step": 76550 }, { "epoch": 0.7205176470588235, "grad_norm": 0.4405894808392952, "learning_rate": 2.3567454864885663e-06, "loss": 0.014197732508182525, "step": 76555 }, { "epoch": 0.720564705882353, "grad_norm": 0.4448983899397537, "learning_rate": 2.3566685267592407e-06, "loss": 0.015260982513427734, "step": 76560 }, { "epoch": 0.7206117647058824, "grad_norm": 0.5197061177110617, "learning_rate": 2.3565915745688023e-06, "loss": 0.015316173434257507, "step": 76565 }, { "epoch": 0.7206588235294118, "grad_norm": 0.6778761449157723, "learning_rate": 2.3565146299160215e-06, "loss": 0.01584511399269104, "step": 76570 }, { "epoch": 0.7207058823529412, "grad_norm": 0.5866972733096427, "learning_rate": 2.3564376927996664e-06, "loss": 0.021514050662517548, "step": 76575 }, { "epoch": 0.7207529411764706, "grad_norm": 0.3822411474870798, "learning_rate": 2.3563607632185075e-06, "loss": 0.015985670685768127, "step": 76580 }, { "epoch": 0.7208, "grad_norm": 0.5345709876446517, "learning_rate": 2.3562838411713153e-06, "loss": 0.021161162853240968, "step": 76585 }, { "epoch": 0.7208470588235294, "grad_norm": 0.6850195520539174, "learning_rate": 2.3562069266568596e-06, "loss": 0.015152528882026672, "step": 76590 }, { "epoch": 0.7208941176470588, "grad_norm": 0.4070732662156372, "learning_rate": 2.3561300196739116e-06, "loss": 0.019236572086811066, "step": 76595 }, { "epoch": 0.7209411764705882, "grad_norm": 0.5997742023942098, "learning_rate": 2.3560531202212416e-06, "loss": 0.015163266658782959, "step": 76600 }, { "epoch": 0.7209882352941176, "grad_norm": 0.6260286615157781, "learning_rate": 2.3559762282976213e-06, "loss": 0.01576220542192459, "step": 76605 }, { "epoch": 0.7210352941176471, "grad_norm": 0.34927579344308496, "learning_rate": 2.355899343901821e-06, "loss": 0.016858604550361634, "step": 76610 }, { "epoch": 0.7210823529411765, "grad_norm": 0.7431225989510428, "learning_rate": 2.3558224670326144e-06, "loss": 0.017749108374118805, "step": 76615 }, { "epoch": 0.7211294117647059, "grad_norm": 0.4547154218898127, "learning_rate": 2.3557455976887723e-06, "loss": 0.016247640550136565, "step": 76620 }, { "epoch": 0.7211764705882353, "grad_norm": 0.5902251466168196, "learning_rate": 2.355668735869068e-06, "loss": 0.016698965430259706, "step": 76625 }, { "epoch": 0.7212235294117647, "grad_norm": 0.4955846995933193, "learning_rate": 2.355591881572273e-06, "loss": 0.01513725221157074, "step": 76630 }, { "epoch": 0.7212705882352941, "grad_norm": 0.42261407739957646, "learning_rate": 2.3555150347971603e-06, "loss": 0.015755778551101683, "step": 76635 }, { "epoch": 0.7213176470588235, "grad_norm": 0.6513266551262004, "learning_rate": 2.355438195542504e-06, "loss": 0.02039945125579834, "step": 76640 }, { "epoch": 0.7213647058823529, "grad_norm": 0.43249456103628087, "learning_rate": 2.3553613638070768e-06, "loss": 0.01973299980163574, "step": 76645 }, { "epoch": 0.7214117647058823, "grad_norm": 0.47132650543772764, "learning_rate": 2.355284539589652e-06, "loss": 0.015433117747306824, "step": 76650 }, { "epoch": 0.7214588235294118, "grad_norm": 0.37028217299592886, "learning_rate": 2.355207722889005e-06, "loss": 0.017160648107528688, "step": 76655 }, { "epoch": 0.7215058823529412, "grad_norm": 0.5093001182179581, "learning_rate": 2.355130913703909e-06, "loss": 0.012665566802024842, "step": 76660 }, { "epoch": 0.7215529411764706, "grad_norm": 0.280587876617723, "learning_rate": 2.355054112033139e-06, "loss": 0.013471260666847229, "step": 76665 }, { "epoch": 0.7216, "grad_norm": 0.5695501430175919, "learning_rate": 2.3549773178754696e-06, "loss": 0.013684403896331788, "step": 76670 }, { "epoch": 0.7216470588235294, "grad_norm": 0.3229208777197547, "learning_rate": 2.3549005312296764e-06, "loss": 0.013606974482536316, "step": 76675 }, { "epoch": 0.7216941176470588, "grad_norm": 0.49403970875504344, "learning_rate": 2.3548237520945345e-06, "loss": 0.013130387663841248, "step": 76680 }, { "epoch": 0.7217411764705882, "grad_norm": 0.5840305254661825, "learning_rate": 2.354746980468819e-06, "loss": 0.014609222114086152, "step": 76685 }, { "epoch": 0.7217882352941176, "grad_norm": 0.5055961914929962, "learning_rate": 2.354670216351307e-06, "loss": 0.015192800760269165, "step": 76690 }, { "epoch": 0.721835294117647, "grad_norm": 0.40880672046001887, "learning_rate": 2.3545934597407743e-06, "loss": 0.0160731241106987, "step": 76695 }, { "epoch": 0.7218823529411764, "grad_norm": 0.6809983518506574, "learning_rate": 2.354516710635997e-06, "loss": 0.014313572645187378, "step": 76700 }, { "epoch": 0.7219294117647059, "grad_norm": 0.36746443659637196, "learning_rate": 2.3544399690357523e-06, "loss": 0.016367501020431517, "step": 76705 }, { "epoch": 0.7219764705882353, "grad_norm": 0.3757424388123265, "learning_rate": 2.3543632349388175e-06, "loss": 0.013680800795555115, "step": 76710 }, { "epoch": 0.7220235294117647, "grad_norm": 0.436106300014686, "learning_rate": 2.35428650834397e-06, "loss": 0.011047768592834472, "step": 76715 }, { "epoch": 0.7220705882352941, "grad_norm": 0.36228093346149387, "learning_rate": 2.354209789249987e-06, "loss": 0.016770441830158234, "step": 76720 }, { "epoch": 0.7221176470588235, "grad_norm": 0.6897684068689178, "learning_rate": 2.3541330776556463e-06, "loss": 0.014549437165260314, "step": 76725 }, { "epoch": 0.7221647058823529, "grad_norm": 0.4428148124852651, "learning_rate": 2.354056373559727e-06, "loss": 0.014507350325584412, "step": 76730 }, { "epoch": 0.7222117647058823, "grad_norm": 0.5453188240324262, "learning_rate": 2.353979676961006e-06, "loss": 0.022225943207740784, "step": 76735 }, { "epoch": 0.7222588235294117, "grad_norm": 0.5648540292630438, "learning_rate": 2.353902987858264e-06, "loss": 0.0168831005692482, "step": 76740 }, { "epoch": 0.7223058823529411, "grad_norm": 0.4760480576879618, "learning_rate": 2.353826306250279e-06, "loss": 0.011401426792144776, "step": 76745 }, { "epoch": 0.7223529411764706, "grad_norm": 0.4715672021229086, "learning_rate": 2.35374963213583e-06, "loss": 0.015050706267356873, "step": 76750 }, { "epoch": 0.7224, "grad_norm": 0.33719335347152946, "learning_rate": 2.3536729655136976e-06, "loss": 0.01299210786819458, "step": 76755 }, { "epoch": 0.7224470588235294, "grad_norm": 0.5525602801152261, "learning_rate": 2.3535963063826604e-06, "loss": 0.01726205199956894, "step": 76760 }, { "epoch": 0.7224941176470588, "grad_norm": 0.3186859793710698, "learning_rate": 2.3535196547414993e-06, "loss": 0.018771621584892272, "step": 76765 }, { "epoch": 0.7225411764705882, "grad_norm": 0.649662160798044, "learning_rate": 2.353443010588995e-06, "loss": 0.01536841094493866, "step": 76770 }, { "epoch": 0.7225882352941176, "grad_norm": 0.8829691463736593, "learning_rate": 2.3533663739239284e-06, "loss": 0.0188930481672287, "step": 76775 }, { "epoch": 0.722635294117647, "grad_norm": 0.590937762136777, "learning_rate": 2.353289744745079e-06, "loss": 0.01579010784626007, "step": 76780 }, { "epoch": 0.7226823529411764, "grad_norm": 0.40551431936735344, "learning_rate": 2.3532131230512294e-06, "loss": 0.021947988867759706, "step": 76785 }, { "epoch": 0.7227294117647058, "grad_norm": 0.5563381709608948, "learning_rate": 2.353136508841161e-06, "loss": 0.018406420946121216, "step": 76790 }, { "epoch": 0.7227764705882352, "grad_norm": 0.5838088420383228, "learning_rate": 2.3530599021136557e-06, "loss": 0.015123000741004944, "step": 76795 }, { "epoch": 0.7228235294117648, "grad_norm": 0.5221713146604884, "learning_rate": 2.3529833028674947e-06, "loss": 0.012734149396419526, "step": 76800 }, { "epoch": 0.7228705882352942, "grad_norm": 0.38676570844608005, "learning_rate": 2.3529067111014613e-06, "loss": 0.014056438207626342, "step": 76805 }, { "epoch": 0.7229176470588236, "grad_norm": 0.45396934609534983, "learning_rate": 2.3528301268143374e-06, "loss": 0.015152648091316223, "step": 76810 }, { "epoch": 0.722964705882353, "grad_norm": 0.7109162964277546, "learning_rate": 2.3527535500049073e-06, "loss": 0.0162484809756279, "step": 76815 }, { "epoch": 0.7230117647058824, "grad_norm": 0.35154030551020715, "learning_rate": 2.3526769806719524e-06, "loss": 0.012699839472770692, "step": 76820 }, { "epoch": 0.7230588235294118, "grad_norm": 0.6719462612068243, "learning_rate": 2.3526004188142576e-06, "loss": 0.019636158645153046, "step": 76825 }, { "epoch": 0.7231058823529412, "grad_norm": 0.40000559210030623, "learning_rate": 2.3525238644306062e-06, "loss": 0.015481318533420562, "step": 76830 }, { "epoch": 0.7231529411764706, "grad_norm": 0.5303413555683213, "learning_rate": 2.352447317519782e-06, "loss": 0.019972696900367737, "step": 76835 }, { "epoch": 0.7232, "grad_norm": 0.3698372262480173, "learning_rate": 2.352370778080569e-06, "loss": 0.01342678964138031, "step": 76840 }, { "epoch": 0.7232470588235295, "grad_norm": 0.413148684315611, "learning_rate": 2.352294246111753e-06, "loss": 0.013656261563301086, "step": 76845 }, { "epoch": 0.7232941176470589, "grad_norm": 0.6491514444279095, "learning_rate": 2.352217721612118e-06, "loss": 0.01372487097978592, "step": 76850 }, { "epoch": 0.7233411764705883, "grad_norm": 0.5827251546962797, "learning_rate": 2.352141204580449e-06, "loss": 0.016937044262886048, "step": 76855 }, { "epoch": 0.7233882352941177, "grad_norm": 0.4615954924426904, "learning_rate": 2.3520646950155315e-06, "loss": 0.015935218334197997, "step": 76860 }, { "epoch": 0.7234352941176471, "grad_norm": 0.4719180549814216, "learning_rate": 2.351988192916152e-06, "loss": 0.014621458947658539, "step": 76865 }, { "epoch": 0.7234823529411765, "grad_norm": 0.5069966349309978, "learning_rate": 2.3519116982810957e-06, "loss": 0.01442718505859375, "step": 76870 }, { "epoch": 0.7235294117647059, "grad_norm": 0.7455588270782582, "learning_rate": 2.3518352111091487e-06, "loss": 0.01698295772075653, "step": 76875 }, { "epoch": 0.7235764705882353, "grad_norm": 0.5482577162920962, "learning_rate": 2.351758731399099e-06, "loss": 0.019591861963272096, "step": 76880 }, { "epoch": 0.7236235294117647, "grad_norm": 0.4479717360958297, "learning_rate": 2.3516822591497317e-06, "loss": 0.02045457363128662, "step": 76885 }, { "epoch": 0.7236705882352941, "grad_norm": 0.5275742594407963, "learning_rate": 2.351605794359834e-06, "loss": 0.015635761618614196, "step": 76890 }, { "epoch": 0.7237176470588236, "grad_norm": 0.4625467004803295, "learning_rate": 2.351529337028194e-06, "loss": 0.012839971482753754, "step": 76895 }, { "epoch": 0.723764705882353, "grad_norm": 0.6332080424621895, "learning_rate": 2.3514528871535994e-06, "loss": 0.012649926543235778, "step": 76900 }, { "epoch": 0.7238117647058824, "grad_norm": 0.5309569264098617, "learning_rate": 2.3513764447348377e-06, "loss": 0.01895945370197296, "step": 76905 }, { "epoch": 0.7238588235294118, "grad_norm": 0.4173940305056856, "learning_rate": 2.351300009770697e-06, "loss": 0.016831083595752715, "step": 76910 }, { "epoch": 0.7239058823529412, "grad_norm": 0.7350634044037296, "learning_rate": 2.3512235822599657e-06, "loss": 0.016888460516929625, "step": 76915 }, { "epoch": 0.7239529411764706, "grad_norm": 0.6646268435497951, "learning_rate": 2.3511471622014333e-06, "loss": 0.012857705354690552, "step": 76920 }, { "epoch": 0.724, "grad_norm": 0.5496648801623383, "learning_rate": 2.351070749593888e-06, "loss": 0.014663457870483398, "step": 76925 }, { "epoch": 0.7240470588235294, "grad_norm": 0.3546914112273754, "learning_rate": 2.3509943444361193e-06, "loss": 0.014209896326065063, "step": 76930 }, { "epoch": 0.7240941176470588, "grad_norm": 0.38513735502403446, "learning_rate": 2.350917946726917e-06, "loss": 0.014849495887756348, "step": 76935 }, { "epoch": 0.7241411764705883, "grad_norm": 0.3606262237882284, "learning_rate": 2.3508415564650706e-06, "loss": 0.012909649312496186, "step": 76940 }, { "epoch": 0.7241882352941177, "grad_norm": 0.4848881270970531, "learning_rate": 2.3507651736493704e-06, "loss": 0.014088074862957, "step": 76945 }, { "epoch": 0.7242352941176471, "grad_norm": 0.3900940589393342, "learning_rate": 2.3506887982786065e-06, "loss": 0.01701710820198059, "step": 76950 }, { "epoch": 0.7242823529411765, "grad_norm": 0.4032144510608111, "learning_rate": 2.35061243035157e-06, "loss": 0.013708318769931793, "step": 76955 }, { "epoch": 0.7243294117647059, "grad_norm": 0.4789830647190619, "learning_rate": 2.350536069867052e-06, "loss": 0.018696328997611998, "step": 76960 }, { "epoch": 0.7243764705882353, "grad_norm": 0.41367280348672336, "learning_rate": 2.3504597168238435e-06, "loss": 0.014445269107818603, "step": 76965 }, { "epoch": 0.7244235294117647, "grad_norm": 0.5362280513955785, "learning_rate": 2.3503833712207354e-06, "loss": 0.02357480227947235, "step": 76970 }, { "epoch": 0.7244705882352941, "grad_norm": 0.4370601809561231, "learning_rate": 2.35030703305652e-06, "loss": 0.011420002579689026, "step": 76975 }, { "epoch": 0.7245176470588235, "grad_norm": 0.42258871234938006, "learning_rate": 2.35023070232999e-06, "loss": 0.017807713150978087, "step": 76980 }, { "epoch": 0.7245647058823529, "grad_norm": 0.6824749530518265, "learning_rate": 2.350154379039936e-06, "loss": 0.012173054367303848, "step": 76985 }, { "epoch": 0.7246117647058824, "grad_norm": 0.40049172116806553, "learning_rate": 2.350078063185152e-06, "loss": 0.01750391274690628, "step": 76990 }, { "epoch": 0.7246588235294118, "grad_norm": 0.3249912707922927, "learning_rate": 2.35000175476443e-06, "loss": 0.017162820696830748, "step": 76995 }, { "epoch": 0.7247058823529412, "grad_norm": 0.875903790156634, "learning_rate": 2.3499254537765645e-06, "loss": 0.017341598868370056, "step": 77000 }, { "epoch": 0.7247529411764706, "grad_norm": 0.3710667426941208, "learning_rate": 2.3498491602203475e-06, "loss": 0.013226321339607239, "step": 77005 }, { "epoch": 0.7248, "grad_norm": 0.34117655395744595, "learning_rate": 2.349772874094573e-06, "loss": 0.012296147644519806, "step": 77010 }, { "epoch": 0.7248470588235294, "grad_norm": 0.5067300860190801, "learning_rate": 2.3496965953980354e-06, "loss": 0.015844208002090455, "step": 77015 }, { "epoch": 0.7248941176470588, "grad_norm": 0.47204943398780447, "learning_rate": 2.3496203241295287e-06, "loss": 0.014616253972053527, "step": 77020 }, { "epoch": 0.7249411764705882, "grad_norm": 0.57674547639951, "learning_rate": 2.349544060287847e-06, "loss": 0.01674174815416336, "step": 77025 }, { "epoch": 0.7249882352941176, "grad_norm": 0.5375225346099276, "learning_rate": 2.3494678038717857e-06, "loss": 0.011636091768741608, "step": 77030 }, { "epoch": 0.7250352941176471, "grad_norm": 0.6222364634633291, "learning_rate": 2.349391554880139e-06, "loss": 0.016277021169662474, "step": 77035 }, { "epoch": 0.7250823529411765, "grad_norm": 0.35529574272112013, "learning_rate": 2.349315313311704e-06, "loss": 0.02096986025571823, "step": 77040 }, { "epoch": 0.7251294117647059, "grad_norm": 0.4188252404891443, "learning_rate": 2.3492390791652742e-06, "loss": 0.01480177789926529, "step": 77045 }, { "epoch": 0.7251764705882353, "grad_norm": 0.7547225181911427, "learning_rate": 2.349162852439647e-06, "loss": 0.019549041986465454, "step": 77050 }, { "epoch": 0.7252235294117647, "grad_norm": 0.46053849379424106, "learning_rate": 2.349086633133618e-06, "loss": 0.016995324194431304, "step": 77055 }, { "epoch": 0.7252705882352941, "grad_norm": 0.5191099847320119, "learning_rate": 2.3490104212459837e-06, "loss": 0.014493000507354737, "step": 77060 }, { "epoch": 0.7253176470588235, "grad_norm": 0.6329092949559425, "learning_rate": 2.3489342167755397e-06, "loss": 0.020295512676239014, "step": 77065 }, { "epoch": 0.7253647058823529, "grad_norm": 0.38868652533613013, "learning_rate": 2.3488580197210846e-06, "loss": 0.012305381149053574, "step": 77070 }, { "epoch": 0.7254117647058823, "grad_norm": 0.29865147420702837, "learning_rate": 2.3487818300814153e-06, "loss": 0.013214272260665894, "step": 77075 }, { "epoch": 0.7254588235294117, "grad_norm": 0.649212742631152, "learning_rate": 2.3487056478553287e-06, "loss": 0.01639137864112854, "step": 77080 }, { "epoch": 0.7255058823529412, "grad_norm": 0.6410162833399622, "learning_rate": 2.3486294730416227e-06, "loss": 0.019052761793136596, "step": 77085 }, { "epoch": 0.7255529411764706, "grad_norm": 0.51692838746875, "learning_rate": 2.348553305639096e-06, "loss": 0.018866574764251708, "step": 77090 }, { "epoch": 0.7256, "grad_norm": 0.5481557486804409, "learning_rate": 2.3484771456465462e-06, "loss": 0.02263057231903076, "step": 77095 }, { "epoch": 0.7256470588235294, "grad_norm": 0.42157455230210444, "learning_rate": 2.3484009930627723e-06, "loss": 0.01401723176240921, "step": 77100 }, { "epoch": 0.7256941176470588, "grad_norm": 0.549141075508047, "learning_rate": 2.348324847886573e-06, "loss": 0.013981571793556214, "step": 77105 }, { "epoch": 0.7257411764705882, "grad_norm": 0.5474201750662391, "learning_rate": 2.3482487101167475e-06, "loss": 0.01861943453550339, "step": 77110 }, { "epoch": 0.7257882352941176, "grad_norm": 0.6276563526653597, "learning_rate": 2.3481725797520954e-06, "loss": 0.014373591542243958, "step": 77115 }, { "epoch": 0.725835294117647, "grad_norm": 0.4691128067720418, "learning_rate": 2.3480964567914154e-06, "loss": 0.015090462565422059, "step": 77120 }, { "epoch": 0.7258823529411764, "grad_norm": 0.4662366683726704, "learning_rate": 2.3480203412335094e-06, "loss": 0.01532292515039444, "step": 77125 }, { "epoch": 0.7259294117647059, "grad_norm": 0.4024355288812254, "learning_rate": 2.3479442330771764e-06, "loss": 0.02024970352649689, "step": 77130 }, { "epoch": 0.7259764705882353, "grad_norm": 0.4826081799049043, "learning_rate": 2.3478681323212165e-06, "loss": 0.013486744463443756, "step": 77135 }, { "epoch": 0.7260235294117647, "grad_norm": 0.6696002154397287, "learning_rate": 2.3477920389644316e-06, "loss": 0.020562615990638734, "step": 77140 }, { "epoch": 0.7260705882352941, "grad_norm": 0.41276437729441984, "learning_rate": 2.3477159530056223e-06, "loss": 0.02014939785003662, "step": 77145 }, { "epoch": 0.7261176470588235, "grad_norm": 0.741285816181578, "learning_rate": 2.3476398744435897e-06, "loss": 0.013778142631053925, "step": 77150 }, { "epoch": 0.7261647058823529, "grad_norm": 0.3378363581009858, "learning_rate": 2.3475638032771355e-06, "loss": 0.013961368799209594, "step": 77155 }, { "epoch": 0.7262117647058823, "grad_norm": 0.26762488552661856, "learning_rate": 2.3474877395050617e-06, "loss": 0.014661289751529694, "step": 77160 }, { "epoch": 0.7262588235294117, "grad_norm": 1.159890426340609, "learning_rate": 2.3474116831261705e-06, "loss": 0.0282900333404541, "step": 77165 }, { "epoch": 0.7263058823529411, "grad_norm": 0.5853927268620621, "learning_rate": 2.347335634139264e-06, "loss": 0.012049201130867004, "step": 77170 }, { "epoch": 0.7263529411764705, "grad_norm": 0.4672601918862309, "learning_rate": 2.347259592543145e-06, "loss": 0.016816452145576477, "step": 77175 }, { "epoch": 0.7264, "grad_norm": 0.3562458606003168, "learning_rate": 2.347183558336616e-06, "loss": 0.01797177791595459, "step": 77180 }, { "epoch": 0.7264470588235294, "grad_norm": 0.4495995877192458, "learning_rate": 2.3471075315184814e-06, "loss": 0.013198149204254151, "step": 77185 }, { "epoch": 0.7264941176470588, "grad_norm": 0.9123822297906033, "learning_rate": 2.3470315120875443e-06, "loss": 0.017324432730674744, "step": 77190 }, { "epoch": 0.7265411764705882, "grad_norm": 0.5135416367264978, "learning_rate": 2.346955500042608e-06, "loss": 0.012987153232097625, "step": 77195 }, { "epoch": 0.7265882352941176, "grad_norm": 0.47230928062281063, "learning_rate": 2.3468794953824762e-06, "loss": 0.01564463973045349, "step": 77200 }, { "epoch": 0.726635294117647, "grad_norm": 0.538469591023594, "learning_rate": 2.346803498105955e-06, "loss": 0.014535406231880188, "step": 77205 }, { "epoch": 0.7266823529411764, "grad_norm": 0.3731079826298663, "learning_rate": 2.3467275082118473e-06, "loss": 0.01493898183107376, "step": 77210 }, { "epoch": 0.7267294117647058, "grad_norm": 0.5409980152400096, "learning_rate": 2.346651525698958e-06, "loss": 0.013329418003559112, "step": 77215 }, { "epoch": 0.7267764705882352, "grad_norm": 0.5969461632889028, "learning_rate": 2.3465755505660934e-06, "loss": 0.01924113780260086, "step": 77220 }, { "epoch": 0.7268235294117648, "grad_norm": 0.3831912989401543, "learning_rate": 2.3464995828120575e-06, "loss": 0.016784437000751495, "step": 77225 }, { "epoch": 0.7268705882352942, "grad_norm": 0.36349491483858176, "learning_rate": 2.3464236224356574e-06, "loss": 0.015664611756801606, "step": 77230 }, { "epoch": 0.7269176470588236, "grad_norm": 0.4651780888780406, "learning_rate": 2.3463476694356976e-06, "loss": 0.014694461226463318, "step": 77235 }, { "epoch": 0.726964705882353, "grad_norm": 0.4512853579767511, "learning_rate": 2.346271723810985e-06, "loss": 0.012424292415380478, "step": 77240 }, { "epoch": 0.7270117647058824, "grad_norm": 0.5040022536903148, "learning_rate": 2.3461957855603266e-06, "loss": 0.017694497108459474, "step": 77245 }, { "epoch": 0.7270588235294118, "grad_norm": 0.480976525837601, "learning_rate": 2.3461198546825282e-06, "loss": 0.016553223133087158, "step": 77250 }, { "epoch": 0.7271058823529412, "grad_norm": 0.4685760540359013, "learning_rate": 2.346043931176397e-06, "loss": 0.013138847053050995, "step": 77255 }, { "epoch": 0.7271529411764706, "grad_norm": 0.2880067156426791, "learning_rate": 2.3459680150407416e-06, "loss": 0.015358330309391021, "step": 77260 }, { "epoch": 0.7272, "grad_norm": 0.32652778603700744, "learning_rate": 2.345892106274368e-06, "loss": 0.014478009939193726, "step": 77265 }, { "epoch": 0.7272470588235294, "grad_norm": 0.48717681506555116, "learning_rate": 2.3458162048760844e-06, "loss": 0.01559370756149292, "step": 77270 }, { "epoch": 0.7272941176470589, "grad_norm": 0.3506119483380539, "learning_rate": 2.345740310844699e-06, "loss": 0.014684391021728516, "step": 77275 }, { "epoch": 0.7273411764705883, "grad_norm": 0.5562094032233219, "learning_rate": 2.3456644241790204e-06, "loss": 0.013793188333511352, "step": 77280 }, { "epoch": 0.7273882352941177, "grad_norm": 0.6956113991495697, "learning_rate": 2.345588544877857e-06, "loss": 0.0209499254822731, "step": 77285 }, { "epoch": 0.7274352941176471, "grad_norm": 0.4051939028122497, "learning_rate": 2.3455126729400177e-06, "loss": 0.01792566478252411, "step": 77290 }, { "epoch": 0.7274823529411765, "grad_norm": 0.3931827904923909, "learning_rate": 2.3454368083643124e-06, "loss": 0.012933914363384248, "step": 77295 }, { "epoch": 0.7275294117647059, "grad_norm": 0.4517670707003939, "learning_rate": 2.345360951149549e-06, "loss": 0.015442150831222533, "step": 77300 }, { "epoch": 0.7275764705882353, "grad_norm": 0.45718007308698694, "learning_rate": 2.345285101294539e-06, "loss": 0.011574803292751313, "step": 77305 }, { "epoch": 0.7276235294117647, "grad_norm": 0.44493649159459914, "learning_rate": 2.345209258798091e-06, "loss": 0.016982756555080414, "step": 77310 }, { "epoch": 0.7276705882352941, "grad_norm": 0.6595898809392137, "learning_rate": 2.3451334236590158e-06, "loss": 0.018786048889160155, "step": 77315 }, { "epoch": 0.7277176470588236, "grad_norm": 0.41434792698717954, "learning_rate": 2.345057595876124e-06, "loss": 0.0153952956199646, "step": 77320 }, { "epoch": 0.727764705882353, "grad_norm": 0.322489934243985, "learning_rate": 2.3449817754482266e-06, "loss": 0.01371767818927765, "step": 77325 }, { "epoch": 0.7278117647058824, "grad_norm": 0.5597356882153243, "learning_rate": 2.344905962374134e-06, "loss": 0.016551074385643006, "step": 77330 }, { "epoch": 0.7278588235294118, "grad_norm": 0.5233542787665624, "learning_rate": 2.344830156652658e-06, "loss": 0.01432456374168396, "step": 77335 }, { "epoch": 0.7279058823529412, "grad_norm": 0.4444944611201506, "learning_rate": 2.3447543582826106e-06, "loss": 0.014144027233123779, "step": 77340 }, { "epoch": 0.7279529411764706, "grad_norm": 0.5216467161701644, "learning_rate": 2.3446785672628026e-06, "loss": 0.013165144622325898, "step": 77345 }, { "epoch": 0.728, "grad_norm": 0.3108022852671523, "learning_rate": 2.344602783592047e-06, "loss": 0.013003383576869965, "step": 77350 }, { "epoch": 0.7280470588235294, "grad_norm": 0.5127798498451009, "learning_rate": 2.3445270072691557e-06, "loss": 0.016947492957115173, "step": 77355 }, { "epoch": 0.7280941176470588, "grad_norm": 0.4643753113773244, "learning_rate": 2.344451238292942e-06, "loss": 0.02215121388435364, "step": 77360 }, { "epoch": 0.7281411764705882, "grad_norm": 0.41884339660592607, "learning_rate": 2.3443754766622183e-06, "loss": 0.0130476713180542, "step": 77365 }, { "epoch": 0.7281882352941177, "grad_norm": 0.37260525882335, "learning_rate": 2.344299722375798e-06, "loss": 0.011400136351585387, "step": 77370 }, { "epoch": 0.7282352941176471, "grad_norm": 0.49535721164456165, "learning_rate": 2.3442239754324948e-06, "loss": 0.012334606051445008, "step": 77375 }, { "epoch": 0.7282823529411765, "grad_norm": 0.363955252322648, "learning_rate": 2.3441482358311226e-06, "loss": 0.013171663880348206, "step": 77380 }, { "epoch": 0.7283294117647059, "grad_norm": 0.3427615766813199, "learning_rate": 2.344072503570494e-06, "loss": 0.016467563807964325, "step": 77385 }, { "epoch": 0.7283764705882353, "grad_norm": 0.5134786722209154, "learning_rate": 2.343996778649425e-06, "loss": 0.012862204015254975, "step": 77390 }, { "epoch": 0.7284235294117647, "grad_norm": 0.6027121543633398, "learning_rate": 2.34392106106673e-06, "loss": 0.015195724368095399, "step": 77395 }, { "epoch": 0.7284705882352941, "grad_norm": 0.3120636737913855, "learning_rate": 2.343845350821223e-06, "loss": 0.0162528857588768, "step": 77400 }, { "epoch": 0.7285176470588235, "grad_norm": 0.34566363965172486, "learning_rate": 2.3437696479117196e-06, "loss": 0.015325793623924255, "step": 77405 }, { "epoch": 0.7285647058823529, "grad_norm": 0.44661676770413095, "learning_rate": 2.343693952337034e-06, "loss": 0.017985329031944275, "step": 77410 }, { "epoch": 0.7286117647058824, "grad_norm": 0.2637539083317159, "learning_rate": 2.343618264095984e-06, "loss": 0.011244527250528335, "step": 77415 }, { "epoch": 0.7286588235294118, "grad_norm": 0.30284754696963695, "learning_rate": 2.343542583187384e-06, "loss": 0.010705526173114776, "step": 77420 }, { "epoch": 0.7287058823529412, "grad_norm": 0.514458098845353, "learning_rate": 2.3434669096100507e-06, "loss": 0.018289029598236084, "step": 77425 }, { "epoch": 0.7287529411764706, "grad_norm": 0.4958916064744084, "learning_rate": 2.3433912433628003e-06, "loss": 0.016529105603694916, "step": 77430 }, { "epoch": 0.7288, "grad_norm": 0.5144071854058947, "learning_rate": 2.3433155844444495e-06, "loss": 0.01312481015920639, "step": 77435 }, { "epoch": 0.7288470588235294, "grad_norm": 0.50791108874651, "learning_rate": 2.343239932853815e-06, "loss": 0.011858738213777541, "step": 77440 }, { "epoch": 0.7288941176470588, "grad_norm": 0.9834630520790941, "learning_rate": 2.3431642885897144e-06, "loss": 0.017308613657951354, "step": 77445 }, { "epoch": 0.7289411764705882, "grad_norm": 0.3890096524259056, "learning_rate": 2.3430886516509652e-06, "loss": 0.013282901048660279, "step": 77450 }, { "epoch": 0.7289882352941176, "grad_norm": 0.6513213823514251, "learning_rate": 2.3430130220363854e-06, "loss": 0.014078152179718018, "step": 77455 }, { "epoch": 0.729035294117647, "grad_norm": 0.4754513070119482, "learning_rate": 2.342937399744793e-06, "loss": 0.015696889162063597, "step": 77460 }, { "epoch": 0.7290823529411765, "grad_norm": 0.6273028249645568, "learning_rate": 2.3428617847750054e-06, "loss": 0.015695303678512573, "step": 77465 }, { "epoch": 0.7291294117647059, "grad_norm": 0.5347127185285957, "learning_rate": 2.342786177125843e-06, "loss": 0.015834513306617736, "step": 77470 }, { "epoch": 0.7291764705882353, "grad_norm": 0.5630049583474154, "learning_rate": 2.3427105767961223e-06, "loss": 0.014643388986587524, "step": 77475 }, { "epoch": 0.7292235294117647, "grad_norm": 0.47965593607770596, "learning_rate": 2.3426349837846636e-06, "loss": 0.013741660118103027, "step": 77480 }, { "epoch": 0.7292705882352941, "grad_norm": 0.24696444984043464, "learning_rate": 2.342559398090287e-06, "loss": 0.01201671063899994, "step": 77485 }, { "epoch": 0.7293176470588235, "grad_norm": 0.22728969013578365, "learning_rate": 2.342483819711811e-06, "loss": 0.014704638719558715, "step": 77490 }, { "epoch": 0.7293647058823529, "grad_norm": 0.5426399669747239, "learning_rate": 2.342408248648056e-06, "loss": 0.019553172588348388, "step": 77495 }, { "epoch": 0.7294117647058823, "grad_norm": 0.6056866015457716, "learning_rate": 2.342332684897842e-06, "loss": 0.014226463437080384, "step": 77500 }, { "epoch": 0.7294588235294117, "grad_norm": 0.48507750348406636, "learning_rate": 2.3422571284599896e-06, "loss": 0.01187623143196106, "step": 77505 }, { "epoch": 0.7295058823529412, "grad_norm": 0.5019629936425297, "learning_rate": 2.3421815793333193e-06, "loss": 0.016768723726272583, "step": 77510 }, { "epoch": 0.7295529411764706, "grad_norm": 0.48102149185206605, "learning_rate": 2.3421060375166526e-06, "loss": 0.0142200767993927, "step": 77515 }, { "epoch": 0.7296, "grad_norm": 0.5119424702028166, "learning_rate": 2.3420305030088096e-06, "loss": 0.011778445541858673, "step": 77520 }, { "epoch": 0.7296470588235294, "grad_norm": 0.5103381656950422, "learning_rate": 2.3419549758086133e-06, "loss": 0.011033616960048676, "step": 77525 }, { "epoch": 0.7296941176470588, "grad_norm": 0.7407799463750776, "learning_rate": 2.3418794559148846e-06, "loss": 0.014796757698059082, "step": 77530 }, { "epoch": 0.7297411764705882, "grad_norm": 0.5418383479350347, "learning_rate": 2.3418039433264446e-06, "loss": 0.012257419526576996, "step": 77535 }, { "epoch": 0.7297882352941176, "grad_norm": 0.48455506913002694, "learning_rate": 2.3417284380421174e-06, "loss": 0.011980628967285157, "step": 77540 }, { "epoch": 0.729835294117647, "grad_norm": 0.44975111266378387, "learning_rate": 2.341652940060725e-06, "loss": 0.011727380752563476, "step": 77545 }, { "epoch": 0.7298823529411764, "grad_norm": 0.48030863311540095, "learning_rate": 2.34157744938109e-06, "loss": 0.016808710992336273, "step": 77550 }, { "epoch": 0.7299294117647059, "grad_norm": 0.4997499031630717, "learning_rate": 2.341501966002035e-06, "loss": 0.01754913628101349, "step": 77555 }, { "epoch": 0.7299764705882353, "grad_norm": 0.6854028390731401, "learning_rate": 2.341426489922385e-06, "loss": 0.019621174037456512, "step": 77560 }, { "epoch": 0.7300235294117647, "grad_norm": 0.37839162307968793, "learning_rate": 2.3413510211409614e-06, "loss": 0.014257077872753144, "step": 77565 }, { "epoch": 0.7300705882352941, "grad_norm": 0.645452602099807, "learning_rate": 2.34127555965659e-06, "loss": 0.020838946104049683, "step": 77570 }, { "epoch": 0.7301176470588235, "grad_norm": 0.4166059517216403, "learning_rate": 2.3412001054680937e-06, "loss": 0.011666120588779449, "step": 77575 }, { "epoch": 0.7301647058823529, "grad_norm": 0.5634887827894137, "learning_rate": 2.341124658574298e-06, "loss": 0.017000605165958405, "step": 77580 }, { "epoch": 0.7302117647058823, "grad_norm": 0.46992530741092825, "learning_rate": 2.3410492189740266e-06, "loss": 0.015614800155162811, "step": 77585 }, { "epoch": 0.7302588235294117, "grad_norm": 0.5476008141144605, "learning_rate": 2.340973786666105e-06, "loss": 0.01575720012187958, "step": 77590 }, { "epoch": 0.7303058823529411, "grad_norm": 0.5798925193885175, "learning_rate": 2.340898361649358e-06, "loss": 0.01295185536146164, "step": 77595 }, { "epoch": 0.7303529411764705, "grad_norm": 0.37560328727080317, "learning_rate": 2.3408229439226116e-06, "loss": 0.012697204947471619, "step": 77600 }, { "epoch": 0.7304, "grad_norm": 0.5844365185140983, "learning_rate": 2.340747533484691e-06, "loss": 0.01561306118965149, "step": 77605 }, { "epoch": 0.7304470588235294, "grad_norm": 0.6259935389623841, "learning_rate": 2.340672130334423e-06, "loss": 0.015482677519321442, "step": 77610 }, { "epoch": 0.7304941176470588, "grad_norm": 0.46276676806052297, "learning_rate": 2.340596734470633e-06, "loss": 0.01687193810939789, "step": 77615 }, { "epoch": 0.7305411764705882, "grad_norm": 0.6742427084888759, "learning_rate": 2.340521345892148e-06, "loss": 0.02200918793678284, "step": 77620 }, { "epoch": 0.7305882352941176, "grad_norm": 0.43647636927198913, "learning_rate": 2.3404459645977947e-06, "loss": 0.015929263830184937, "step": 77625 }, { "epoch": 0.730635294117647, "grad_norm": 0.5691792320272661, "learning_rate": 2.340370590586401e-06, "loss": 0.014938941597938538, "step": 77630 }, { "epoch": 0.7306823529411764, "grad_norm": 0.39689448064205934, "learning_rate": 2.340295223856793e-06, "loss": 0.0187034010887146, "step": 77635 }, { "epoch": 0.7307294117647058, "grad_norm": 0.6027203124394476, "learning_rate": 2.340219864407798e-06, "loss": 0.015640616416931152, "step": 77640 }, { "epoch": 0.7307764705882352, "grad_norm": 0.5007084042605673, "learning_rate": 2.3401445122382457e-06, "loss": 0.015341562032699586, "step": 77645 }, { "epoch": 0.7308235294117648, "grad_norm": 0.716426793406995, "learning_rate": 2.3400691673469624e-06, "loss": 0.01291336715221405, "step": 77650 }, { "epoch": 0.7308705882352942, "grad_norm": 0.5603108787388055, "learning_rate": 2.339993829732778e-06, "loss": 0.020312196016311644, "step": 77655 }, { "epoch": 0.7309176470588236, "grad_norm": 0.5485684029407758, "learning_rate": 2.33991849939452e-06, "loss": 0.01554928719997406, "step": 77660 }, { "epoch": 0.730964705882353, "grad_norm": 0.4637368286351499, "learning_rate": 2.339843176331018e-06, "loss": 0.01581116020679474, "step": 77665 }, { "epoch": 0.7310117647058824, "grad_norm": 0.5769546846504419, "learning_rate": 2.3397678605411006e-06, "loss": 0.016745808720588683, "step": 77670 }, { "epoch": 0.7310588235294118, "grad_norm": 0.4265702692970863, "learning_rate": 2.339692552023598e-06, "loss": 0.012764446437358856, "step": 77675 }, { "epoch": 0.7311058823529412, "grad_norm": 0.4544674237205374, "learning_rate": 2.339617250777339e-06, "loss": 0.011977843940258026, "step": 77680 }, { "epoch": 0.7311529411764706, "grad_norm": 0.3605721644924485, "learning_rate": 2.3395419568011546e-06, "loss": 0.0126284658908844, "step": 77685 }, { "epoch": 0.7312, "grad_norm": 0.512219894835231, "learning_rate": 2.3394666700938743e-06, "loss": 0.013572284579277038, "step": 77690 }, { "epoch": 0.7312470588235294, "grad_norm": 0.4304426510368014, "learning_rate": 2.339391390654329e-06, "loss": 0.015086846053600311, "step": 77695 }, { "epoch": 0.7312941176470589, "grad_norm": 0.3930030933642469, "learning_rate": 2.3393161184813485e-06, "loss": 0.013575592637062072, "step": 77700 }, { "epoch": 0.7313411764705883, "grad_norm": 0.4244546324120435, "learning_rate": 2.3392408535737656e-06, "loss": 0.016578125953674316, "step": 77705 }, { "epoch": 0.7313882352941177, "grad_norm": 0.4881530869293098, "learning_rate": 2.33916559593041e-06, "loss": 0.019686231017112733, "step": 77710 }, { "epoch": 0.7314352941176471, "grad_norm": 0.5039795082600027, "learning_rate": 2.3390903455501142e-06, "loss": 0.013790461421012878, "step": 77715 }, { "epoch": 0.7314823529411765, "grad_norm": 0.5064608007637277, "learning_rate": 2.3390151024317097e-06, "loss": 0.020434366166591646, "step": 77720 }, { "epoch": 0.7315294117647059, "grad_norm": 0.6539611325099298, "learning_rate": 2.3389398665740285e-06, "loss": 0.014182190597057342, "step": 77725 }, { "epoch": 0.7315764705882353, "grad_norm": 0.5242111403855871, "learning_rate": 2.3388646379759028e-06, "loss": 0.02142854630947113, "step": 77730 }, { "epoch": 0.7316235294117647, "grad_norm": 0.5926052101087862, "learning_rate": 2.338789416636166e-06, "loss": 0.015283522009849549, "step": 77735 }, { "epoch": 0.7316705882352941, "grad_norm": 0.4628571968787484, "learning_rate": 2.33871420255365e-06, "loss": 0.013781097531318665, "step": 77740 }, { "epoch": 0.7317176470588236, "grad_norm": 0.43256341052700614, "learning_rate": 2.3386389957271884e-06, "loss": 0.018037140369415283, "step": 77745 }, { "epoch": 0.731764705882353, "grad_norm": 0.3518976660359948, "learning_rate": 2.3385637961556142e-06, "loss": 0.0157664954662323, "step": 77750 }, { "epoch": 0.7318117647058824, "grad_norm": 0.5797777472772512, "learning_rate": 2.338488603837762e-06, "loss": 0.01811041384935379, "step": 77755 }, { "epoch": 0.7318588235294118, "grad_norm": 0.388154752285574, "learning_rate": 2.3384134187724654e-06, "loss": 0.010596705973148346, "step": 77760 }, { "epoch": 0.7319058823529412, "grad_norm": 0.5014888446291523, "learning_rate": 2.3383382409585575e-06, "loss": 0.0153425931930542, "step": 77765 }, { "epoch": 0.7319529411764706, "grad_norm": 0.3912702358151593, "learning_rate": 2.338263070394874e-06, "loss": 0.015567712485790253, "step": 77770 }, { "epoch": 0.732, "grad_norm": 0.7355353668386145, "learning_rate": 2.338187907080249e-06, "loss": 0.015834882855415344, "step": 77775 }, { "epoch": 0.7320470588235294, "grad_norm": 0.44124236716924736, "learning_rate": 2.3381127510135177e-06, "loss": 0.01503436714410782, "step": 77780 }, { "epoch": 0.7320941176470588, "grad_norm": 0.8094570123682574, "learning_rate": 2.3380376021935154e-06, "loss": 0.014301297068595887, "step": 77785 }, { "epoch": 0.7321411764705882, "grad_norm": 0.37093652961915585, "learning_rate": 2.337962460619077e-06, "loss": 0.011425453424453735, "step": 77790 }, { "epoch": 0.7321882352941177, "grad_norm": 0.6181133427200026, "learning_rate": 2.337887326289039e-06, "loss": 0.014113977551460266, "step": 77795 }, { "epoch": 0.7322352941176471, "grad_norm": 0.6836800731095998, "learning_rate": 2.3378121992022378e-06, "loss": 0.013499116897583008, "step": 77800 }, { "epoch": 0.7322823529411765, "grad_norm": 0.5359379565861819, "learning_rate": 2.337737079357508e-06, "loss": 0.018695907294750215, "step": 77805 }, { "epoch": 0.7323294117647059, "grad_norm": 0.8572895671543379, "learning_rate": 2.3376619667536878e-06, "loss": 0.0169820636510849, "step": 77810 }, { "epoch": 0.7323764705882353, "grad_norm": 0.2855200737882482, "learning_rate": 2.3375868613896135e-06, "loss": 0.012589406967163087, "step": 77815 }, { "epoch": 0.7324235294117647, "grad_norm": 0.35342857231989333, "learning_rate": 2.3375117632641215e-06, "loss": 0.01454613208770752, "step": 77820 }, { "epoch": 0.7324705882352941, "grad_norm": 0.6971549746151945, "learning_rate": 2.33743667237605e-06, "loss": 0.018278223276138306, "step": 77825 }, { "epoch": 0.7325176470588235, "grad_norm": 0.32824021099887946, "learning_rate": 2.3373615887242363e-06, "loss": 0.013195118308067322, "step": 77830 }, { "epoch": 0.7325647058823529, "grad_norm": 0.31054003478627035, "learning_rate": 2.3372865123075185e-06, "loss": 0.017255493998527528, "step": 77835 }, { "epoch": 0.7326117647058824, "grad_norm": 0.356152829795878, "learning_rate": 2.3372114431247336e-06, "loss": 0.016418318450450897, "step": 77840 }, { "epoch": 0.7326588235294118, "grad_norm": 0.48824009376573624, "learning_rate": 2.3371363811747215e-06, "loss": 0.016674834489822387, "step": 77845 }, { "epoch": 0.7327058823529412, "grad_norm": 0.299406298906567, "learning_rate": 2.33706132645632e-06, "loss": 0.01804729402065277, "step": 77850 }, { "epoch": 0.7327529411764706, "grad_norm": 0.6199669614095847, "learning_rate": 2.3369862789683677e-06, "loss": 0.013833288848400117, "step": 77855 }, { "epoch": 0.7328, "grad_norm": 0.5049608641244839, "learning_rate": 2.336911238709705e-06, "loss": 0.014261673390865325, "step": 77860 }, { "epoch": 0.7328470588235294, "grad_norm": 0.39788470066106363, "learning_rate": 2.3368362056791702e-06, "loss": 0.016921365261077882, "step": 77865 }, { "epoch": 0.7328941176470588, "grad_norm": 0.5339322858258168, "learning_rate": 2.336761179875603e-06, "loss": 0.012778981029987336, "step": 77870 }, { "epoch": 0.7329411764705882, "grad_norm": 0.4774659950218619, "learning_rate": 2.3366861612978436e-06, "loss": 0.017702314257621764, "step": 77875 }, { "epoch": 0.7329882352941176, "grad_norm": 0.6026795353742929, "learning_rate": 2.3366111499447326e-06, "loss": 0.011864522099494934, "step": 77880 }, { "epoch": 0.733035294117647, "grad_norm": 0.5966812350482729, "learning_rate": 2.33653614581511e-06, "loss": 0.016588011384010316, "step": 77885 }, { "epoch": 0.7330823529411765, "grad_norm": 0.39503244231424295, "learning_rate": 2.336461148907817e-06, "loss": 0.012655733525753022, "step": 77890 }, { "epoch": 0.7331294117647059, "grad_norm": 0.40806173215655406, "learning_rate": 2.336386159221693e-06, "loss": 0.015079861879348755, "step": 77895 }, { "epoch": 0.7331764705882353, "grad_norm": 0.3951707690862054, "learning_rate": 2.3363111767555816e-06, "loss": 0.013533678650856019, "step": 77900 }, { "epoch": 0.7332235294117647, "grad_norm": 0.6106099543623921, "learning_rate": 2.3362362015083228e-06, "loss": 0.014902350306510926, "step": 77905 }, { "epoch": 0.7332705882352941, "grad_norm": 0.4676951692098798, "learning_rate": 2.3361612334787587e-06, "loss": 0.013101342320442199, "step": 77910 }, { "epoch": 0.7333176470588235, "grad_norm": 0.41766787615898554, "learning_rate": 2.336086272665731e-06, "loss": 0.01573294699192047, "step": 77915 }, { "epoch": 0.7333647058823529, "grad_norm": 0.534419164295001, "learning_rate": 2.336011319068083e-06, "loss": 0.015459704399108886, "step": 77920 }, { "epoch": 0.7334117647058823, "grad_norm": 0.5600183851289694, "learning_rate": 2.335936372684656e-06, "loss": 0.013301819562911987, "step": 77925 }, { "epoch": 0.7334588235294117, "grad_norm": 0.5251134823125542, "learning_rate": 2.3358614335142935e-06, "loss": 0.014961040019989014, "step": 77930 }, { "epoch": 0.7335058823529412, "grad_norm": 0.4543743127752028, "learning_rate": 2.335786501555839e-06, "loss": 0.0126413494348526, "step": 77935 }, { "epoch": 0.7335529411764706, "grad_norm": 0.32744652133909696, "learning_rate": 2.3357115768081342e-06, "loss": 0.012807491421699523, "step": 77940 }, { "epoch": 0.7336, "grad_norm": 0.4511188015296387, "learning_rate": 2.3356366592700246e-06, "loss": 0.013708655536174775, "step": 77945 }, { "epoch": 0.7336470588235294, "grad_norm": 0.6365963528467087, "learning_rate": 2.335561748940352e-06, "loss": 0.020240111649036406, "step": 77950 }, { "epoch": 0.7336941176470588, "grad_norm": 0.5434690433464848, "learning_rate": 2.335486845817963e-06, "loss": 0.014922630786895753, "step": 77955 }, { "epoch": 0.7337411764705882, "grad_norm": 0.5234198826781451, "learning_rate": 2.3354119499017e-06, "loss": 0.012718075513839721, "step": 77960 }, { "epoch": 0.7337882352941176, "grad_norm": 0.5133332245379694, "learning_rate": 2.3353370611904088e-06, "loss": 0.015142515301704407, "step": 77965 }, { "epoch": 0.733835294117647, "grad_norm": 0.5377039973553466, "learning_rate": 2.335262179682933e-06, "loss": 0.013524828851222992, "step": 77970 }, { "epoch": 0.7338823529411764, "grad_norm": 0.4333091918607108, "learning_rate": 2.3351873053781186e-06, "loss": 0.014146468043327332, "step": 77975 }, { "epoch": 0.7339294117647058, "grad_norm": 0.6382058994690171, "learning_rate": 2.335112438274811e-06, "loss": 0.015286800265312196, "step": 77980 }, { "epoch": 0.7339764705882353, "grad_norm": 0.4116936475725636, "learning_rate": 2.3350375783718558e-06, "loss": 0.012809151411056518, "step": 77985 }, { "epoch": 0.7340235294117647, "grad_norm": 0.498233873616593, "learning_rate": 2.3349627256680984e-06, "loss": 0.013717515766620636, "step": 77990 }, { "epoch": 0.7340705882352941, "grad_norm": 0.3830777375288388, "learning_rate": 2.3348878801623863e-06, "loss": 0.012520404160022735, "step": 77995 }, { "epoch": 0.7341176470588235, "grad_norm": 0.3822901867256154, "learning_rate": 2.3348130418535643e-06, "loss": 0.013176903128623962, "step": 78000 }, { "epoch": 0.7341647058823529, "grad_norm": 0.4969759884049973, "learning_rate": 2.33473821074048e-06, "loss": 0.016712597012519835, "step": 78005 }, { "epoch": 0.7342117647058823, "grad_norm": 0.4918600652785038, "learning_rate": 2.33466338682198e-06, "loss": 0.014590099453926086, "step": 78010 }, { "epoch": 0.7342588235294117, "grad_norm": 0.6822821723202099, "learning_rate": 2.334588570096912e-06, "loss": 0.018799062073230743, "step": 78015 }, { "epoch": 0.7343058823529411, "grad_norm": 0.4191989722824537, "learning_rate": 2.334513760564123e-06, "loss": 0.01869082748889923, "step": 78020 }, { "epoch": 0.7343529411764705, "grad_norm": 0.45248434657702985, "learning_rate": 2.334438958222461e-06, "loss": 0.011751382052898407, "step": 78025 }, { "epoch": 0.7344, "grad_norm": 0.43629575160336437, "learning_rate": 2.334364163070774e-06, "loss": 0.012424204498529434, "step": 78030 }, { "epoch": 0.7344470588235295, "grad_norm": 0.5386149172614241, "learning_rate": 2.3342893751079102e-06, "loss": 0.014039438962936402, "step": 78035 }, { "epoch": 0.7344941176470589, "grad_norm": 0.3823746498485578, "learning_rate": 2.334214594332718e-06, "loss": 0.015287066996097564, "step": 78040 }, { "epoch": 0.7345411764705883, "grad_norm": 0.4368120959079871, "learning_rate": 2.3341398207440454e-06, "loss": 0.014793324470520019, "step": 78045 }, { "epoch": 0.7345882352941177, "grad_norm": 0.4334486270079448, "learning_rate": 2.334065054340743e-06, "loss": 0.013185672461986542, "step": 78050 }, { "epoch": 0.734635294117647, "grad_norm": 0.31743790742590017, "learning_rate": 2.3339902951216594e-06, "loss": 0.01364162117242813, "step": 78055 }, { "epoch": 0.7346823529411765, "grad_norm": 0.5887829183961731, "learning_rate": 2.3339155430856435e-06, "loss": 0.014728590846061707, "step": 78060 }, { "epoch": 0.7347294117647059, "grad_norm": 0.371927462488749, "learning_rate": 2.333840798231546e-06, "loss": 0.012514987587928772, "step": 78065 }, { "epoch": 0.7347764705882353, "grad_norm": 0.6435296575296667, "learning_rate": 2.3337660605582164e-06, "loss": 0.016023001074790953, "step": 78070 }, { "epoch": 0.7348235294117647, "grad_norm": 0.7955239730165705, "learning_rate": 2.3336913300645053e-06, "loss": 0.015377187728881836, "step": 78075 }, { "epoch": 0.7348705882352942, "grad_norm": 0.4797236034332242, "learning_rate": 2.333616606749263e-06, "loss": 0.015428641438484192, "step": 78080 }, { "epoch": 0.7349176470588236, "grad_norm": 0.5037339303336577, "learning_rate": 2.3335418906113404e-06, "loss": 0.017598426342010497, "step": 78085 }, { "epoch": 0.734964705882353, "grad_norm": 0.7252567570060103, "learning_rate": 2.3334671816495888e-06, "loss": 0.012574003636837005, "step": 78090 }, { "epoch": 0.7350117647058824, "grad_norm": 0.5863277736172126, "learning_rate": 2.3333924798628597e-06, "loss": 0.015583503246307372, "step": 78095 }, { "epoch": 0.7350588235294118, "grad_norm": 0.47852559451290066, "learning_rate": 2.3333177852500037e-06, "loss": 0.012855374813079834, "step": 78100 }, { "epoch": 0.7351058823529412, "grad_norm": 0.5181774450840584, "learning_rate": 2.333243097809874e-06, "loss": 0.015353795886039735, "step": 78105 }, { "epoch": 0.7351529411764706, "grad_norm": 0.45586181221479755, "learning_rate": 2.3331684175413214e-06, "loss": 0.013540583848953246, "step": 78110 }, { "epoch": 0.7352, "grad_norm": 0.4859712206198666, "learning_rate": 2.333093744443199e-06, "loss": 0.015843936800956727, "step": 78115 }, { "epoch": 0.7352470588235294, "grad_norm": 0.46626454970463677, "learning_rate": 2.33301907851436e-06, "loss": 0.014242500066757202, "step": 78120 }, { "epoch": 0.7352941176470589, "grad_norm": 0.4570900466969257, "learning_rate": 2.3329444197536556e-06, "loss": 0.014056158065795899, "step": 78125 }, { "epoch": 0.7353411764705883, "grad_norm": 0.532357722787939, "learning_rate": 2.3328697681599403e-06, "loss": 0.015142963826656341, "step": 78130 }, { "epoch": 0.7353882352941177, "grad_norm": 0.48207856584526176, "learning_rate": 2.3327951237320674e-06, "loss": 0.009658978879451751, "step": 78135 }, { "epoch": 0.7354352941176471, "grad_norm": 0.6139273285574508, "learning_rate": 2.33272048646889e-06, "loss": 0.015210580825805665, "step": 78140 }, { "epoch": 0.7354823529411765, "grad_norm": 0.4712962988197684, "learning_rate": 2.332645856369263e-06, "loss": 0.016126632690429688, "step": 78145 }, { "epoch": 0.7355294117647059, "grad_norm": 0.5823474253806427, "learning_rate": 2.332571233432039e-06, "loss": 0.013753305375576019, "step": 78150 }, { "epoch": 0.7355764705882353, "grad_norm": 0.5133595379855613, "learning_rate": 2.3324966176560745e-06, "loss": 0.017546211183071137, "step": 78155 }, { "epoch": 0.7356235294117647, "grad_norm": 0.7194984328460778, "learning_rate": 2.332422009040222e-06, "loss": 0.01751425415277481, "step": 78160 }, { "epoch": 0.7356705882352941, "grad_norm": 0.5135886261233166, "learning_rate": 2.3323474075833374e-06, "loss": 0.010894940793514251, "step": 78165 }, { "epoch": 0.7357176470588235, "grad_norm": 0.7124170361219557, "learning_rate": 2.3322728132842757e-06, "loss": 0.015958718955516815, "step": 78170 }, { "epoch": 0.735764705882353, "grad_norm": 0.6931962753800666, "learning_rate": 2.332198226141893e-06, "loss": 0.013660527765750885, "step": 78175 }, { "epoch": 0.7358117647058824, "grad_norm": 0.599662148529595, "learning_rate": 2.3321236461550452e-06, "loss": 0.01687961518764496, "step": 78180 }, { "epoch": 0.7358588235294118, "grad_norm": 0.2859459723256015, "learning_rate": 2.3320490733225865e-06, "loss": 0.017374348640441895, "step": 78185 }, { "epoch": 0.7359058823529412, "grad_norm": 0.5894837891859374, "learning_rate": 2.3319745076433747e-06, "loss": 0.018416450917720796, "step": 78190 }, { "epoch": 0.7359529411764706, "grad_norm": 0.44241065002346835, "learning_rate": 2.3318999491162656e-06, "loss": 0.015128158032894135, "step": 78195 }, { "epoch": 0.736, "grad_norm": 0.5055193519386907, "learning_rate": 2.3318253977401166e-06, "loss": 0.015223416686058044, "step": 78200 }, { "epoch": 0.7360470588235294, "grad_norm": 0.47414943229992584, "learning_rate": 2.3317508535137833e-06, "loss": 0.018794375658035278, "step": 78205 }, { "epoch": 0.7360941176470588, "grad_norm": 0.4988072068228582, "learning_rate": 2.3316763164361247e-06, "loss": 0.02003961205482483, "step": 78210 }, { "epoch": 0.7361411764705882, "grad_norm": 0.49692652184784614, "learning_rate": 2.331601786505997e-06, "loss": 0.02183598130941391, "step": 78215 }, { "epoch": 0.7361882352941177, "grad_norm": 0.4358087309189572, "learning_rate": 2.331527263722259e-06, "loss": 0.01778382956981659, "step": 78220 }, { "epoch": 0.7362352941176471, "grad_norm": 0.6667978744822823, "learning_rate": 2.331452748083768e-06, "loss": 0.01611219197511673, "step": 78225 }, { "epoch": 0.7362823529411765, "grad_norm": 0.43216895564115254, "learning_rate": 2.331378239589382e-06, "loss": 0.013478416204452514, "step": 78230 }, { "epoch": 0.7363294117647059, "grad_norm": 0.4931180055425042, "learning_rate": 2.33130373823796e-06, "loss": 0.01565313935279846, "step": 78235 }, { "epoch": 0.7363764705882353, "grad_norm": 0.930987283844092, "learning_rate": 2.3312292440283604e-06, "loss": 0.018548038601875306, "step": 78240 }, { "epoch": 0.7364235294117647, "grad_norm": 0.47814598559366356, "learning_rate": 2.3311547569594427e-06, "loss": 0.01557668149471283, "step": 78245 }, { "epoch": 0.7364705882352941, "grad_norm": 0.4993682024905944, "learning_rate": 2.331080277030066e-06, "loss": 0.0151222825050354, "step": 78250 }, { "epoch": 0.7365176470588235, "grad_norm": 0.6532834408340288, "learning_rate": 2.3310058042390895e-06, "loss": 0.015521392226219177, "step": 78255 }, { "epoch": 0.7365647058823529, "grad_norm": 0.32236217884936963, "learning_rate": 2.330931338585374e-06, "loss": 0.012862552702426911, "step": 78260 }, { "epoch": 0.7366117647058823, "grad_norm": 0.31374578377963314, "learning_rate": 2.330856880067778e-06, "loss": 0.01141616553068161, "step": 78265 }, { "epoch": 0.7366588235294118, "grad_norm": 0.474651159700744, "learning_rate": 2.3307824286851632e-06, "loss": 0.019003164768218995, "step": 78270 }, { "epoch": 0.7367058823529412, "grad_norm": 0.4310800541640041, "learning_rate": 2.33070798443639e-06, "loss": 0.02029494345188141, "step": 78275 }, { "epoch": 0.7367529411764706, "grad_norm": 0.4000941414625137, "learning_rate": 2.3306335473203182e-06, "loss": 0.015046633780002594, "step": 78280 }, { "epoch": 0.7368, "grad_norm": 0.3911054431806481, "learning_rate": 2.3305591173358096e-06, "loss": 0.014119309186935425, "step": 78285 }, { "epoch": 0.7368470588235294, "grad_norm": 0.33063438107609155, "learning_rate": 2.330484694481725e-06, "loss": 0.01510201096534729, "step": 78290 }, { "epoch": 0.7368941176470588, "grad_norm": 0.3469518601904363, "learning_rate": 2.3304102787569272e-06, "loss": 0.013767817616462707, "step": 78295 }, { "epoch": 0.7369411764705882, "grad_norm": 0.45200492870448994, "learning_rate": 2.3303358701602767e-06, "loss": 0.016311730444431304, "step": 78300 }, { "epoch": 0.7369882352941176, "grad_norm": 1.2262105530826144, "learning_rate": 2.3302614686906367e-06, "loss": 0.019704280793666838, "step": 78305 }, { "epoch": 0.737035294117647, "grad_norm": 0.4163993877918725, "learning_rate": 2.330187074346868e-06, "loss": 0.01594688594341278, "step": 78310 }, { "epoch": 0.7370823529411765, "grad_norm": 0.4010330765452626, "learning_rate": 2.330112687127835e-06, "loss": 0.012623080611228943, "step": 78315 }, { "epoch": 0.7371294117647059, "grad_norm": 0.69295730444822, "learning_rate": 2.3300383070323997e-06, "loss": 0.015965673327445983, "step": 78320 }, { "epoch": 0.7371764705882353, "grad_norm": 0.6012129867349284, "learning_rate": 2.329963934059425e-06, "loss": 0.016778558492660522, "step": 78325 }, { "epoch": 0.7372235294117647, "grad_norm": 0.5344650721963086, "learning_rate": 2.329889568207774e-06, "loss": 0.012390197813510894, "step": 78330 }, { "epoch": 0.7372705882352941, "grad_norm": 0.42452858855639136, "learning_rate": 2.329815209476311e-06, "loss": 0.013373276591300965, "step": 78335 }, { "epoch": 0.7373176470588235, "grad_norm": 0.3166534277265584, "learning_rate": 2.3297408578638997e-06, "loss": 0.011976350843906403, "step": 78340 }, { "epoch": 0.7373647058823529, "grad_norm": 0.4959994940432896, "learning_rate": 2.329666513369404e-06, "loss": 0.015955615043640136, "step": 78345 }, { "epoch": 0.7374117647058823, "grad_norm": 0.4809083116848579, "learning_rate": 2.3295921759916886e-06, "loss": 0.01581568717956543, "step": 78350 }, { "epoch": 0.7374588235294117, "grad_norm": 0.4928260992992585, "learning_rate": 2.3295178457296182e-06, "loss": 0.015260827541351319, "step": 78355 }, { "epoch": 0.7375058823529411, "grad_norm": 0.4994228634764175, "learning_rate": 2.329443522582057e-06, "loss": 0.020363929867744445, "step": 78360 }, { "epoch": 0.7375529411764706, "grad_norm": 0.4266825154072722, "learning_rate": 2.3293692065478706e-06, "loss": 0.01668577194213867, "step": 78365 }, { "epoch": 0.7376, "grad_norm": 0.4944643857489574, "learning_rate": 2.3292948976259246e-06, "loss": 0.014024126529693603, "step": 78370 }, { "epoch": 0.7376470588235294, "grad_norm": 0.31508778285117073, "learning_rate": 2.3292205958150836e-06, "loss": 0.012886244058609008, "step": 78375 }, { "epoch": 0.7376941176470588, "grad_norm": 0.38739248055629744, "learning_rate": 2.3291463011142156e-06, "loss": 0.013657380640506745, "step": 78380 }, { "epoch": 0.7377411764705882, "grad_norm": 0.8265689553371086, "learning_rate": 2.3290720135221843e-06, "loss": 0.01776411235332489, "step": 78385 }, { "epoch": 0.7377882352941176, "grad_norm": 0.4813419853126242, "learning_rate": 2.3289977330378575e-06, "loss": 0.020261088013648988, "step": 78390 }, { "epoch": 0.737835294117647, "grad_norm": 0.4198241868173542, "learning_rate": 2.328923459660101e-06, "loss": 0.013764898478984832, "step": 78395 }, { "epoch": 0.7378823529411764, "grad_norm": 0.47450385717735605, "learning_rate": 2.328849193387783e-06, "loss": 0.01290489435195923, "step": 78400 }, { "epoch": 0.7379294117647058, "grad_norm": 0.5907891793774883, "learning_rate": 2.32877493421977e-06, "loss": 0.013643114268779755, "step": 78405 }, { "epoch": 0.7379764705882353, "grad_norm": 0.46968730256420804, "learning_rate": 2.328700682154929e-06, "loss": 0.016845694184303282, "step": 78410 }, { "epoch": 0.7380235294117647, "grad_norm": 0.4040649793815799, "learning_rate": 2.3286264371921284e-06, "loss": 0.012867431342601775, "step": 78415 }, { "epoch": 0.7380705882352941, "grad_norm": 0.5647775721131932, "learning_rate": 2.3285521993302352e-06, "loss": 0.017614805698394777, "step": 78420 }, { "epoch": 0.7381176470588235, "grad_norm": 0.638072563369606, "learning_rate": 2.3284779685681185e-06, "loss": 0.016130782663822174, "step": 78425 }, { "epoch": 0.738164705882353, "grad_norm": 0.5357784928320511, "learning_rate": 2.3284037449046456e-06, "loss": 0.012583041191101074, "step": 78430 }, { "epoch": 0.7382117647058823, "grad_norm": 0.6351973235695296, "learning_rate": 2.3283295283386867e-06, "loss": 0.01666531264781952, "step": 78435 }, { "epoch": 0.7382588235294117, "grad_norm": 0.49791076973351495, "learning_rate": 2.3282553188691097e-06, "loss": 0.01571163088083267, "step": 78440 }, { "epoch": 0.7383058823529411, "grad_norm": 0.5115591892904624, "learning_rate": 2.328181116494784e-06, "loss": 0.014806720614433288, "step": 78445 }, { "epoch": 0.7383529411764705, "grad_norm": 0.7538694827205231, "learning_rate": 2.3281069212145787e-06, "loss": 0.01857490688562393, "step": 78450 }, { "epoch": 0.7384, "grad_norm": 0.5323003367482203, "learning_rate": 2.328032733027364e-06, "loss": 0.017462988197803498, "step": 78455 }, { "epoch": 0.7384470588235295, "grad_norm": 0.6118941490038784, "learning_rate": 2.327958551932009e-06, "loss": 0.013278207182884217, "step": 78460 }, { "epoch": 0.7384941176470589, "grad_norm": 0.8077609459720059, "learning_rate": 2.327884377927385e-06, "loss": 0.017314860224723817, "step": 78465 }, { "epoch": 0.7385411764705883, "grad_norm": 0.5291036238843173, "learning_rate": 2.3278102110123623e-06, "loss": 0.01724204868078232, "step": 78470 }, { "epoch": 0.7385882352941177, "grad_norm": 0.512248873845919, "learning_rate": 2.3277360511858113e-06, "loss": 0.016354110836982728, "step": 78475 }, { "epoch": 0.7386352941176471, "grad_norm": 0.5367981078129077, "learning_rate": 2.327661898446602e-06, "loss": 0.01453196108341217, "step": 78480 }, { "epoch": 0.7386823529411765, "grad_norm": 0.40414256617559047, "learning_rate": 2.327587752793607e-06, "loss": 0.012271448224782943, "step": 78485 }, { "epoch": 0.7387294117647059, "grad_norm": 0.4753958979834259, "learning_rate": 2.3275136142256976e-06, "loss": 0.013209150731563568, "step": 78490 }, { "epoch": 0.7387764705882353, "grad_norm": 0.4488607381789577, "learning_rate": 2.327439482741745e-06, "loss": 0.016665373742580415, "step": 78495 }, { "epoch": 0.7388235294117647, "grad_norm": 0.4783803766911821, "learning_rate": 2.32736535834062e-06, "loss": 0.014967048168182373, "step": 78500 }, { "epoch": 0.7388705882352942, "grad_norm": 0.4504306674026581, "learning_rate": 2.3272912410211975e-06, "loss": 0.020144963264465333, "step": 78505 }, { "epoch": 0.7389176470588236, "grad_norm": 0.4002919138557818, "learning_rate": 2.327217130782348e-06, "loss": 0.014112436771392822, "step": 78510 }, { "epoch": 0.738964705882353, "grad_norm": 0.5296800866298202, "learning_rate": 2.327143027622945e-06, "loss": 0.014885008335113525, "step": 78515 }, { "epoch": 0.7390117647058824, "grad_norm": 0.3936350596969521, "learning_rate": 2.327068931541861e-06, "loss": 0.012102475762367249, "step": 78520 }, { "epoch": 0.7390588235294118, "grad_norm": 0.47650527722607944, "learning_rate": 2.3269948425379697e-06, "loss": 0.01292984038591385, "step": 78525 }, { "epoch": 0.7391058823529412, "grad_norm": 0.39392356959505787, "learning_rate": 2.326920760610144e-06, "loss": 0.01124585047364235, "step": 78530 }, { "epoch": 0.7391529411764706, "grad_norm": 0.49475227965352664, "learning_rate": 2.3268466857572574e-06, "loss": 0.013238941133022309, "step": 78535 }, { "epoch": 0.7392, "grad_norm": 0.5353622030374905, "learning_rate": 2.326772617978185e-06, "loss": 0.013320069015026092, "step": 78540 }, { "epoch": 0.7392470588235294, "grad_norm": 0.4412046756708192, "learning_rate": 2.3266985572718e-06, "loss": 0.012699577212333679, "step": 78545 }, { "epoch": 0.7392941176470588, "grad_norm": 0.4940708906646796, "learning_rate": 2.3266245036369776e-06, "loss": 0.014446844160556794, "step": 78550 }, { "epoch": 0.7393411764705883, "grad_norm": 0.7790918925095631, "learning_rate": 2.326550457072591e-06, "loss": 0.01731354892253876, "step": 78555 }, { "epoch": 0.7393882352941177, "grad_norm": 0.46265255207370837, "learning_rate": 2.3264764175775172e-06, "loss": 0.018848252296447755, "step": 78560 }, { "epoch": 0.7394352941176471, "grad_norm": 0.43856063154812025, "learning_rate": 2.3264023851506304e-06, "loss": 0.011106061935424804, "step": 78565 }, { "epoch": 0.7394823529411765, "grad_norm": 0.419327600305701, "learning_rate": 2.3263283597908058e-06, "loss": 0.014953690767288207, "step": 78570 }, { "epoch": 0.7395294117647059, "grad_norm": 0.5549830069297282, "learning_rate": 2.3262543414969194e-06, "loss": 0.018154367804527283, "step": 78575 }, { "epoch": 0.7395764705882353, "grad_norm": 0.42885966540776743, "learning_rate": 2.3261803302678478e-06, "loss": 0.014619368314743041, "step": 78580 }, { "epoch": 0.7396235294117647, "grad_norm": 0.19339962104596656, "learning_rate": 2.326106326102466e-06, "loss": 0.011871953308582307, "step": 78585 }, { "epoch": 0.7396705882352941, "grad_norm": 0.6320546890331504, "learning_rate": 2.3260323289996507e-06, "loss": 0.01426575630903244, "step": 78590 }, { "epoch": 0.7397176470588235, "grad_norm": 0.6164384388442115, "learning_rate": 2.32595833895828e-06, "loss": 0.016408661007881166, "step": 78595 }, { "epoch": 0.739764705882353, "grad_norm": 0.3826773815813162, "learning_rate": 2.3258843559772293e-06, "loss": 0.014315572381019593, "step": 78600 }, { "epoch": 0.7398117647058824, "grad_norm": 0.39757108236092087, "learning_rate": 2.3258103800553767e-06, "loss": 0.012977962195873261, "step": 78605 }, { "epoch": 0.7398588235294118, "grad_norm": 0.6217437089082286, "learning_rate": 2.325736411191599e-06, "loss": 0.01623155325651169, "step": 78610 }, { "epoch": 0.7399058823529412, "grad_norm": 0.5032922835890892, "learning_rate": 2.3256624493847745e-06, "loss": 0.018632560968399048, "step": 78615 }, { "epoch": 0.7399529411764706, "grad_norm": 0.4990270334707307, "learning_rate": 2.3255884946337808e-06, "loss": 0.012959955632686615, "step": 78620 }, { "epoch": 0.74, "grad_norm": 0.40142026544146747, "learning_rate": 2.3255145469374965e-06, "loss": 0.01683828979730606, "step": 78625 }, { "epoch": 0.7400470588235294, "grad_norm": 0.459622797889524, "learning_rate": 2.3254406062947996e-06, "loss": 0.012896755337715149, "step": 78630 }, { "epoch": 0.7400941176470588, "grad_norm": 0.3856972942834521, "learning_rate": 2.325366672704569e-06, "loss": 0.014632230997085572, "step": 78635 }, { "epoch": 0.7401411764705882, "grad_norm": 0.4920838300233036, "learning_rate": 2.3252927461656836e-06, "loss": 0.01696210354566574, "step": 78640 }, { "epoch": 0.7401882352941177, "grad_norm": 0.5134049153366725, "learning_rate": 2.3252188266770225e-06, "loss": 0.014755168557167053, "step": 78645 }, { "epoch": 0.7402352941176471, "grad_norm": 0.43923965439525775, "learning_rate": 2.3251449142374658e-06, "loss": 0.019670170545578004, "step": 78650 }, { "epoch": 0.7402823529411765, "grad_norm": 0.48954263560716543, "learning_rate": 2.3250710088458927e-06, "loss": 0.015651129186153412, "step": 78655 }, { "epoch": 0.7403294117647059, "grad_norm": 0.4184359462476562, "learning_rate": 2.3249971105011827e-06, "loss": 0.012771062552928925, "step": 78660 }, { "epoch": 0.7403764705882353, "grad_norm": 0.4074610277686035, "learning_rate": 2.3249232192022173e-06, "loss": 0.018205294013023378, "step": 78665 }, { "epoch": 0.7404235294117647, "grad_norm": 0.5705662081718098, "learning_rate": 2.3248493349478754e-06, "loss": 0.016215652227401733, "step": 78670 }, { "epoch": 0.7404705882352941, "grad_norm": 0.40856123778973663, "learning_rate": 2.324775457737039e-06, "loss": 0.015344363451004029, "step": 78675 }, { "epoch": 0.7405176470588235, "grad_norm": 0.6399317266149933, "learning_rate": 2.324701587568588e-06, "loss": 0.01907324492931366, "step": 78680 }, { "epoch": 0.7405647058823529, "grad_norm": 0.6272480058989149, "learning_rate": 2.324627724441405e-06, "loss": 0.01283591091632843, "step": 78685 }, { "epoch": 0.7406117647058823, "grad_norm": 0.5139212693284231, "learning_rate": 2.3245538683543707e-06, "loss": 0.015509769320487976, "step": 78690 }, { "epoch": 0.7406588235294118, "grad_norm": 0.44167533132316716, "learning_rate": 2.324480019306366e-06, "loss": 0.014203786849975586, "step": 78695 }, { "epoch": 0.7407058823529412, "grad_norm": 0.4197301587069043, "learning_rate": 2.3244061772962735e-06, "loss": 0.014254334568977355, "step": 78700 }, { "epoch": 0.7407529411764706, "grad_norm": 0.6558780395943006, "learning_rate": 2.324332342322976e-06, "loss": 0.014361447095870972, "step": 78705 }, { "epoch": 0.7408, "grad_norm": 0.5899514321751664, "learning_rate": 2.3242585143853555e-06, "loss": 0.013010045886039734, "step": 78710 }, { "epoch": 0.7408470588235294, "grad_norm": 0.3891848930667004, "learning_rate": 2.324184693482294e-06, "loss": 0.014630872011184692, "step": 78715 }, { "epoch": 0.7408941176470588, "grad_norm": 0.5220856260065747, "learning_rate": 2.3241108796126755e-06, "loss": 0.014507554471492767, "step": 78720 }, { "epoch": 0.7409411764705882, "grad_norm": 0.5689226507062433, "learning_rate": 2.3240370727753823e-06, "loss": 0.014613676071166991, "step": 78725 }, { "epoch": 0.7409882352941176, "grad_norm": 0.43014189100816774, "learning_rate": 2.323963272969299e-06, "loss": 0.013232587277889252, "step": 78730 }, { "epoch": 0.741035294117647, "grad_norm": 0.48096208376428046, "learning_rate": 2.3238894801933084e-06, "loss": 0.014716970920562743, "step": 78735 }, { "epoch": 0.7410823529411765, "grad_norm": 0.3696707819254504, "learning_rate": 2.3238156944462945e-06, "loss": 0.013480176031589509, "step": 78740 }, { "epoch": 0.7411294117647059, "grad_norm": 0.385786072581131, "learning_rate": 2.3237419157271413e-06, "loss": 0.01443837583065033, "step": 78745 }, { "epoch": 0.7411764705882353, "grad_norm": 0.4244708885857568, "learning_rate": 2.323668144034734e-06, "loss": 0.0164434015750885, "step": 78750 }, { "epoch": 0.7412235294117647, "grad_norm": 0.4414045197833076, "learning_rate": 2.3235943793679563e-06, "loss": 0.014697185158729554, "step": 78755 }, { "epoch": 0.7412705882352941, "grad_norm": 0.6181202915386976, "learning_rate": 2.3235206217256942e-06, "loss": 0.016975486278533937, "step": 78760 }, { "epoch": 0.7413176470588235, "grad_norm": 0.6102296674023697, "learning_rate": 2.3234468711068324e-06, "loss": 0.0202425017952919, "step": 78765 }, { "epoch": 0.7413647058823529, "grad_norm": 0.4022457826226596, "learning_rate": 2.3233731275102557e-06, "loss": 0.014401969313621522, "step": 78770 }, { "epoch": 0.7414117647058823, "grad_norm": 0.2736380635684882, "learning_rate": 2.3232993909348507e-06, "loss": 0.012970611453056335, "step": 78775 }, { "epoch": 0.7414588235294117, "grad_norm": 0.42071195265430744, "learning_rate": 2.323225661379503e-06, "loss": 0.013121975958347321, "step": 78780 }, { "epoch": 0.7415058823529411, "grad_norm": 0.40249099693586954, "learning_rate": 2.3231519388430985e-06, "loss": 0.011355133354663849, "step": 78785 }, { "epoch": 0.7415529411764706, "grad_norm": 0.68253104067601, "learning_rate": 2.323078223324524e-06, "loss": 0.018655738234519957, "step": 78790 }, { "epoch": 0.7416, "grad_norm": 0.48758681227525685, "learning_rate": 2.3230045148226665e-06, "loss": 0.016351892054080962, "step": 78795 }, { "epoch": 0.7416470588235294, "grad_norm": 0.4636198775321856, "learning_rate": 2.3229308133364115e-06, "loss": 0.01346093714237213, "step": 78800 }, { "epoch": 0.7416941176470588, "grad_norm": 0.8524833356781465, "learning_rate": 2.3228571188646477e-06, "loss": 0.018361811339855195, "step": 78805 }, { "epoch": 0.7417411764705882, "grad_norm": 0.5166682504259392, "learning_rate": 2.3227834314062613e-06, "loss": 0.01200675144791603, "step": 78810 }, { "epoch": 0.7417882352941176, "grad_norm": 0.4706460526534277, "learning_rate": 2.322709750960141e-06, "loss": 0.017214085161685943, "step": 78815 }, { "epoch": 0.741835294117647, "grad_norm": 0.5537845335521726, "learning_rate": 2.322636077525174e-06, "loss": 0.013668997585773468, "step": 78820 }, { "epoch": 0.7418823529411764, "grad_norm": 0.529697740930627, "learning_rate": 2.3225624111002486e-06, "loss": 0.02167147696018219, "step": 78825 }, { "epoch": 0.7419294117647058, "grad_norm": 0.39159247700254457, "learning_rate": 2.322488751684254e-06, "loss": 0.013485410809516906, "step": 78830 }, { "epoch": 0.7419764705882353, "grad_norm": 0.7074624719333795, "learning_rate": 2.322415099276077e-06, "loss": 0.015908247232437132, "step": 78835 }, { "epoch": 0.7420235294117647, "grad_norm": 0.5142758588679437, "learning_rate": 2.3223414538746086e-06, "loss": 0.014712245762348175, "step": 78840 }, { "epoch": 0.7420705882352941, "grad_norm": 0.5402062948429882, "learning_rate": 2.3222678154787363e-06, "loss": 0.015118679404258728, "step": 78845 }, { "epoch": 0.7421176470588235, "grad_norm": 0.5736914301628667, "learning_rate": 2.3221941840873503e-06, "loss": 0.014428380131721496, "step": 78850 }, { "epoch": 0.742164705882353, "grad_norm": 0.38326491554858866, "learning_rate": 2.32212055969934e-06, "loss": 0.01615900844335556, "step": 78855 }, { "epoch": 0.7422117647058823, "grad_norm": 0.404721214444824, "learning_rate": 2.322046942313595e-06, "loss": 0.01532866209745407, "step": 78860 }, { "epoch": 0.7422588235294117, "grad_norm": 0.4768400352532373, "learning_rate": 2.3219733319290066e-06, "loss": 0.014360615611076355, "step": 78865 }, { "epoch": 0.7423058823529411, "grad_norm": 0.40167079297959846, "learning_rate": 2.3218997285444635e-06, "loss": 0.01673721969127655, "step": 78870 }, { "epoch": 0.7423529411764705, "grad_norm": 0.6337102368760604, "learning_rate": 2.321826132158857e-06, "loss": 0.016013726592063904, "step": 78875 }, { "epoch": 0.7424, "grad_norm": 0.41605956499825264, "learning_rate": 2.3217525427710786e-06, "loss": 0.011660172045230866, "step": 78880 }, { "epoch": 0.7424470588235295, "grad_norm": 0.4794143430388391, "learning_rate": 2.3216789603800193e-06, "loss": 0.015263892710208893, "step": 78885 }, { "epoch": 0.7424941176470589, "grad_norm": 0.37677147249028026, "learning_rate": 2.3216053849845686e-06, "loss": 0.016132117807865144, "step": 78890 }, { "epoch": 0.7425411764705883, "grad_norm": 0.5680930294078105, "learning_rate": 2.3215318165836205e-06, "loss": 0.012298166751861572, "step": 78895 }, { "epoch": 0.7425882352941177, "grad_norm": 0.4388128020945576, "learning_rate": 2.3214582551760655e-06, "loss": 0.013609735667705536, "step": 78900 }, { "epoch": 0.7426352941176471, "grad_norm": 0.9362434390542278, "learning_rate": 2.321384700760796e-06, "loss": 0.014248867332935334, "step": 78905 }, { "epoch": 0.7426823529411765, "grad_norm": 0.5339307381272795, "learning_rate": 2.3213111533367047e-06, "loss": 0.01477116346359253, "step": 78910 }, { "epoch": 0.7427294117647059, "grad_norm": 0.5249065955231772, "learning_rate": 2.321237612902683e-06, "loss": 0.013770312070846558, "step": 78915 }, { "epoch": 0.7427764705882353, "grad_norm": 0.3874089571849341, "learning_rate": 2.321164079457626e-06, "loss": 0.015248031914234161, "step": 78920 }, { "epoch": 0.7428235294117647, "grad_norm": 0.4248931077848886, "learning_rate": 2.321090553000424e-06, "loss": 0.010942941904067994, "step": 78925 }, { "epoch": 0.7428705882352942, "grad_norm": 0.49265418842276587, "learning_rate": 2.3210170335299726e-06, "loss": 0.015360337495803834, "step": 78930 }, { "epoch": 0.7429176470588236, "grad_norm": 0.5212926449310848, "learning_rate": 2.3209435210451637e-06, "loss": 0.015302351117134095, "step": 78935 }, { "epoch": 0.742964705882353, "grad_norm": 0.49527490038369976, "learning_rate": 2.320870015544892e-06, "loss": 0.011895713210105897, "step": 78940 }, { "epoch": 0.7430117647058824, "grad_norm": 0.5690117411472331, "learning_rate": 2.3207965170280514e-06, "loss": 0.015366065502166747, "step": 78945 }, { "epoch": 0.7430588235294118, "grad_norm": 0.5255124557640446, "learning_rate": 2.3207230254935365e-06, "loss": 0.015622316300868988, "step": 78950 }, { "epoch": 0.7431058823529412, "grad_norm": 0.4005152943982927, "learning_rate": 2.320649540940241e-06, "loss": 0.016001087427139283, "step": 78955 }, { "epoch": 0.7431529411764706, "grad_norm": 0.32845232603820756, "learning_rate": 2.32057606336706e-06, "loss": 0.013937124609947204, "step": 78960 }, { "epoch": 0.7432, "grad_norm": 0.43449696334029086, "learning_rate": 2.3205025927728893e-06, "loss": 0.01322571635246277, "step": 78965 }, { "epoch": 0.7432470588235294, "grad_norm": 0.61784622167961, "learning_rate": 2.320429129156623e-06, "loss": 0.014490860700607299, "step": 78970 }, { "epoch": 0.7432941176470588, "grad_norm": 0.5050633048721823, "learning_rate": 2.3203556725171576e-06, "loss": 0.01462433785200119, "step": 78975 }, { "epoch": 0.7433411764705883, "grad_norm": 0.8216378397536422, "learning_rate": 2.320282222853389e-06, "loss": 0.019510406255722045, "step": 78980 }, { "epoch": 0.7433882352941177, "grad_norm": 0.8003413963328432, "learning_rate": 2.3202087801642116e-06, "loss": 0.013667187094688416, "step": 78985 }, { "epoch": 0.7434352941176471, "grad_norm": 0.612000521679539, "learning_rate": 2.3201353444485238e-06, "loss": 0.013858981430530548, "step": 78990 }, { "epoch": 0.7434823529411765, "grad_norm": 0.5979128591076139, "learning_rate": 2.32006191570522e-06, "loss": 0.015129308402538299, "step": 78995 }, { "epoch": 0.7435294117647059, "grad_norm": 0.36754433240385637, "learning_rate": 2.3199884939331985e-06, "loss": 0.014509716629981994, "step": 79000 }, { "epoch": 0.7435764705882353, "grad_norm": 0.4574088997953072, "learning_rate": 2.3199150791313555e-06, "loss": 0.017121842503547667, "step": 79005 }, { "epoch": 0.7436235294117647, "grad_norm": 0.46043614277650646, "learning_rate": 2.3198416712985887e-06, "loss": 0.014730212092399598, "step": 79010 }, { "epoch": 0.7436705882352941, "grad_norm": 0.47501604769630323, "learning_rate": 2.3197682704337957e-06, "loss": 0.01193217858672142, "step": 79015 }, { "epoch": 0.7437176470588235, "grad_norm": 0.2921732732164701, "learning_rate": 2.3196948765358733e-06, "loss": 0.014844250679016114, "step": 79020 }, { "epoch": 0.743764705882353, "grad_norm": 0.5147187226766508, "learning_rate": 2.3196214896037207e-06, "loss": 0.015930160880088806, "step": 79025 }, { "epoch": 0.7438117647058824, "grad_norm": 0.7053923442147539, "learning_rate": 2.3195481096362347e-06, "loss": 0.020089244842529295, "step": 79030 }, { "epoch": 0.7438588235294118, "grad_norm": 0.473552574929013, "learning_rate": 2.3194747366323154e-06, "loss": 0.014308950304985047, "step": 79035 }, { "epoch": 0.7439058823529412, "grad_norm": 0.6152854292867969, "learning_rate": 2.31940137059086e-06, "loss": 0.01478385627269745, "step": 79040 }, { "epoch": 0.7439529411764706, "grad_norm": 0.3973159924038659, "learning_rate": 2.319328011510768e-06, "loss": 0.01241593509912491, "step": 79045 }, { "epoch": 0.744, "grad_norm": 0.39766273548353126, "learning_rate": 2.319254659390939e-06, "loss": 0.014253509044647217, "step": 79050 }, { "epoch": 0.7440470588235294, "grad_norm": 0.41177140898350006, "learning_rate": 2.3191813142302718e-06, "loss": 0.015204563736915588, "step": 79055 }, { "epoch": 0.7440941176470588, "grad_norm": 0.3356093229834804, "learning_rate": 2.3191079760276663e-06, "loss": 0.011787580698728562, "step": 79060 }, { "epoch": 0.7441411764705882, "grad_norm": 0.598075140377978, "learning_rate": 2.319034644782023e-06, "loss": 0.01495780199766159, "step": 79065 }, { "epoch": 0.7441882352941176, "grad_norm": 0.633774907908207, "learning_rate": 2.318961320492241e-06, "loss": 0.0153297558426857, "step": 79070 }, { "epoch": 0.7442352941176471, "grad_norm": 0.37944928880057355, "learning_rate": 2.3188880031572213e-06, "loss": 0.010954777896404266, "step": 79075 }, { "epoch": 0.7442823529411765, "grad_norm": 0.5141670204857341, "learning_rate": 2.318814692775864e-06, "loss": 0.014483125507831573, "step": 79080 }, { "epoch": 0.7443294117647059, "grad_norm": 0.5191698489617469, "learning_rate": 2.318741389347071e-06, "loss": 0.0156266912817955, "step": 79085 }, { "epoch": 0.7443764705882353, "grad_norm": 0.43075070796535886, "learning_rate": 2.318668092869743e-06, "loss": 0.013285638391971588, "step": 79090 }, { "epoch": 0.7444235294117647, "grad_norm": 0.3879765248510847, "learning_rate": 2.318594803342781e-06, "loss": 0.012461753189563751, "step": 79095 }, { "epoch": 0.7444705882352941, "grad_norm": 0.39271917267119316, "learning_rate": 2.3185215207650864e-06, "loss": 0.01383419930934906, "step": 79100 }, { "epoch": 0.7445176470588235, "grad_norm": 0.5648462801544818, "learning_rate": 2.3184482451355626e-06, "loss": 0.019462530314922333, "step": 79105 }, { "epoch": 0.7445647058823529, "grad_norm": 0.3852078579333506, "learning_rate": 2.3183749764531097e-06, "loss": 0.017031286656856538, "step": 79110 }, { "epoch": 0.7446117647058823, "grad_norm": 0.4928318645915943, "learning_rate": 2.3183017147166318e-06, "loss": 0.015353187918663025, "step": 79115 }, { "epoch": 0.7446588235294118, "grad_norm": 0.544049368126565, "learning_rate": 2.31822845992503e-06, "loss": 0.015571716427803039, "step": 79120 }, { "epoch": 0.7447058823529412, "grad_norm": 0.504991072185999, "learning_rate": 2.318155212077208e-06, "loss": 0.014694055914878846, "step": 79125 }, { "epoch": 0.7447529411764706, "grad_norm": 0.6523132174098144, "learning_rate": 2.3180819711720686e-06, "loss": 0.017262688279151915, "step": 79130 }, { "epoch": 0.7448, "grad_norm": 0.4210536123658206, "learning_rate": 2.3180087372085157e-06, "loss": 0.017099015414714813, "step": 79135 }, { "epoch": 0.7448470588235294, "grad_norm": 0.49449478593228724, "learning_rate": 2.3179355101854512e-06, "loss": 0.01596625745296478, "step": 79140 }, { "epoch": 0.7448941176470588, "grad_norm": 0.49017281395791107, "learning_rate": 2.3178622901017813e-06, "loss": 0.013335564732551574, "step": 79145 }, { "epoch": 0.7449411764705882, "grad_norm": 0.46290414919391365, "learning_rate": 2.317789076956408e-06, "loss": 0.013507324457168578, "step": 79150 }, { "epoch": 0.7449882352941176, "grad_norm": 0.44756700081188633, "learning_rate": 2.3177158707482367e-06, "loss": 0.015263578295707703, "step": 79155 }, { "epoch": 0.745035294117647, "grad_norm": 0.3062983370333256, "learning_rate": 2.3176426714761714e-06, "loss": 0.012128908187150955, "step": 79160 }, { "epoch": 0.7450823529411764, "grad_norm": 0.4894106381478008, "learning_rate": 2.3175694791391175e-06, "loss": 0.013661885261535644, "step": 79165 }, { "epoch": 0.7451294117647059, "grad_norm": 0.38772259653553454, "learning_rate": 2.317496293735979e-06, "loss": 0.014746652543544769, "step": 79170 }, { "epoch": 0.7451764705882353, "grad_norm": 0.39785939448740143, "learning_rate": 2.317423115265662e-06, "loss": 0.015473979711532592, "step": 79175 }, { "epoch": 0.7452235294117647, "grad_norm": 0.40412016166312764, "learning_rate": 2.317349943727072e-06, "loss": 0.011280227452516556, "step": 79180 }, { "epoch": 0.7452705882352941, "grad_norm": 0.480564215863926, "learning_rate": 2.3172767791191144e-06, "loss": 0.014566773176193237, "step": 79185 }, { "epoch": 0.7453176470588235, "grad_norm": 0.6061531613454875, "learning_rate": 2.317203621440695e-06, "loss": 0.020690181851387025, "step": 79190 }, { "epoch": 0.7453647058823529, "grad_norm": 0.6918392455983936, "learning_rate": 2.3171304706907202e-06, "loss": 0.016974201798439024, "step": 79195 }, { "epoch": 0.7454117647058823, "grad_norm": 0.5422075511318216, "learning_rate": 2.317057326868097e-06, "loss": 0.014635276794433594, "step": 79200 }, { "epoch": 0.7454588235294117, "grad_norm": 0.48757876675494327, "learning_rate": 2.316984189971732e-06, "loss": 0.01823253333568573, "step": 79205 }, { "epoch": 0.7455058823529411, "grad_norm": 0.4773317162483209, "learning_rate": 2.316911060000531e-06, "loss": 0.016842231154441833, "step": 79210 }, { "epoch": 0.7455529411764706, "grad_norm": 0.5217111642069621, "learning_rate": 2.316837936953402e-06, "loss": 0.014904630184173585, "step": 79215 }, { "epoch": 0.7456, "grad_norm": 0.2860226075567667, "learning_rate": 2.3167648208292527e-06, "loss": 0.01257641613483429, "step": 79220 }, { "epoch": 0.7456470588235294, "grad_norm": 0.5691182499615166, "learning_rate": 2.3166917116269906e-06, "loss": 0.018227794766426088, "step": 79225 }, { "epoch": 0.7456941176470588, "grad_norm": 0.4744194698167941, "learning_rate": 2.316618609345524e-06, "loss": 0.012307579815387725, "step": 79230 }, { "epoch": 0.7457411764705882, "grad_norm": 0.9038681394119973, "learning_rate": 2.3165455139837593e-06, "loss": 0.022143670916557313, "step": 79235 }, { "epoch": 0.7457882352941176, "grad_norm": 0.5903894973853919, "learning_rate": 2.316472425540607e-06, "loss": 0.01141023188829422, "step": 79240 }, { "epoch": 0.745835294117647, "grad_norm": 0.5314536682339148, "learning_rate": 2.316399344014975e-06, "loss": 0.013885906338691712, "step": 79245 }, { "epoch": 0.7458823529411764, "grad_norm": 0.647389217415487, "learning_rate": 2.316326269405772e-06, "loss": 0.013496410846710206, "step": 79250 }, { "epoch": 0.7459294117647058, "grad_norm": 0.7982429230753811, "learning_rate": 2.3162532017119068e-06, "loss": 0.017174214124679565, "step": 79255 }, { "epoch": 0.7459764705882352, "grad_norm": 0.4066387654138071, "learning_rate": 2.3161801409322897e-06, "loss": 0.014639388024806976, "step": 79260 }, { "epoch": 0.7460235294117648, "grad_norm": 0.4917968202968098, "learning_rate": 2.3161070870658294e-06, "loss": 0.02205246239900589, "step": 79265 }, { "epoch": 0.7460705882352942, "grad_norm": 0.48633633523832137, "learning_rate": 2.3160340401114362e-06, "loss": 0.014384852349758148, "step": 79270 }, { "epoch": 0.7461176470588236, "grad_norm": 0.48837290108720444, "learning_rate": 2.31596100006802e-06, "loss": 0.012820105254650115, "step": 79275 }, { "epoch": 0.746164705882353, "grad_norm": 0.520887280410618, "learning_rate": 2.3158879669344915e-06, "loss": 0.014268600940704345, "step": 79280 }, { "epoch": 0.7462117647058824, "grad_norm": 0.5880433569663334, "learning_rate": 2.3158149407097606e-06, "loss": 0.014803361892700196, "step": 79285 }, { "epoch": 0.7462588235294118, "grad_norm": 0.5884646530631947, "learning_rate": 2.3157419213927383e-06, "loss": 0.013734394311904907, "step": 79290 }, { "epoch": 0.7463058823529412, "grad_norm": 0.38482633565284163, "learning_rate": 2.3156689089823363e-06, "loss": 0.02248004376888275, "step": 79295 }, { "epoch": 0.7463529411764706, "grad_norm": 0.4788196653965334, "learning_rate": 2.3155959034774652e-06, "loss": 0.015109381079673767, "step": 79300 }, { "epoch": 0.7464, "grad_norm": 0.5060475137727241, "learning_rate": 2.3155229048770363e-06, "loss": 0.011175116151571273, "step": 79305 }, { "epoch": 0.7464470588235295, "grad_norm": 0.8352965961629778, "learning_rate": 2.3154499131799623e-06, "loss": 0.015452338755130768, "step": 79310 }, { "epoch": 0.7464941176470589, "grad_norm": 0.5146393200693486, "learning_rate": 2.315376928385155e-06, "loss": 0.014952200651168823, "step": 79315 }, { "epoch": 0.7465411764705883, "grad_norm": 0.4334024085859436, "learning_rate": 2.315303950491526e-06, "loss": 0.01761031597852707, "step": 79320 }, { "epoch": 0.7465882352941177, "grad_norm": 0.40104527340863544, "learning_rate": 2.3152309794979873e-06, "loss": 0.018048164248466492, "step": 79325 }, { "epoch": 0.7466352941176471, "grad_norm": 0.6349575421878476, "learning_rate": 2.3151580154034534e-06, "loss": 0.018364039063453675, "step": 79330 }, { "epoch": 0.7466823529411765, "grad_norm": 0.5789569297233395, "learning_rate": 2.3150850582068356e-06, "loss": 0.014973844587802886, "step": 79335 }, { "epoch": 0.7467294117647059, "grad_norm": 0.485515973055798, "learning_rate": 2.3150121079070485e-06, "loss": 0.014045843482017517, "step": 79340 }, { "epoch": 0.7467764705882353, "grad_norm": 0.4585455341466726, "learning_rate": 2.3149391645030044e-06, "loss": 0.014694294333457947, "step": 79345 }, { "epoch": 0.7468235294117647, "grad_norm": 0.4204936689947625, "learning_rate": 2.314866227993618e-06, "loss": 0.015641160309314728, "step": 79350 }, { "epoch": 0.7468705882352941, "grad_norm": 0.4088094887683957, "learning_rate": 2.3147932983778024e-06, "loss": 0.013555565476417541, "step": 79355 }, { "epoch": 0.7469176470588236, "grad_norm": 0.4990980032243586, "learning_rate": 2.314720375654472e-06, "loss": 0.01429000347852707, "step": 79360 }, { "epoch": 0.746964705882353, "grad_norm": 0.38895301423087325, "learning_rate": 2.3146474598225407e-06, "loss": 0.014502885937690734, "step": 79365 }, { "epoch": 0.7470117647058824, "grad_norm": 0.5147211200358038, "learning_rate": 2.3145745508809245e-06, "loss": 0.016745114326477052, "step": 79370 }, { "epoch": 0.7470588235294118, "grad_norm": 0.46440043226218264, "learning_rate": 2.314501648828537e-06, "loss": 0.012969039380550385, "step": 79375 }, { "epoch": 0.7471058823529412, "grad_norm": 0.4108826847910998, "learning_rate": 2.314428753664294e-06, "loss": 0.0167111873626709, "step": 79380 }, { "epoch": 0.7471529411764706, "grad_norm": 0.5003546855850582, "learning_rate": 2.31435586538711e-06, "loss": 0.0133822962641716, "step": 79385 }, { "epoch": 0.7472, "grad_norm": 0.7046854391030982, "learning_rate": 2.3142829839959018e-06, "loss": 0.014234372973442077, "step": 79390 }, { "epoch": 0.7472470588235294, "grad_norm": 0.6516944494773084, "learning_rate": 2.3142101094895846e-06, "loss": 0.021831384301185607, "step": 79395 }, { "epoch": 0.7472941176470588, "grad_norm": 0.4983331731195123, "learning_rate": 2.3141372418670736e-06, "loss": 0.015394127368927002, "step": 79400 }, { "epoch": 0.7473411764705883, "grad_norm": 0.40076523698566957, "learning_rate": 2.3140643811272873e-06, "loss": 0.015476605296134949, "step": 79405 }, { "epoch": 0.7473882352941177, "grad_norm": 0.44207991575981853, "learning_rate": 2.313991527269141e-06, "loss": 0.01214626282453537, "step": 79410 }, { "epoch": 0.7474352941176471, "grad_norm": 0.4094921734904547, "learning_rate": 2.3139186802915513e-06, "loss": 0.014808689057826997, "step": 79415 }, { "epoch": 0.7474823529411765, "grad_norm": 0.5038033881917329, "learning_rate": 2.3138458401934344e-06, "loss": 0.020256850123405456, "step": 79420 }, { "epoch": 0.7475294117647059, "grad_norm": 0.5606348044070086, "learning_rate": 2.3137730069737096e-06, "loss": 0.015756669640541076, "step": 79425 }, { "epoch": 0.7475764705882353, "grad_norm": 0.4864970058733285, "learning_rate": 2.313700180631293e-06, "loss": 0.015127569437026978, "step": 79430 }, { "epoch": 0.7476235294117647, "grad_norm": 0.5187949452350047, "learning_rate": 2.313627361165103e-06, "loss": 0.021083948016166688, "step": 79435 }, { "epoch": 0.7476705882352941, "grad_norm": 0.5630626700110485, "learning_rate": 2.3135545485740576e-06, "loss": 0.016119518876075746, "step": 79440 }, { "epoch": 0.7477176470588235, "grad_norm": 0.5354851864039455, "learning_rate": 2.313481742857074e-06, "loss": 0.012879809737205506, "step": 79445 }, { "epoch": 0.7477647058823529, "grad_norm": 0.5670361204505445, "learning_rate": 2.313408944013072e-06, "loss": 0.01849181205034256, "step": 79450 }, { "epoch": 0.7478117647058824, "grad_norm": 0.3572354429378058, "learning_rate": 2.31333615204097e-06, "loss": 0.015942564606666564, "step": 79455 }, { "epoch": 0.7478588235294118, "grad_norm": 1.031049979047506, "learning_rate": 2.3132633669396855e-06, "loss": 0.016926777362823487, "step": 79460 }, { "epoch": 0.7479058823529412, "grad_norm": 0.5072088250563418, "learning_rate": 2.3131905887081398e-06, "loss": 0.013705503940582276, "step": 79465 }, { "epoch": 0.7479529411764706, "grad_norm": 0.5714930570272321, "learning_rate": 2.313117817345251e-06, "loss": 0.02020685076713562, "step": 79470 }, { "epoch": 0.748, "grad_norm": 0.41205808834062296, "learning_rate": 2.313045052849939e-06, "loss": 0.014388644695281982, "step": 79475 }, { "epoch": 0.7480470588235294, "grad_norm": 0.28816009973515755, "learning_rate": 2.3129722952211244e-06, "loss": 0.015644346177577973, "step": 79480 }, { "epoch": 0.7480941176470588, "grad_norm": 0.5410034750124301, "learning_rate": 2.312899544457726e-06, "loss": 0.01921290308237076, "step": 79485 }, { "epoch": 0.7481411764705882, "grad_norm": 0.4667497383873912, "learning_rate": 2.312826800558665e-06, "loss": 0.013984893262386323, "step": 79490 }, { "epoch": 0.7481882352941176, "grad_norm": 0.4470825302892744, "learning_rate": 2.3127540635228616e-06, "loss": 0.013253794610500335, "step": 79495 }, { "epoch": 0.7482352941176471, "grad_norm": 0.5297159269974061, "learning_rate": 2.312681333349238e-06, "loss": 0.014373554289340973, "step": 79500 }, { "epoch": 0.7482823529411765, "grad_norm": 0.615145386147267, "learning_rate": 2.3126086100367136e-06, "loss": 0.013959594070911407, "step": 79505 }, { "epoch": 0.7483294117647059, "grad_norm": 0.4875129898536158, "learning_rate": 2.3125358935842104e-06, "loss": 0.017767079174518585, "step": 79510 }, { "epoch": 0.7483764705882353, "grad_norm": 0.5267684404846205, "learning_rate": 2.3124631839906493e-06, "loss": 0.01582695245742798, "step": 79515 }, { "epoch": 0.7484235294117647, "grad_norm": 0.5034836461767158, "learning_rate": 2.3123904812549533e-06, "loss": 0.013223741948604584, "step": 79520 }, { "epoch": 0.7484705882352941, "grad_norm": 0.4199991915835691, "learning_rate": 2.312317785376044e-06, "loss": 0.01004439890384674, "step": 79525 }, { "epoch": 0.7485176470588235, "grad_norm": 0.5487560734377683, "learning_rate": 2.3122450963528428e-06, "loss": 0.017857548594474793, "step": 79530 }, { "epoch": 0.7485647058823529, "grad_norm": 0.37812572132802424, "learning_rate": 2.312172414184273e-06, "loss": 0.015405187010765075, "step": 79535 }, { "epoch": 0.7486117647058823, "grad_norm": 0.5205556896607653, "learning_rate": 2.3120997388692574e-06, "loss": 0.01424422562122345, "step": 79540 }, { "epoch": 0.7486588235294117, "grad_norm": 0.4328002964139903, "learning_rate": 2.3120270704067183e-06, "loss": 0.014280825853347778, "step": 79545 }, { "epoch": 0.7487058823529412, "grad_norm": 0.4364000046098021, "learning_rate": 2.31195440879558e-06, "loss": 0.013671332597732544, "step": 79550 }, { "epoch": 0.7487529411764706, "grad_norm": 0.44603349899172223, "learning_rate": 2.3118817540347647e-06, "loss": 0.014601513743400574, "step": 79555 }, { "epoch": 0.7488, "grad_norm": 0.3832678958612703, "learning_rate": 2.3118091061231974e-06, "loss": 0.017218583822250368, "step": 79560 }, { "epoch": 0.7488470588235294, "grad_norm": 0.512887532190251, "learning_rate": 2.3117364650598017e-06, "loss": 0.012817117571830749, "step": 79565 }, { "epoch": 0.7488941176470588, "grad_norm": 0.5708676851529999, "learning_rate": 2.3116638308435e-06, "loss": 0.015443170070648193, "step": 79570 }, { "epoch": 0.7489411764705882, "grad_norm": 0.3307001673417461, "learning_rate": 2.3115912034732195e-06, "loss": 0.013088598847389221, "step": 79575 }, { "epoch": 0.7489882352941176, "grad_norm": 0.46474772406394976, "learning_rate": 2.3115185829478825e-06, "loss": 0.014286601543426513, "step": 79580 }, { "epoch": 0.749035294117647, "grad_norm": 0.3311429985360841, "learning_rate": 2.3114459692664154e-06, "loss": 0.022982238233089446, "step": 79585 }, { "epoch": 0.7490823529411764, "grad_norm": 0.48177394857004796, "learning_rate": 2.3113733624277425e-06, "loss": 0.015247733891010284, "step": 79590 }, { "epoch": 0.7491294117647059, "grad_norm": 0.4890567615262255, "learning_rate": 2.311300762430789e-06, "loss": 0.014218634366989136, "step": 79595 }, { "epoch": 0.7491764705882353, "grad_norm": 0.6091090310669836, "learning_rate": 2.311228169274481e-06, "loss": 0.019122013449668886, "step": 79600 }, { "epoch": 0.7492235294117647, "grad_norm": 0.5201246757164465, "learning_rate": 2.3111555829577444e-06, "loss": 0.012685595452785492, "step": 79605 }, { "epoch": 0.7492705882352941, "grad_norm": 0.4708106299348958, "learning_rate": 2.311083003479505e-06, "loss": 0.01747630536556244, "step": 79610 }, { "epoch": 0.7493176470588235, "grad_norm": 0.5177644769296542, "learning_rate": 2.311010430838689e-06, "loss": 0.017042656242847443, "step": 79615 }, { "epoch": 0.7493647058823529, "grad_norm": 0.4572500189084775, "learning_rate": 2.3109378650342233e-06, "loss": 0.01630902588367462, "step": 79620 }, { "epoch": 0.7494117647058823, "grad_norm": 0.5021884082922429, "learning_rate": 2.3108653060650342e-06, "loss": 0.016214536130428316, "step": 79625 }, { "epoch": 0.7494588235294117, "grad_norm": 0.47598116742809843, "learning_rate": 2.3107927539300488e-06, "loss": 0.013889113068580627, "step": 79630 }, { "epoch": 0.7495058823529411, "grad_norm": 0.6203338871789015, "learning_rate": 2.310720208628194e-06, "loss": 0.013910356163978576, "step": 79635 }, { "epoch": 0.7495529411764705, "grad_norm": 0.45622076580320187, "learning_rate": 2.310647670158398e-06, "loss": 0.0123323455452919, "step": 79640 }, { "epoch": 0.7496, "grad_norm": 0.4916426176515809, "learning_rate": 2.3105751385195884e-06, "loss": 0.01961269974708557, "step": 79645 }, { "epoch": 0.7496470588235294, "grad_norm": 0.4260643842156848, "learning_rate": 2.3105026137106926e-06, "loss": 0.014372393488883972, "step": 79650 }, { "epoch": 0.7496941176470588, "grad_norm": 0.36676863088379835, "learning_rate": 2.3104300957306396e-06, "loss": 0.01141054555773735, "step": 79655 }, { "epoch": 0.7497411764705882, "grad_norm": 0.5830530089698163, "learning_rate": 2.3103575845783564e-06, "loss": 0.016504561901092528, "step": 79660 }, { "epoch": 0.7497882352941176, "grad_norm": 0.443682602398504, "learning_rate": 2.310285080252773e-06, "loss": 0.013108761608600616, "step": 79665 }, { "epoch": 0.749835294117647, "grad_norm": 0.4208735454434863, "learning_rate": 2.310212582752818e-06, "loss": 0.014655379951000214, "step": 79670 }, { "epoch": 0.7498823529411764, "grad_norm": 0.6447322664310267, "learning_rate": 2.3101400920774204e-06, "loss": 0.014096906781196595, "step": 79675 }, { "epoch": 0.7499294117647058, "grad_norm": 0.40293099565081486, "learning_rate": 2.3100676082255093e-06, "loss": 0.01524137258529663, "step": 79680 }, { "epoch": 0.7499764705882352, "grad_norm": 0.6574488201672333, "learning_rate": 2.309995131196014e-06, "loss": 0.015038317441940308, "step": 79685 }, { "epoch": 0.7500235294117648, "grad_norm": 0.5635134902432319, "learning_rate": 2.3099226609878655e-06, "loss": 0.01729881763458252, "step": 79690 }, { "epoch": 0.7500705882352942, "grad_norm": 0.6818133321759787, "learning_rate": 2.3098501975999926e-06, "loss": 0.012836767733097077, "step": 79695 }, { "epoch": 0.7501176470588236, "grad_norm": 0.47060745661210496, "learning_rate": 2.3097777410313266e-06, "loss": 0.014514647424221039, "step": 79700 }, { "epoch": 0.750164705882353, "grad_norm": 0.5055163821770426, "learning_rate": 2.309705291280797e-06, "loss": 0.01616084575653076, "step": 79705 }, { "epoch": 0.7502117647058824, "grad_norm": 0.42562522041289574, "learning_rate": 2.3096328483473356e-06, "loss": 0.010793019831180573, "step": 79710 }, { "epoch": 0.7502588235294118, "grad_norm": 0.6633979036010299, "learning_rate": 2.3095604122298727e-06, "loss": 0.013869738578796387, "step": 79715 }, { "epoch": 0.7503058823529412, "grad_norm": 0.6192205254542711, "learning_rate": 2.3094879829273396e-06, "loss": 0.01690175533294678, "step": 79720 }, { "epoch": 0.7503529411764706, "grad_norm": 0.5771185042817653, "learning_rate": 2.309415560438668e-06, "loss": 0.018264275789260865, "step": 79725 }, { "epoch": 0.7504, "grad_norm": 0.773803125124812, "learning_rate": 2.3093431447627895e-06, "loss": 0.01996096670627594, "step": 79730 }, { "epoch": 0.7504470588235295, "grad_norm": 0.44859834327941145, "learning_rate": 2.3092707358986366e-06, "loss": 0.016687752306461336, "step": 79735 }, { "epoch": 0.7504941176470589, "grad_norm": 0.40391885420312323, "learning_rate": 2.3091983338451403e-06, "loss": 0.018656715750694275, "step": 79740 }, { "epoch": 0.7505411764705883, "grad_norm": 0.49370890308377574, "learning_rate": 2.3091259386012336e-06, "loss": 0.017043407261371612, "step": 79745 }, { "epoch": 0.7505882352941177, "grad_norm": 0.6382490456033563, "learning_rate": 2.3090535501658494e-06, "loss": 0.017711301147937775, "step": 79750 }, { "epoch": 0.7506352941176471, "grad_norm": 0.2589109228246149, "learning_rate": 2.30898116853792e-06, "loss": 0.01505909264087677, "step": 79755 }, { "epoch": 0.7506823529411765, "grad_norm": 0.4316112849870718, "learning_rate": 2.3089087937163786e-06, "loss": 0.012828773260116577, "step": 79760 }, { "epoch": 0.7507294117647059, "grad_norm": 0.7792394839616902, "learning_rate": 2.3088364257001593e-06, "loss": 0.02331546097993851, "step": 79765 }, { "epoch": 0.7507764705882353, "grad_norm": 0.3802681083913955, "learning_rate": 2.308764064488195e-06, "loss": 0.013880957663059235, "step": 79770 }, { "epoch": 0.7508235294117647, "grad_norm": 0.5543826223428949, "learning_rate": 2.3086917100794197e-06, "loss": 0.015405485033988952, "step": 79775 }, { "epoch": 0.7508705882352941, "grad_norm": 0.3553116281374044, "learning_rate": 2.308619362472767e-06, "loss": 0.016626760363578796, "step": 79780 }, { "epoch": 0.7509176470588236, "grad_norm": 0.43724010949312525, "learning_rate": 2.308547021667172e-06, "loss": 0.015091468393802644, "step": 79785 }, { "epoch": 0.750964705882353, "grad_norm": 0.5046157554584357, "learning_rate": 2.308474687661569e-06, "loss": 0.0146531879901886, "step": 79790 }, { "epoch": 0.7510117647058824, "grad_norm": 0.5710783968543173, "learning_rate": 2.308402360454892e-06, "loss": 0.015326803922653199, "step": 79795 }, { "epoch": 0.7510588235294118, "grad_norm": 0.4401924811502931, "learning_rate": 2.308330040046076e-06, "loss": 0.011811565607786179, "step": 79800 }, { "epoch": 0.7511058823529412, "grad_norm": 0.4919276457487433, "learning_rate": 2.308257726434058e-06, "loss": 0.012924328446388245, "step": 79805 }, { "epoch": 0.7511529411764706, "grad_norm": 0.6003493649399191, "learning_rate": 2.308185419617771e-06, "loss": 0.01664976179599762, "step": 79810 }, { "epoch": 0.7512, "grad_norm": 0.6368945467201519, "learning_rate": 2.308113119596152e-06, "loss": 0.017938271164894104, "step": 79815 }, { "epoch": 0.7512470588235294, "grad_norm": 0.3418656836666661, "learning_rate": 2.308040826368137e-06, "loss": 0.014727672934532166, "step": 79820 }, { "epoch": 0.7512941176470588, "grad_norm": 0.25820022896192085, "learning_rate": 2.3079685399326617e-06, "loss": 0.010685181617736817, "step": 79825 }, { "epoch": 0.7513411764705883, "grad_norm": 0.5402519177314888, "learning_rate": 2.307896260288663e-06, "loss": 0.014509819447994232, "step": 79830 }, { "epoch": 0.7513882352941177, "grad_norm": 0.5302986057763721, "learning_rate": 2.3078239874350764e-06, "loss": 0.016793233156204224, "step": 79835 }, { "epoch": 0.7514352941176471, "grad_norm": 0.4406128664324627, "learning_rate": 2.3077517213708398e-06, "loss": 0.013998843729496002, "step": 79840 }, { "epoch": 0.7514823529411765, "grad_norm": 0.4875398099414049, "learning_rate": 2.30767946209489e-06, "loss": 0.015746712684631348, "step": 79845 }, { "epoch": 0.7515294117647059, "grad_norm": 0.49966772476621907, "learning_rate": 2.3076072096061642e-06, "loss": 0.013668105006217957, "step": 79850 }, { "epoch": 0.7515764705882353, "grad_norm": 0.27012500272123774, "learning_rate": 2.3075349639036e-06, "loss": 0.014347270131111145, "step": 79855 }, { "epoch": 0.7516235294117647, "grad_norm": 0.377544467135542, "learning_rate": 2.3074627249861355e-06, "loss": 0.016993822157382966, "step": 79860 }, { "epoch": 0.7516705882352941, "grad_norm": 0.6633161193268847, "learning_rate": 2.3073904928527084e-06, "loss": 0.01715904474258423, "step": 79865 }, { "epoch": 0.7517176470588235, "grad_norm": 0.39910659837690027, "learning_rate": 2.3073182675022564e-06, "loss": 0.013199543952941895, "step": 79870 }, { "epoch": 0.7517647058823529, "grad_norm": 0.4310525387250234, "learning_rate": 2.3072460489337186e-06, "loss": 0.011681130528450013, "step": 79875 }, { "epoch": 0.7518117647058824, "grad_norm": 0.4153004903189945, "learning_rate": 2.3071738371460335e-06, "loss": 0.016485920548439024, "step": 79880 }, { "epoch": 0.7518588235294118, "grad_norm": 0.4150960079234048, "learning_rate": 2.3071016321381404e-06, "loss": 0.012497428059577941, "step": 79885 }, { "epoch": 0.7519058823529412, "grad_norm": 0.4210544740690891, "learning_rate": 2.3070294339089778e-06, "loss": 0.014249080419540405, "step": 79890 }, { "epoch": 0.7519529411764706, "grad_norm": 0.41201304710018566, "learning_rate": 2.306957242457486e-06, "loss": 0.010282811522483826, "step": 79895 }, { "epoch": 0.752, "grad_norm": 0.564195460494054, "learning_rate": 2.3068850577826044e-06, "loss": 0.019651949405670166, "step": 79900 }, { "epoch": 0.7520470588235294, "grad_norm": 0.4721706277112496, "learning_rate": 2.3068128798832723e-06, "loss": 0.014913755655288696, "step": 79905 }, { "epoch": 0.7520941176470588, "grad_norm": 0.3586566899231119, "learning_rate": 2.3067407087584296e-06, "loss": 0.012200973927974701, "step": 79910 }, { "epoch": 0.7521411764705882, "grad_norm": 0.46665728612352636, "learning_rate": 2.3066685444070177e-06, "loss": 0.013232925534248352, "step": 79915 }, { "epoch": 0.7521882352941176, "grad_norm": 0.8211581191754199, "learning_rate": 2.3065963868279763e-06, "loss": 0.01768602728843689, "step": 79920 }, { "epoch": 0.7522352941176471, "grad_norm": 0.4584508576273463, "learning_rate": 2.3065242360202467e-06, "loss": 0.013425876200199128, "step": 79925 }, { "epoch": 0.7522823529411765, "grad_norm": 0.6384732902457526, "learning_rate": 2.30645209198277e-06, "loss": 0.017805346846580507, "step": 79930 }, { "epoch": 0.7523294117647059, "grad_norm": 0.5777370380761575, "learning_rate": 2.3063799547144862e-06, "loss": 0.018118903040885925, "step": 79935 }, { "epoch": 0.7523764705882353, "grad_norm": 0.5686724657008914, "learning_rate": 2.306307824214338e-06, "loss": 0.012257377058267594, "step": 79940 }, { "epoch": 0.7524235294117647, "grad_norm": 0.5609548369656153, "learning_rate": 2.3062357004812676e-06, "loss": 0.014328646659851074, "step": 79945 }, { "epoch": 0.7524705882352941, "grad_norm": 0.41544784235113363, "learning_rate": 2.306163583514216e-06, "loss": 0.013678072392940522, "step": 79950 }, { "epoch": 0.7525176470588235, "grad_norm": 0.48692946708545976, "learning_rate": 2.3060914733121254e-06, "loss": 0.016753849387168885, "step": 79955 }, { "epoch": 0.7525647058823529, "grad_norm": 0.6878291840540117, "learning_rate": 2.3060193698739384e-06, "loss": 0.01445048451423645, "step": 79960 }, { "epoch": 0.7526117647058823, "grad_norm": 0.42681814775346977, "learning_rate": 2.305947273198598e-06, "loss": 0.016167120635509492, "step": 79965 }, { "epoch": 0.7526588235294117, "grad_norm": 0.40760472872204995, "learning_rate": 2.305875183285046e-06, "loss": 0.013879910111427307, "step": 79970 }, { "epoch": 0.7527058823529412, "grad_norm": 0.5559879949951161, "learning_rate": 2.305803100132227e-06, "loss": 0.012017686665058137, "step": 79975 }, { "epoch": 0.7527529411764706, "grad_norm": 0.5250364190905724, "learning_rate": 2.3057310237390835e-06, "loss": 0.01295742392539978, "step": 79980 }, { "epoch": 0.7528, "grad_norm": 0.33577966438152407, "learning_rate": 2.305658954104559e-06, "loss": 0.013848519325256348, "step": 79985 }, { "epoch": 0.7528470588235294, "grad_norm": 0.7389857172783876, "learning_rate": 2.3055868912275973e-06, "loss": 0.02050551474094391, "step": 79990 }, { "epoch": 0.7528941176470588, "grad_norm": 0.47528327748085075, "learning_rate": 2.3055148351071432e-06, "loss": 0.017199233174324036, "step": 79995 }, { "epoch": 0.7529411764705882, "grad_norm": 0.46192758265792966, "learning_rate": 2.3054427857421403e-06, "loss": 0.017638787627220154, "step": 80000 }, { "epoch": 0.7529882352941176, "grad_norm": 0.479299622552591, "learning_rate": 2.305370743131533e-06, "loss": 0.015379366278648377, "step": 80005 }, { "epoch": 0.753035294117647, "grad_norm": 0.5403451924146734, "learning_rate": 2.305298707274266e-06, "loss": 0.013940280675888062, "step": 80010 }, { "epoch": 0.7530823529411764, "grad_norm": 0.5942235524997743, "learning_rate": 2.305226678169285e-06, "loss": 0.017086580395698547, "step": 80015 }, { "epoch": 0.7531294117647059, "grad_norm": 0.5161056806419508, "learning_rate": 2.3051546558155342e-06, "loss": 0.015305477380752563, "step": 80020 }, { "epoch": 0.7531764705882353, "grad_norm": 0.3917923642965487, "learning_rate": 2.3050826402119597e-06, "loss": 0.018527236580848695, "step": 80025 }, { "epoch": 0.7532235294117647, "grad_norm": 0.6747744058955215, "learning_rate": 2.305010631357507e-06, "loss": 0.017896576225757597, "step": 80030 }, { "epoch": 0.7532705882352941, "grad_norm": 0.5605309902688793, "learning_rate": 2.3049386292511215e-06, "loss": 0.015837137401103974, "step": 80035 }, { "epoch": 0.7533176470588235, "grad_norm": 0.4297126979795295, "learning_rate": 2.30486663389175e-06, "loss": 0.016476240754127503, "step": 80040 }, { "epoch": 0.7533647058823529, "grad_norm": 0.20924663729069068, "learning_rate": 2.3047946452783387e-06, "loss": 0.011996988207101822, "step": 80045 }, { "epoch": 0.7534117647058823, "grad_norm": 0.4779387233054362, "learning_rate": 2.3047226634098342e-06, "loss": 0.015036576986312866, "step": 80050 }, { "epoch": 0.7534588235294117, "grad_norm": 0.5735394017032427, "learning_rate": 2.304650688285183e-06, "loss": 0.015399104356765747, "step": 80055 }, { "epoch": 0.7535058823529411, "grad_norm": 0.5511477297542992, "learning_rate": 2.3045787199033318e-06, "loss": 0.013270027935504913, "step": 80060 }, { "epoch": 0.7535529411764705, "grad_norm": 0.3360067804359334, "learning_rate": 2.304506758263229e-06, "loss": 0.013286709785461426, "step": 80065 }, { "epoch": 0.7536, "grad_norm": 0.4349380637864154, "learning_rate": 2.3044348033638215e-06, "loss": 0.014858396351337433, "step": 80070 }, { "epoch": 0.7536470588235294, "grad_norm": 0.308700630283871, "learning_rate": 2.3043628552040565e-06, "loss": 0.009993388503789901, "step": 80075 }, { "epoch": 0.7536941176470588, "grad_norm": 0.40129471309037573, "learning_rate": 2.304290913782882e-06, "loss": 0.015089282393455505, "step": 80080 }, { "epoch": 0.7537411764705882, "grad_norm": 0.44994265039080145, "learning_rate": 2.304218979099247e-06, "loss": 0.01301877647638321, "step": 80085 }, { "epoch": 0.7537882352941176, "grad_norm": 0.3770510926662434, "learning_rate": 2.3041470511521e-06, "loss": 0.013632461428642273, "step": 80090 }, { "epoch": 0.753835294117647, "grad_norm": 0.43975738267320263, "learning_rate": 2.3040751299403887e-06, "loss": 0.012561088800430298, "step": 80095 }, { "epoch": 0.7538823529411764, "grad_norm": 0.4390173979644873, "learning_rate": 2.3040032154630625e-06, "loss": 0.013442252576351166, "step": 80100 }, { "epoch": 0.7539294117647058, "grad_norm": 0.6370076042973446, "learning_rate": 2.3039313077190707e-06, "loss": 0.017093971371650696, "step": 80105 }, { "epoch": 0.7539764705882352, "grad_norm": 0.582790868461736, "learning_rate": 2.303859406707362e-06, "loss": 0.016389098763465882, "step": 80110 }, { "epoch": 0.7540235294117648, "grad_norm": 0.6967769746298191, "learning_rate": 2.3037875124268863e-06, "loss": 0.02483770251274109, "step": 80115 }, { "epoch": 0.7540705882352942, "grad_norm": 0.5827852738368807, "learning_rate": 2.3037156248765935e-06, "loss": 0.015858674049377443, "step": 80120 }, { "epoch": 0.7541176470588236, "grad_norm": 0.4819947081457866, "learning_rate": 2.3036437440554334e-06, "loss": 0.013993501663208008, "step": 80125 }, { "epoch": 0.754164705882353, "grad_norm": 0.4582137181650946, "learning_rate": 2.3035718699623567e-06, "loss": 0.015059688687324524, "step": 80130 }, { "epoch": 0.7542117647058824, "grad_norm": 0.9610751609955781, "learning_rate": 2.303500002596313e-06, "loss": 0.014574910700321197, "step": 80135 }, { "epoch": 0.7542588235294118, "grad_norm": 0.5096134622740095, "learning_rate": 2.3034281419562546e-06, "loss": 0.012287409603595733, "step": 80140 }, { "epoch": 0.7543058823529412, "grad_norm": 0.6931742362978216, "learning_rate": 2.3033562880411307e-06, "loss": 0.016610479354858397, "step": 80145 }, { "epoch": 0.7543529411764706, "grad_norm": 0.4684353962924618, "learning_rate": 2.3032844408498933e-06, "loss": 0.01941485106945038, "step": 80150 }, { "epoch": 0.7544, "grad_norm": 0.45618418424970814, "learning_rate": 2.3032126003814933e-06, "loss": 0.01519971787929535, "step": 80155 }, { "epoch": 0.7544470588235294, "grad_norm": 0.64932646747728, "learning_rate": 2.303140766634883e-06, "loss": 0.014470133185386657, "step": 80160 }, { "epoch": 0.7544941176470589, "grad_norm": 0.49141884586265416, "learning_rate": 2.303068939609014e-06, "loss": 0.013647186756134033, "step": 80165 }, { "epoch": 0.7545411764705883, "grad_norm": 0.45359510304848477, "learning_rate": 2.302997119302838e-06, "loss": 0.015525136888027192, "step": 80170 }, { "epoch": 0.7545882352941177, "grad_norm": 0.4564670409369511, "learning_rate": 2.3029253057153077e-06, "loss": 0.01795835793018341, "step": 80175 }, { "epoch": 0.7546352941176471, "grad_norm": 0.547851464253708, "learning_rate": 2.302853498845376e-06, "loss": 0.012812289595603942, "step": 80180 }, { "epoch": 0.7546823529411765, "grad_norm": 0.43272808246672373, "learning_rate": 2.302781698691995e-06, "loss": 0.013474597036838532, "step": 80185 }, { "epoch": 0.7547294117647059, "grad_norm": 0.3865335087773589, "learning_rate": 2.302709905254118e-06, "loss": 0.014178204536437988, "step": 80190 }, { "epoch": 0.7547764705882353, "grad_norm": 0.3260498860285402, "learning_rate": 2.302638118530698e-06, "loss": 0.01704637110233307, "step": 80195 }, { "epoch": 0.7548235294117647, "grad_norm": 0.2709185525616394, "learning_rate": 2.3025663385206884e-06, "loss": 0.01843147724866867, "step": 80200 }, { "epoch": 0.7548705882352941, "grad_norm": 0.6168619672396594, "learning_rate": 2.3024945652230433e-06, "loss": 0.017928311228752138, "step": 80205 }, { "epoch": 0.7549176470588236, "grad_norm": 0.4604489953275099, "learning_rate": 2.302422798636717e-06, "loss": 0.019118575751781462, "step": 80210 }, { "epoch": 0.754964705882353, "grad_norm": 0.5411749726098646, "learning_rate": 2.3023510387606625e-06, "loss": 0.014771775901317596, "step": 80215 }, { "epoch": 0.7550117647058824, "grad_norm": 0.5255337969890768, "learning_rate": 2.3022792855938346e-06, "loss": 0.017147541046142578, "step": 80220 }, { "epoch": 0.7550588235294118, "grad_norm": 0.43400169435314756, "learning_rate": 2.3022075391351882e-06, "loss": 0.015427413582801818, "step": 80225 }, { "epoch": 0.7551058823529412, "grad_norm": 0.4667542454015714, "learning_rate": 2.3021357993836774e-06, "loss": 0.015323272347450257, "step": 80230 }, { "epoch": 0.7551529411764706, "grad_norm": 0.37040814607144085, "learning_rate": 2.3020640663382586e-06, "loss": 0.01273340880870819, "step": 80235 }, { "epoch": 0.7552, "grad_norm": 0.5201777341726883, "learning_rate": 2.3019923399978857e-06, "loss": 0.016910338401794435, "step": 80240 }, { "epoch": 0.7552470588235294, "grad_norm": 0.4640074438958544, "learning_rate": 2.301920620361515e-06, "loss": 0.014923872053623199, "step": 80245 }, { "epoch": 0.7552941176470588, "grad_norm": 0.30665831448744113, "learning_rate": 2.301848907428102e-06, "loss": 0.014256379008293152, "step": 80250 }, { "epoch": 0.7553411764705882, "grad_norm": 0.4804214703012289, "learning_rate": 2.3017772011966026e-06, "loss": 0.018745899200439453, "step": 80255 }, { "epoch": 0.7553882352941177, "grad_norm": 0.5549755123763909, "learning_rate": 2.3017055016659727e-06, "loss": 0.016043075919151308, "step": 80260 }, { "epoch": 0.7554352941176471, "grad_norm": 0.5143432448667733, "learning_rate": 2.3016338088351693e-06, "loss": 0.015782859921455384, "step": 80265 }, { "epoch": 0.7554823529411765, "grad_norm": 0.3752817985760755, "learning_rate": 2.3015621227031493e-06, "loss": 0.013440075516700744, "step": 80270 }, { "epoch": 0.7555294117647059, "grad_norm": 0.5049852078975708, "learning_rate": 2.3014904432688687e-06, "loss": 0.01609608978033066, "step": 80275 }, { "epoch": 0.7555764705882353, "grad_norm": 0.6251311672298859, "learning_rate": 2.3014187705312847e-06, "loss": 0.015789058804512025, "step": 80280 }, { "epoch": 0.7556235294117647, "grad_norm": 0.5009531631562337, "learning_rate": 2.3013471044893556e-06, "loss": 0.01634284555912018, "step": 80285 }, { "epoch": 0.7556705882352941, "grad_norm": 0.6371661490649704, "learning_rate": 2.3012754451420373e-06, "loss": 0.01666842997074127, "step": 80290 }, { "epoch": 0.7557176470588235, "grad_norm": 0.4486014597521902, "learning_rate": 2.301203792488289e-06, "loss": 0.014713543653488158, "step": 80295 }, { "epoch": 0.7557647058823529, "grad_norm": 0.5547710600048577, "learning_rate": 2.301132146527068e-06, "loss": 0.012280870974063874, "step": 80300 }, { "epoch": 0.7558117647058824, "grad_norm": 0.41857739580213327, "learning_rate": 2.301060507257333e-06, "loss": 0.012642882764339447, "step": 80305 }, { "epoch": 0.7558588235294118, "grad_norm": 0.4864976087326072, "learning_rate": 2.3009888746780424e-06, "loss": 0.020635604858398438, "step": 80310 }, { "epoch": 0.7559058823529412, "grad_norm": 0.36840956513403617, "learning_rate": 2.3009172487881546e-06, "loss": 0.014949262142181396, "step": 80315 }, { "epoch": 0.7559529411764706, "grad_norm": 0.6038107096979118, "learning_rate": 2.3008456295866286e-06, "loss": 0.012409932911396027, "step": 80320 }, { "epoch": 0.756, "grad_norm": 0.5212572887300068, "learning_rate": 2.300774017072423e-06, "loss": 0.01783544719219208, "step": 80325 }, { "epoch": 0.7560470588235294, "grad_norm": 0.40907277723123353, "learning_rate": 2.300702411244499e-06, "loss": 0.010481399297714234, "step": 80330 }, { "epoch": 0.7560941176470588, "grad_norm": 0.628843785984466, "learning_rate": 2.3006308121018146e-06, "loss": 0.016252866387367247, "step": 80335 }, { "epoch": 0.7561411764705882, "grad_norm": 0.5918577447364326, "learning_rate": 2.3005592196433296e-06, "loss": 0.016114591062068938, "step": 80340 }, { "epoch": 0.7561882352941176, "grad_norm": 0.5462879826204302, "learning_rate": 2.3004876338680044e-06, "loss": 0.01886792778968811, "step": 80345 }, { "epoch": 0.756235294117647, "grad_norm": 0.47402751697604195, "learning_rate": 2.300416054774799e-06, "loss": 0.0162571519613266, "step": 80350 }, { "epoch": 0.7562823529411765, "grad_norm": 0.8389308446733393, "learning_rate": 2.300344482362675e-06, "loss": 0.012674672901630402, "step": 80355 }, { "epoch": 0.7563294117647059, "grad_norm": 0.43878474575203713, "learning_rate": 2.3002729166305917e-06, "loss": 0.01296093910932541, "step": 80360 }, { "epoch": 0.7563764705882353, "grad_norm": 0.7575485492782383, "learning_rate": 2.300201357577511e-06, "loss": 0.014984798431396485, "step": 80365 }, { "epoch": 0.7564235294117647, "grad_norm": 0.5196736683955325, "learning_rate": 2.3001298052023937e-06, "loss": 0.01655805706977844, "step": 80370 }, { "epoch": 0.7564705882352941, "grad_norm": 0.3570242296980214, "learning_rate": 2.300058259504201e-06, "loss": 0.01602771580219269, "step": 80375 }, { "epoch": 0.7565176470588235, "grad_norm": 0.38770350144559546, "learning_rate": 2.2999867204818954e-06, "loss": 0.016167327761650085, "step": 80380 }, { "epoch": 0.7565647058823529, "grad_norm": 0.6322884448966533, "learning_rate": 2.299915188134438e-06, "loss": 0.01520524024963379, "step": 80385 }, { "epoch": 0.7566117647058823, "grad_norm": 0.39850261666132425, "learning_rate": 2.2998436624607908e-06, "loss": 0.01107977032661438, "step": 80390 }, { "epoch": 0.7566588235294117, "grad_norm": 0.49452058320730485, "learning_rate": 2.2997721434599163e-06, "loss": 0.016553103923797607, "step": 80395 }, { "epoch": 0.7567058823529412, "grad_norm": 0.5792640732922698, "learning_rate": 2.2997006311307773e-06, "loss": 0.014175063371658326, "step": 80400 }, { "epoch": 0.7567529411764706, "grad_norm": 0.461673471329422, "learning_rate": 2.299629125472336e-06, "loss": 0.013613767921924591, "step": 80405 }, { "epoch": 0.7568, "grad_norm": 0.4836318436919825, "learning_rate": 2.299557626483556e-06, "loss": 0.016733624041080475, "step": 80410 }, { "epoch": 0.7568470588235294, "grad_norm": 0.5572891694522213, "learning_rate": 2.2994861341634008e-06, "loss": 0.01706705689430237, "step": 80415 }, { "epoch": 0.7568941176470588, "grad_norm": 0.35136144290740595, "learning_rate": 2.299414648510832e-06, "loss": 0.014797106385231018, "step": 80420 }, { "epoch": 0.7569411764705882, "grad_norm": 0.5395683160665454, "learning_rate": 2.299343169524816e-06, "loss": 0.018404965102672578, "step": 80425 }, { "epoch": 0.7569882352941176, "grad_norm": 0.5778465108874009, "learning_rate": 2.299271697204315e-06, "loss": 0.014540749788284301, "step": 80430 }, { "epoch": 0.757035294117647, "grad_norm": 0.5397981563655647, "learning_rate": 2.299200231548293e-06, "loss": 0.015630163252353668, "step": 80435 }, { "epoch": 0.7570823529411764, "grad_norm": 0.41494491970263137, "learning_rate": 2.2991287725557145e-06, "loss": 0.01320265531539917, "step": 80440 }, { "epoch": 0.7571294117647058, "grad_norm": 0.677557995944918, "learning_rate": 2.2990573202255445e-06, "loss": 0.017741644382476808, "step": 80445 }, { "epoch": 0.7571764705882353, "grad_norm": 0.5703023217496678, "learning_rate": 2.298985874556748e-06, "loss": 0.016626372933387756, "step": 80450 }, { "epoch": 0.7572235294117647, "grad_norm": 0.5001666465289649, "learning_rate": 2.298914435548289e-06, "loss": 0.012970031797885894, "step": 80455 }, { "epoch": 0.7572705882352941, "grad_norm": 0.5637587774885526, "learning_rate": 2.2988430031991334e-06, "loss": 0.014165359735488891, "step": 80460 }, { "epoch": 0.7573176470588235, "grad_norm": 0.33774345603728306, "learning_rate": 2.298771577508247e-06, "loss": 0.01311253011226654, "step": 80465 }, { "epoch": 0.7573647058823529, "grad_norm": 0.6398695704621236, "learning_rate": 2.2987001584745947e-06, "loss": 0.014294907450675964, "step": 80470 }, { "epoch": 0.7574117647058823, "grad_norm": 0.581543119154818, "learning_rate": 2.298628746097143e-06, "loss": 0.01993878036737442, "step": 80475 }, { "epoch": 0.7574588235294117, "grad_norm": 0.7628040352809394, "learning_rate": 2.298557340374858e-06, "loss": 0.018306519091129302, "step": 80480 }, { "epoch": 0.7575058823529411, "grad_norm": 0.6206201987427367, "learning_rate": 2.2984859413067062e-06, "loss": 0.01622401475906372, "step": 80485 }, { "epoch": 0.7575529411764705, "grad_norm": 0.4650823214602519, "learning_rate": 2.2984145488916535e-06, "loss": 0.01486128568649292, "step": 80490 }, { "epoch": 0.7576, "grad_norm": 0.8683814581277497, "learning_rate": 2.298343163128667e-06, "loss": 0.0159808948636055, "step": 80495 }, { "epoch": 0.7576470588235295, "grad_norm": 0.42451183923122693, "learning_rate": 2.2982717840167143e-06, "loss": 0.009992258995771408, "step": 80500 }, { "epoch": 0.7576941176470589, "grad_norm": 0.4750845162696169, "learning_rate": 2.298200411554762e-06, "loss": 0.012314802408218384, "step": 80505 }, { "epoch": 0.7577411764705883, "grad_norm": 0.33365791912117193, "learning_rate": 2.298129045741778e-06, "loss": 0.012899306416511536, "step": 80510 }, { "epoch": 0.7577882352941177, "grad_norm": 0.5234476797610422, "learning_rate": 2.2980576865767296e-06, "loss": 0.012495707720518112, "step": 80515 }, { "epoch": 0.757835294117647, "grad_norm": 0.39217461392873504, "learning_rate": 2.297986334058585e-06, "loss": 0.01666128635406494, "step": 80520 }, { "epoch": 0.7578823529411765, "grad_norm": 0.34886441647801647, "learning_rate": 2.2979149881863127e-06, "loss": 0.01200791448354721, "step": 80525 }, { "epoch": 0.7579294117647059, "grad_norm": 0.5333535642606795, "learning_rate": 2.2978436489588805e-06, "loss": 0.012535062432289124, "step": 80530 }, { "epoch": 0.7579764705882353, "grad_norm": 0.5481458687966902, "learning_rate": 2.297772316375257e-06, "loss": 0.015790218114852907, "step": 80535 }, { "epoch": 0.7580235294117647, "grad_norm": 0.49884731389546755, "learning_rate": 2.297700990434412e-06, "loss": 0.012865278124809264, "step": 80540 }, { "epoch": 0.7580705882352942, "grad_norm": 0.3769376009296489, "learning_rate": 2.297629671135313e-06, "loss": 0.014807066321372986, "step": 80545 }, { "epoch": 0.7581176470588236, "grad_norm": 0.5799117333586454, "learning_rate": 2.2975583584769305e-06, "loss": 0.014960326254367828, "step": 80550 }, { "epoch": 0.758164705882353, "grad_norm": 0.5314972594041784, "learning_rate": 2.297487052458234e-06, "loss": 0.01533818244934082, "step": 80555 }, { "epoch": 0.7582117647058824, "grad_norm": 0.3436630238669049, "learning_rate": 2.2974157530781927e-06, "loss": 0.011056320369243621, "step": 80560 }, { "epoch": 0.7582588235294118, "grad_norm": 0.47169814774136565, "learning_rate": 2.2973444603357765e-06, "loss": 0.012465132027864456, "step": 80565 }, { "epoch": 0.7583058823529412, "grad_norm": 0.6166473180926062, "learning_rate": 2.2972731742299562e-06, "loss": 0.01439913660287857, "step": 80570 }, { "epoch": 0.7583529411764706, "grad_norm": 0.552508040875459, "learning_rate": 2.297201894759702e-06, "loss": 0.01906084269285202, "step": 80575 }, { "epoch": 0.7584, "grad_norm": 0.42096815416127237, "learning_rate": 2.2971306219239834e-06, "loss": 0.01483057290315628, "step": 80580 }, { "epoch": 0.7584470588235294, "grad_norm": 0.5962436403804234, "learning_rate": 2.297059355721773e-06, "loss": 0.014024659991264343, "step": 80585 }, { "epoch": 0.7584941176470589, "grad_norm": 0.5154452344680024, "learning_rate": 2.2969880961520405e-06, "loss": 0.01399986445903778, "step": 80590 }, { "epoch": 0.7585411764705883, "grad_norm": 0.5294285999350825, "learning_rate": 2.2969168432137583e-06, "loss": 0.012400349974632264, "step": 80595 }, { "epoch": 0.7585882352941177, "grad_norm": 0.407311494320797, "learning_rate": 2.296845596905897e-06, "loss": 0.01569734811782837, "step": 80600 }, { "epoch": 0.7586352941176471, "grad_norm": 0.42607882157138127, "learning_rate": 2.2967743572274286e-06, "loss": 0.016706624627113344, "step": 80605 }, { "epoch": 0.7586823529411765, "grad_norm": 0.5739781367958713, "learning_rate": 2.2967031241773254e-06, "loss": 0.015272203087806701, "step": 80610 }, { "epoch": 0.7587294117647059, "grad_norm": 0.3146863605601089, "learning_rate": 2.29663189775456e-06, "loss": 0.01482110321521759, "step": 80615 }, { "epoch": 0.7587764705882353, "grad_norm": 0.6443337782337492, "learning_rate": 2.2965606779581033e-06, "loss": 0.0160816490650177, "step": 80620 }, { "epoch": 0.7588235294117647, "grad_norm": 0.6215574479385254, "learning_rate": 2.2964894647869295e-06, "loss": 0.016907316446304322, "step": 80625 }, { "epoch": 0.7588705882352941, "grad_norm": 0.44472474064901574, "learning_rate": 2.2964182582400103e-06, "loss": 0.022585850954055787, "step": 80630 }, { "epoch": 0.7589176470588235, "grad_norm": 0.2949924457295905, "learning_rate": 2.2963470583163195e-06, "loss": 0.01243421882390976, "step": 80635 }, { "epoch": 0.758964705882353, "grad_norm": 0.42203071261273223, "learning_rate": 2.2962758650148302e-06, "loss": 0.015358361601829528, "step": 80640 }, { "epoch": 0.7590117647058824, "grad_norm": 0.24859207396349284, "learning_rate": 2.296204678334516e-06, "loss": 0.01785271018743515, "step": 80645 }, { "epoch": 0.7590588235294118, "grad_norm": 0.5355749603495753, "learning_rate": 2.2961334982743503e-06, "loss": 0.012675662338733674, "step": 80650 }, { "epoch": 0.7591058823529412, "grad_norm": 0.6199530676513493, "learning_rate": 2.2960623248333077e-06, "loss": 0.014493505656719207, "step": 80655 }, { "epoch": 0.7591529411764706, "grad_norm": 0.6578025091116743, "learning_rate": 2.2959911580103618e-06, "loss": 0.019354087114334107, "step": 80660 }, { "epoch": 0.7592, "grad_norm": 0.4687453882837924, "learning_rate": 2.2959199978044872e-06, "loss": 0.013251194357872009, "step": 80665 }, { "epoch": 0.7592470588235294, "grad_norm": 0.5657201694033803, "learning_rate": 2.295848844214659e-06, "loss": 0.01859217882156372, "step": 80670 }, { "epoch": 0.7592941176470588, "grad_norm": 0.5491889294963497, "learning_rate": 2.2957776972398513e-06, "loss": 0.015447425842285156, "step": 80675 }, { "epoch": 0.7593411764705882, "grad_norm": 0.26785422468662834, "learning_rate": 2.2957065568790393e-06, "loss": 0.012567536532878875, "step": 80680 }, { "epoch": 0.7593882352941177, "grad_norm": 0.3766096964885763, "learning_rate": 2.2956354231311986e-06, "loss": 0.015878629684448243, "step": 80685 }, { "epoch": 0.7594352941176471, "grad_norm": 0.4000693437397788, "learning_rate": 2.2955642959953055e-06, "loss": 0.012541669607162475, "step": 80690 }, { "epoch": 0.7594823529411765, "grad_norm": 0.32308488989467765, "learning_rate": 2.2954931754703343e-06, "loss": 0.01623205542564392, "step": 80695 }, { "epoch": 0.7595294117647059, "grad_norm": 0.2995383565215506, "learning_rate": 2.2954220615552614e-06, "loss": 0.01356196254491806, "step": 80700 }, { "epoch": 0.7595764705882353, "grad_norm": 0.5084246394609813, "learning_rate": 2.2953509542490635e-06, "loss": 0.01815837323665619, "step": 80705 }, { "epoch": 0.7596235294117647, "grad_norm": 0.5235323201224097, "learning_rate": 2.2952798535507166e-06, "loss": 0.021824142336845397, "step": 80710 }, { "epoch": 0.7596705882352941, "grad_norm": 0.4617224884592164, "learning_rate": 2.2952087594591975e-06, "loss": 0.014732359349727631, "step": 80715 }, { "epoch": 0.7597176470588235, "grad_norm": 0.5897433083927243, "learning_rate": 2.2951376719734833e-06, "loss": 0.014371523261070251, "step": 80720 }, { "epoch": 0.7597647058823529, "grad_norm": 0.5149367370551883, "learning_rate": 2.2950665910925505e-06, "loss": 0.015478187799453735, "step": 80725 }, { "epoch": 0.7598117647058823, "grad_norm": 0.4419852227886967, "learning_rate": 2.294995516815377e-06, "loss": 0.015830188989639282, "step": 80730 }, { "epoch": 0.7598588235294118, "grad_norm": 0.653280232496624, "learning_rate": 2.29492444914094e-06, "loss": 0.014922669529914856, "step": 80735 }, { "epoch": 0.7599058823529412, "grad_norm": 0.5577404075500666, "learning_rate": 2.294853388068217e-06, "loss": 0.016428714990615843, "step": 80740 }, { "epoch": 0.7599529411764706, "grad_norm": 0.497547374141145, "learning_rate": 2.294782333596186e-06, "loss": 0.01278868168592453, "step": 80745 }, { "epoch": 0.76, "grad_norm": 0.5931347780960179, "learning_rate": 2.2947112857238265e-06, "loss": 0.013272684812545777, "step": 80750 }, { "epoch": 0.7600470588235294, "grad_norm": 0.6613172910916361, "learning_rate": 2.2946402444501153e-06, "loss": 0.016431754827499388, "step": 80755 }, { "epoch": 0.7600941176470588, "grad_norm": 0.7724858027377388, "learning_rate": 2.2945692097740314e-06, "loss": 0.01908167600631714, "step": 80760 }, { "epoch": 0.7601411764705882, "grad_norm": 0.3756234257721676, "learning_rate": 2.2944981816945538e-06, "loss": 0.011900202929973602, "step": 80765 }, { "epoch": 0.7601882352941176, "grad_norm": 0.4382571780696157, "learning_rate": 2.294427160210662e-06, "loss": 0.017718511819839477, "step": 80770 }, { "epoch": 0.760235294117647, "grad_norm": 0.6135559909286886, "learning_rate": 2.294356145321335e-06, "loss": 0.016174310445785524, "step": 80775 }, { "epoch": 0.7602823529411765, "grad_norm": 0.42469496663798406, "learning_rate": 2.2942851370255516e-06, "loss": 0.015909555554389953, "step": 80780 }, { "epoch": 0.7603294117647059, "grad_norm": 0.5086592406326923, "learning_rate": 2.294214135322293e-06, "loss": 0.01546151638031006, "step": 80785 }, { "epoch": 0.7603764705882353, "grad_norm": 0.5312311689939898, "learning_rate": 2.294143140210538e-06, "loss": 0.013203597068786621, "step": 80790 }, { "epoch": 0.7604235294117647, "grad_norm": 0.2849134021521312, "learning_rate": 2.2940721516892676e-06, "loss": 0.012990143895149232, "step": 80795 }, { "epoch": 0.7604705882352941, "grad_norm": 0.4992872138085724, "learning_rate": 2.294001169757461e-06, "loss": 0.019719195365905762, "step": 80800 }, { "epoch": 0.7605176470588235, "grad_norm": 0.4569552259147143, "learning_rate": 2.2939301944141002e-06, "loss": 0.0145011305809021, "step": 80805 }, { "epoch": 0.7605647058823529, "grad_norm": 0.4166057806041479, "learning_rate": 2.2938592256581653e-06, "loss": 0.01526503562927246, "step": 80810 }, { "epoch": 0.7606117647058823, "grad_norm": 0.3743296946939287, "learning_rate": 2.293788263488637e-06, "loss": 0.018615514039993286, "step": 80815 }, { "epoch": 0.7606588235294117, "grad_norm": 0.6088491336631348, "learning_rate": 2.2937173079044977e-06, "loss": 0.015079283714294433, "step": 80820 }, { "epoch": 0.7607058823529412, "grad_norm": 0.5173776249264909, "learning_rate": 2.293646358904728e-06, "loss": 0.014953052997589112, "step": 80825 }, { "epoch": 0.7607529411764706, "grad_norm": 0.6556879896011772, "learning_rate": 2.29357541648831e-06, "loss": 0.017412908375263214, "step": 80830 }, { "epoch": 0.7608, "grad_norm": 0.3693148065154649, "learning_rate": 2.2935044806542253e-06, "loss": 0.011827271431684494, "step": 80835 }, { "epoch": 0.7608470588235294, "grad_norm": 0.44314190433043577, "learning_rate": 2.293433551401457e-06, "loss": 0.01638369560241699, "step": 80840 }, { "epoch": 0.7608941176470588, "grad_norm": 0.4327922183899118, "learning_rate": 2.2933626287289865e-06, "loss": 0.017806294560432433, "step": 80845 }, { "epoch": 0.7609411764705882, "grad_norm": 0.5237010311859862, "learning_rate": 2.293291712635796e-06, "loss": 0.014173148572444916, "step": 80850 }, { "epoch": 0.7609882352941176, "grad_norm": 0.5789499256389706, "learning_rate": 2.29322080312087e-06, "loss": 0.011822350323200226, "step": 80855 }, { "epoch": 0.761035294117647, "grad_norm": 0.5007037888485592, "learning_rate": 2.2931499001831902e-06, "loss": 0.012809266149997712, "step": 80860 }, { "epoch": 0.7610823529411764, "grad_norm": 0.4975859171191314, "learning_rate": 2.293079003821741e-06, "loss": 0.01417931318283081, "step": 80865 }, { "epoch": 0.7611294117647058, "grad_norm": 0.5418806953690587, "learning_rate": 2.293008114035504e-06, "loss": 0.015919515490531923, "step": 80870 }, { "epoch": 0.7611764705882353, "grad_norm": 0.5367631872505152, "learning_rate": 2.2929372308234645e-06, "loss": 0.01627315580844879, "step": 80875 }, { "epoch": 0.7612235294117647, "grad_norm": 0.47570255314334026, "learning_rate": 2.2928663541846064e-06, "loss": 0.01394532173871994, "step": 80880 }, { "epoch": 0.7612705882352941, "grad_norm": 0.5790272008161418, "learning_rate": 2.292795484117913e-06, "loss": 0.014529670774936675, "step": 80885 }, { "epoch": 0.7613176470588235, "grad_norm": 0.5246971736041113, "learning_rate": 2.292724620622369e-06, "loss": 0.01574077010154724, "step": 80890 }, { "epoch": 0.7613647058823529, "grad_norm": 0.3348591461803125, "learning_rate": 2.2926537636969596e-06, "loss": 0.013697779178619385, "step": 80895 }, { "epoch": 0.7614117647058823, "grad_norm": 0.35524549314939996, "learning_rate": 2.292582913340669e-06, "loss": 0.01452983021736145, "step": 80900 }, { "epoch": 0.7614588235294117, "grad_norm": 0.3279721786786662, "learning_rate": 2.2925120695524824e-06, "loss": 0.010916593670845031, "step": 80905 }, { "epoch": 0.7615058823529411, "grad_norm": 0.6596868951311289, "learning_rate": 2.292441232331385e-06, "loss": 0.015170232951641082, "step": 80910 }, { "epoch": 0.7615529411764705, "grad_norm": 0.6020327861352258, "learning_rate": 2.292370401676362e-06, "loss": 0.016303719580173494, "step": 80915 }, { "epoch": 0.7616, "grad_norm": 0.616640718552985, "learning_rate": 2.2922995775863998e-06, "loss": 0.014315329492092133, "step": 80920 }, { "epoch": 0.7616470588235295, "grad_norm": 0.6313342644930466, "learning_rate": 2.2922287600604836e-06, "loss": 0.013458013534545898, "step": 80925 }, { "epoch": 0.7616941176470589, "grad_norm": 0.4441313574470096, "learning_rate": 2.2921579490975995e-06, "loss": 0.012476097792387009, "step": 80930 }, { "epoch": 0.7617411764705883, "grad_norm": 0.4520694068523988, "learning_rate": 2.2920871446967346e-06, "loss": 0.01639179587364197, "step": 80935 }, { "epoch": 0.7617882352941177, "grad_norm": 0.6267466253382767, "learning_rate": 2.292016346856875e-06, "loss": 0.015138104557991028, "step": 80940 }, { "epoch": 0.761835294117647, "grad_norm": 0.24002078334322577, "learning_rate": 2.291945555577007e-06, "loss": 0.011769062280654908, "step": 80945 }, { "epoch": 0.7618823529411765, "grad_norm": 0.5336246781567767, "learning_rate": 2.291874770856119e-06, "loss": 0.018840116262435914, "step": 80950 }, { "epoch": 0.7619294117647059, "grad_norm": 0.48433488753003845, "learning_rate": 2.2918039926931964e-06, "loss": 0.014396658539772034, "step": 80955 }, { "epoch": 0.7619764705882353, "grad_norm": 0.7463307684369765, "learning_rate": 2.291733221087228e-06, "loss": 0.017086988687515257, "step": 80960 }, { "epoch": 0.7620235294117647, "grad_norm": 0.3058837806622726, "learning_rate": 2.291662456037201e-06, "loss": 0.011137595772743225, "step": 80965 }, { "epoch": 0.7620705882352942, "grad_norm": 0.34019333157243864, "learning_rate": 2.291591697542103e-06, "loss": 0.017583636939525603, "step": 80970 }, { "epoch": 0.7621176470588236, "grad_norm": 0.40940358854406794, "learning_rate": 2.291520945600922e-06, "loss": 0.015959882736206056, "step": 80975 }, { "epoch": 0.762164705882353, "grad_norm": 0.378722928151311, "learning_rate": 2.291450200212648e-06, "loss": 0.013999906182289124, "step": 80980 }, { "epoch": 0.7622117647058824, "grad_norm": 0.32461578804592045, "learning_rate": 2.291379461376267e-06, "loss": 0.016434283554553987, "step": 80985 }, { "epoch": 0.7622588235294118, "grad_norm": 0.40997911777296014, "learning_rate": 2.291308729090769e-06, "loss": 0.013070440292358399, "step": 80990 }, { "epoch": 0.7623058823529412, "grad_norm": 0.4623711457296989, "learning_rate": 2.291238003355144e-06, "loss": 0.011940819025039674, "step": 80995 }, { "epoch": 0.7623529411764706, "grad_norm": 0.45207850087117724, "learning_rate": 2.2911672841683792e-06, "loss": 0.014322367310523988, "step": 81000 }, { "epoch": 0.7624, "grad_norm": 0.503377127106475, "learning_rate": 2.2910965715294654e-06, "loss": 0.014105895161628723, "step": 81005 }, { "epoch": 0.7624470588235294, "grad_norm": 0.3125594207400846, "learning_rate": 2.2910258654373916e-06, "loss": 0.015567117929458618, "step": 81010 }, { "epoch": 0.7624941176470589, "grad_norm": 1.1502313724898607, "learning_rate": 2.2909551658911478e-06, "loss": 0.01454891562461853, "step": 81015 }, { "epoch": 0.7625411764705883, "grad_norm": 0.5387510201025938, "learning_rate": 2.290884472889724e-06, "loss": 0.016148999333381653, "step": 81020 }, { "epoch": 0.7625882352941177, "grad_norm": 0.4792510492213143, "learning_rate": 2.2908137864321113e-06, "loss": 0.014185605943202973, "step": 81025 }, { "epoch": 0.7626352941176471, "grad_norm": 0.525271579949706, "learning_rate": 2.2907431065172985e-06, "loss": 0.01710308939218521, "step": 81030 }, { "epoch": 0.7626823529411765, "grad_norm": 0.492042745324636, "learning_rate": 2.2906724331442774e-06, "loss": 0.021808771789073943, "step": 81035 }, { "epoch": 0.7627294117647059, "grad_norm": 0.49463996772687713, "learning_rate": 2.2906017663120393e-06, "loss": 0.015974593162536622, "step": 81040 }, { "epoch": 0.7627764705882353, "grad_norm": 0.500333321613262, "learning_rate": 2.290531106019575e-06, "loss": 0.01916821748018265, "step": 81045 }, { "epoch": 0.7628235294117647, "grad_norm": 0.36279104839843174, "learning_rate": 2.290460452265875e-06, "loss": 0.01847119629383087, "step": 81050 }, { "epoch": 0.7628705882352941, "grad_norm": 0.5148659908537173, "learning_rate": 2.2903898050499316e-06, "loss": 0.013549013435840607, "step": 81055 }, { "epoch": 0.7629176470588235, "grad_norm": 0.48074312912137246, "learning_rate": 2.2903191643707374e-06, "loss": 0.01642405390739441, "step": 81060 }, { "epoch": 0.762964705882353, "grad_norm": 0.5201138327901185, "learning_rate": 2.2902485302272833e-06, "loss": 0.011613608151674271, "step": 81065 }, { "epoch": 0.7630117647058824, "grad_norm": 0.38150075945195244, "learning_rate": 2.290177902618562e-06, "loss": 0.014772579073905945, "step": 81070 }, { "epoch": 0.7630588235294118, "grad_norm": 0.6424250911093685, "learning_rate": 2.2901072815435655e-06, "loss": 0.014828625321388244, "step": 81075 }, { "epoch": 0.7631058823529412, "grad_norm": 0.47881456667679945, "learning_rate": 2.2900366670012873e-06, "loss": 0.016906234622001647, "step": 81080 }, { "epoch": 0.7631529411764706, "grad_norm": 0.5911398871999408, "learning_rate": 2.289966058990719e-06, "loss": 0.016580033302307128, "step": 81085 }, { "epoch": 0.7632, "grad_norm": 0.32744798067866393, "learning_rate": 2.2898954575108555e-06, "loss": 0.014068660140037537, "step": 81090 }, { "epoch": 0.7632470588235294, "grad_norm": 1.3011441874199676, "learning_rate": 2.2898248625606886e-06, "loss": 0.014792297780513764, "step": 81095 }, { "epoch": 0.7632941176470588, "grad_norm": 0.4637727656436283, "learning_rate": 2.2897542741392124e-06, "loss": 0.014409279823303223, "step": 81100 }, { "epoch": 0.7633411764705882, "grad_norm": 0.581596756961761, "learning_rate": 2.2896836922454206e-06, "loss": 0.017284116148948668, "step": 81105 }, { "epoch": 0.7633882352941177, "grad_norm": 0.606336767028915, "learning_rate": 2.2896131168783077e-06, "loss": 0.016317400336265563, "step": 81110 }, { "epoch": 0.7634352941176471, "grad_norm": 0.8956913724641713, "learning_rate": 2.2895425480368676e-06, "loss": 0.015404380857944489, "step": 81115 }, { "epoch": 0.7634823529411765, "grad_norm": 0.6546010386701456, "learning_rate": 2.289471985720094e-06, "loss": 0.011405295878648757, "step": 81120 }, { "epoch": 0.7635294117647059, "grad_norm": 0.8124104663708863, "learning_rate": 2.289401429926982e-06, "loss": 0.01789730191230774, "step": 81125 }, { "epoch": 0.7635764705882353, "grad_norm": 0.3109033834034378, "learning_rate": 2.289330880656527e-06, "loss": 0.011698608100414277, "step": 81130 }, { "epoch": 0.7636235294117647, "grad_norm": 0.4527222090328258, "learning_rate": 2.289260337907723e-06, "loss": 0.0134023517370224, "step": 81135 }, { "epoch": 0.7636705882352941, "grad_norm": 0.5520870307828505, "learning_rate": 2.2891898016795663e-06, "loss": 0.019122985005378724, "step": 81140 }, { "epoch": 0.7637176470588235, "grad_norm": 0.39326924479504716, "learning_rate": 2.289119271971052e-06, "loss": 0.014143247902393342, "step": 81145 }, { "epoch": 0.7637647058823529, "grad_norm": 0.4730150399901591, "learning_rate": 2.289048748781175e-06, "loss": 0.01651349812746048, "step": 81150 }, { "epoch": 0.7638117647058823, "grad_norm": 0.5129952166795125, "learning_rate": 2.2889782321089334e-06, "loss": 0.016656547784805298, "step": 81155 }, { "epoch": 0.7638588235294118, "grad_norm": 0.4759059188807508, "learning_rate": 2.2889077219533214e-06, "loss": 0.013510864973068238, "step": 81160 }, { "epoch": 0.7639058823529412, "grad_norm": 0.642197184173523, "learning_rate": 2.2888372183133354e-06, "loss": 0.014765413105487823, "step": 81165 }, { "epoch": 0.7639529411764706, "grad_norm": 0.6024555664682705, "learning_rate": 2.2887667211879734e-06, "loss": 0.0171112135052681, "step": 81170 }, { "epoch": 0.764, "grad_norm": 0.5053617251847616, "learning_rate": 2.2886962305762305e-06, "loss": 0.017389202117919923, "step": 81175 }, { "epoch": 0.7640470588235294, "grad_norm": 0.9358662393382091, "learning_rate": 2.288625746477105e-06, "loss": 0.018657854199409483, "step": 81180 }, { "epoch": 0.7640941176470588, "grad_norm": 0.41580579262861267, "learning_rate": 2.2885552688895933e-06, "loss": 0.012483689934015274, "step": 81185 }, { "epoch": 0.7641411764705882, "grad_norm": 0.37332333245745825, "learning_rate": 2.2884847978126934e-06, "loss": 0.016904622316360474, "step": 81190 }, { "epoch": 0.7641882352941176, "grad_norm": 0.555464308138122, "learning_rate": 2.288414333245403e-06, "loss": 0.019704490900039673, "step": 81195 }, { "epoch": 0.764235294117647, "grad_norm": 0.36469934286967176, "learning_rate": 2.288343875186719e-06, "loss": 0.013002325594425202, "step": 81200 }, { "epoch": 0.7642823529411765, "grad_norm": 0.4675375897031912, "learning_rate": 2.288273423635641e-06, "loss": 0.016867658495903014, "step": 81205 }, { "epoch": 0.7643294117647059, "grad_norm": 0.37010033478736837, "learning_rate": 2.2882029785911663e-06, "loss": 0.01676248908042908, "step": 81210 }, { "epoch": 0.7643764705882353, "grad_norm": 0.3488817516129845, "learning_rate": 2.2881325400522933e-06, "loss": 0.012870678305625915, "step": 81215 }, { "epoch": 0.7644235294117647, "grad_norm": 0.32797717252988906, "learning_rate": 2.2880621080180214e-06, "loss": 0.015096084773540496, "step": 81220 }, { "epoch": 0.7644705882352941, "grad_norm": 0.40891769136959283, "learning_rate": 2.2879916824873494e-06, "loss": 0.015463119745254517, "step": 81225 }, { "epoch": 0.7645176470588235, "grad_norm": 0.3814398515772629, "learning_rate": 2.2879212634592764e-06, "loss": 0.01588391661643982, "step": 81230 }, { "epoch": 0.7645647058823529, "grad_norm": 0.36983491625189274, "learning_rate": 2.2878508509328013e-06, "loss": 0.015479058027267456, "step": 81235 }, { "epoch": 0.7646117647058823, "grad_norm": 0.42589357270832556, "learning_rate": 2.2877804449069242e-06, "loss": 0.014075052738189698, "step": 81240 }, { "epoch": 0.7646588235294117, "grad_norm": 0.5168405722593639, "learning_rate": 2.2877100453806452e-06, "loss": 0.013087624311447143, "step": 81245 }, { "epoch": 0.7647058823529411, "grad_norm": 0.627645618357217, "learning_rate": 2.287639652352964e-06, "loss": 0.017538946866989136, "step": 81250 }, { "epoch": 0.7647529411764706, "grad_norm": 0.6322781737000039, "learning_rate": 2.28756926582288e-06, "loss": 0.01750824898481369, "step": 81255 }, { "epoch": 0.7648, "grad_norm": 0.3794637993252974, "learning_rate": 2.2874988857893955e-06, "loss": 0.015218488872051239, "step": 81260 }, { "epoch": 0.7648470588235294, "grad_norm": 0.4738610685211165, "learning_rate": 2.28742851225151e-06, "loss": 0.01858176589012146, "step": 81265 }, { "epoch": 0.7648941176470588, "grad_norm": 0.4547977752229467, "learning_rate": 2.287358145208224e-06, "loss": 0.014484132826328277, "step": 81270 }, { "epoch": 0.7649411764705882, "grad_norm": 0.23836645397523998, "learning_rate": 2.287287784658539e-06, "loss": 0.012167072296142578, "step": 81275 }, { "epoch": 0.7649882352941176, "grad_norm": 0.36536330356786995, "learning_rate": 2.2872174306014576e-06, "loss": 0.012413426488637924, "step": 81280 }, { "epoch": 0.765035294117647, "grad_norm": 0.4690169803583295, "learning_rate": 2.2871470830359797e-06, "loss": 0.019681909680366518, "step": 81285 }, { "epoch": 0.7650823529411764, "grad_norm": 0.33592859995940355, "learning_rate": 2.287076741961107e-06, "loss": 0.013419437408447265, "step": 81290 }, { "epoch": 0.7651294117647058, "grad_norm": 0.4440187940900234, "learning_rate": 2.287006407375843e-06, "loss": 0.01755371391773224, "step": 81295 }, { "epoch": 0.7651764705882353, "grad_norm": 0.5677478751011856, "learning_rate": 2.286936079279188e-06, "loss": 0.011064360290765763, "step": 81300 }, { "epoch": 0.7652235294117647, "grad_norm": 0.39245635405806706, "learning_rate": 2.286865757670146e-06, "loss": 0.013267990946769715, "step": 81305 }, { "epoch": 0.7652705882352941, "grad_norm": 0.5290312728859808, "learning_rate": 2.286795442547719e-06, "loss": 0.016128766536712646, "step": 81310 }, { "epoch": 0.7653176470588235, "grad_norm": 0.6412226956411857, "learning_rate": 2.28672513391091e-06, "loss": 0.017583636939525603, "step": 81315 }, { "epoch": 0.765364705882353, "grad_norm": 0.33361668416481466, "learning_rate": 2.2866548317587215e-06, "loss": 0.013423478603363037, "step": 81320 }, { "epoch": 0.7654117647058823, "grad_norm": 0.589875903305614, "learning_rate": 2.2865845360901566e-06, "loss": 0.018731895089149474, "step": 81325 }, { "epoch": 0.7654588235294117, "grad_norm": 0.5995982115823421, "learning_rate": 2.2865142469042202e-06, "loss": 0.015812024474143982, "step": 81330 }, { "epoch": 0.7655058823529411, "grad_norm": 0.5847534344530727, "learning_rate": 2.2864439641999147e-06, "loss": 0.01999242603778839, "step": 81335 }, { "epoch": 0.7655529411764705, "grad_norm": 0.43868418982166746, "learning_rate": 2.2863736879762438e-06, "loss": 0.014748786389827729, "step": 81340 }, { "epoch": 0.7656, "grad_norm": 0.576935617083489, "learning_rate": 2.2863034182322127e-06, "loss": 0.013873949646949768, "step": 81345 }, { "epoch": 0.7656470588235295, "grad_norm": 0.3959894094110433, "learning_rate": 2.286233154966825e-06, "loss": 0.013632944226264954, "step": 81350 }, { "epoch": 0.7656941176470589, "grad_norm": 0.47706970115116554, "learning_rate": 2.2861628981790857e-06, "loss": 0.01332072913646698, "step": 81355 }, { "epoch": 0.7657411764705883, "grad_norm": 0.45758979452580795, "learning_rate": 2.2860926478679993e-06, "loss": 0.016674989461898805, "step": 81360 }, { "epoch": 0.7657882352941177, "grad_norm": 0.4236636795100681, "learning_rate": 2.2860224040325703e-06, "loss": 0.014972391724586486, "step": 81365 }, { "epoch": 0.7658352941176471, "grad_norm": 0.6553893025573392, "learning_rate": 2.2859521666718053e-06, "loss": 0.018718361854553223, "step": 81370 }, { "epoch": 0.7658823529411765, "grad_norm": 0.4644733250860689, "learning_rate": 2.2858819357847085e-06, "loss": 0.014324025809764862, "step": 81375 }, { "epoch": 0.7659294117647059, "grad_norm": 0.4540657099095291, "learning_rate": 2.2858117113702847e-06, "loss": 0.014493176341056823, "step": 81380 }, { "epoch": 0.7659764705882353, "grad_norm": 0.7167699732171945, "learning_rate": 2.2857414934275416e-06, "loss": 0.012911629676818848, "step": 81385 }, { "epoch": 0.7660235294117647, "grad_norm": 0.29311955065009204, "learning_rate": 2.285671281955485e-06, "loss": 0.011780151724815368, "step": 81390 }, { "epoch": 0.7660705882352942, "grad_norm": 0.2170077588798803, "learning_rate": 2.28560107695312e-06, "loss": 0.011283881962299347, "step": 81395 }, { "epoch": 0.7661176470588236, "grad_norm": 0.45963094147454636, "learning_rate": 2.2855308784194536e-06, "loss": 0.013611458241939545, "step": 81400 }, { "epoch": 0.766164705882353, "grad_norm": 0.7142836239969809, "learning_rate": 2.285460686353493e-06, "loss": 0.01846776604652405, "step": 81405 }, { "epoch": 0.7662117647058824, "grad_norm": 0.6020022324730568, "learning_rate": 2.2853905007542443e-06, "loss": 0.014746296405792236, "step": 81410 }, { "epoch": 0.7662588235294118, "grad_norm": 0.37965836026727484, "learning_rate": 2.285320321620715e-06, "loss": 0.016947074234485625, "step": 81415 }, { "epoch": 0.7663058823529412, "grad_norm": 0.3823477243228205, "learning_rate": 2.2852501489519125e-06, "loss": 0.01205715760588646, "step": 81420 }, { "epoch": 0.7663529411764706, "grad_norm": 0.44238702102441985, "learning_rate": 2.285179982746844e-06, "loss": 0.015183846652507781, "step": 81425 }, { "epoch": 0.7664, "grad_norm": 0.5891109841257712, "learning_rate": 2.2851098230045178e-06, "loss": 0.01541581004858017, "step": 81430 }, { "epoch": 0.7664470588235294, "grad_norm": 0.5942860488122542, "learning_rate": 2.2850396697239412e-06, "loss": 0.014338377118110656, "step": 81435 }, { "epoch": 0.7664941176470588, "grad_norm": 0.4087408482315782, "learning_rate": 2.284969522904123e-06, "loss": 0.013441631197929382, "step": 81440 }, { "epoch": 0.7665411764705883, "grad_norm": 0.4123354179715775, "learning_rate": 2.2848993825440717e-06, "loss": 0.012528228759765624, "step": 81445 }, { "epoch": 0.7665882352941177, "grad_norm": 0.26579851347330247, "learning_rate": 2.2848292486427946e-06, "loss": 0.012746334075927734, "step": 81450 }, { "epoch": 0.7666352941176471, "grad_norm": 0.32074246851928273, "learning_rate": 2.2847591211993017e-06, "loss": 0.012751805782318115, "step": 81455 }, { "epoch": 0.7666823529411765, "grad_norm": 0.43585371808124923, "learning_rate": 2.2846890002126025e-06, "loss": 0.017711733281612397, "step": 81460 }, { "epoch": 0.7667294117647059, "grad_norm": 0.5516541313327988, "learning_rate": 2.2846188856817047e-06, "loss": 0.016511175036430358, "step": 81465 }, { "epoch": 0.7667764705882353, "grad_norm": 0.551811998664075, "learning_rate": 2.2845487776056185e-06, "loss": 0.015085278451442719, "step": 81470 }, { "epoch": 0.7668235294117647, "grad_norm": 0.6365764640770049, "learning_rate": 2.284478675983354e-06, "loss": 0.025182104110717772, "step": 81475 }, { "epoch": 0.7668705882352941, "grad_norm": 0.3862231867063629, "learning_rate": 2.2844085808139206e-06, "loss": 0.014483466744422913, "step": 81480 }, { "epoch": 0.7669176470588235, "grad_norm": 0.6234908367861884, "learning_rate": 2.2843384920963286e-06, "loss": 0.014692583680152893, "step": 81485 }, { "epoch": 0.766964705882353, "grad_norm": 0.4957060585707897, "learning_rate": 2.284268409829588e-06, "loss": 0.017062202095985413, "step": 81490 }, { "epoch": 0.7670117647058824, "grad_norm": 0.5664543886812043, "learning_rate": 2.2841983340127095e-06, "loss": 0.017352980375289918, "step": 81495 }, { "epoch": 0.7670588235294118, "grad_norm": 0.39226474018879703, "learning_rate": 2.2841282646447038e-06, "loss": 0.016327624022960664, "step": 81500 }, { "epoch": 0.7671058823529412, "grad_norm": 0.3506409462547446, "learning_rate": 2.2840582017245822e-06, "loss": 0.014775648713111877, "step": 81505 }, { "epoch": 0.7671529411764706, "grad_norm": 0.4344803571409265, "learning_rate": 2.2839881452513555e-06, "loss": 0.01608889698982239, "step": 81510 }, { "epoch": 0.7672, "grad_norm": 0.45955593118980215, "learning_rate": 2.2839180952240347e-06, "loss": 0.013658283650875092, "step": 81515 }, { "epoch": 0.7672470588235294, "grad_norm": 0.6605075826537825, "learning_rate": 2.283848051641632e-06, "loss": 0.011053608357906341, "step": 81520 }, { "epoch": 0.7672941176470588, "grad_norm": 0.40326928024667263, "learning_rate": 2.2837780145031597e-06, "loss": 0.0136106938123703, "step": 81525 }, { "epoch": 0.7673411764705882, "grad_norm": 0.5689733638430327, "learning_rate": 2.2837079838076282e-06, "loss": 0.014877021312713623, "step": 81530 }, { "epoch": 0.7673882352941176, "grad_norm": 0.4658837192384062, "learning_rate": 2.283637959554051e-06, "loss": 0.01568332314491272, "step": 81535 }, { "epoch": 0.7674352941176471, "grad_norm": 0.4911197583396102, "learning_rate": 2.28356794174144e-06, "loss": 0.013792288303375245, "step": 81540 }, { "epoch": 0.7674823529411765, "grad_norm": 0.4968491670365995, "learning_rate": 2.2834979303688076e-06, "loss": 0.01314491182565689, "step": 81545 }, { "epoch": 0.7675294117647059, "grad_norm": 0.6052085154916823, "learning_rate": 2.283427925435167e-06, "loss": 0.019699564576148985, "step": 81550 }, { "epoch": 0.7675764705882353, "grad_norm": 0.45734767609887694, "learning_rate": 2.283357926939532e-06, "loss": 0.016204389929771423, "step": 81555 }, { "epoch": 0.7676235294117647, "grad_norm": 0.4518700177695991, "learning_rate": 2.2832879348809146e-06, "loss": 0.015309962630271911, "step": 81560 }, { "epoch": 0.7676705882352941, "grad_norm": 0.623392509066431, "learning_rate": 2.2832179492583287e-06, "loss": 0.015114840865135194, "step": 81565 }, { "epoch": 0.7677176470588235, "grad_norm": 0.5792470525243311, "learning_rate": 2.2831479700707886e-06, "loss": 0.014904564619064331, "step": 81570 }, { "epoch": 0.7677647058823529, "grad_norm": 0.4848585413149217, "learning_rate": 2.2830779973173078e-06, "loss": 0.01338648349046707, "step": 81575 }, { "epoch": 0.7678117647058823, "grad_norm": 1.0724751505461578, "learning_rate": 2.2830080309969003e-06, "loss": 0.024484238028526305, "step": 81580 }, { "epoch": 0.7678588235294118, "grad_norm": 0.44257634876651725, "learning_rate": 2.2829380711085806e-06, "loss": 0.016390001773834227, "step": 81585 }, { "epoch": 0.7679058823529412, "grad_norm": 0.3508876720446106, "learning_rate": 2.2828681176513625e-06, "loss": 0.012548789381980896, "step": 81590 }, { "epoch": 0.7679529411764706, "grad_norm": 0.6318922473112946, "learning_rate": 2.2827981706242623e-06, "loss": 0.013755452632904053, "step": 81595 }, { "epoch": 0.768, "grad_norm": 0.6411247073571827, "learning_rate": 2.282728230026293e-06, "loss": 0.013891029357910156, "step": 81600 }, { "epoch": 0.7680470588235294, "grad_norm": 0.5503947981682044, "learning_rate": 2.2826582958564716e-06, "loss": 0.015044143795967102, "step": 81605 }, { "epoch": 0.7680941176470588, "grad_norm": 0.35658995472712873, "learning_rate": 2.282588368113813e-06, "loss": 0.01477394700050354, "step": 81610 }, { "epoch": 0.7681411764705882, "grad_norm": 0.4320150726023122, "learning_rate": 2.282518446797332e-06, "loss": 0.01785276234149933, "step": 81615 }, { "epoch": 0.7681882352941176, "grad_norm": 0.4004646949996088, "learning_rate": 2.282448531906045e-06, "loss": 0.013516247272491455, "step": 81620 }, { "epoch": 0.768235294117647, "grad_norm": 0.41740892131751645, "learning_rate": 2.2823786234389676e-06, "loss": 0.012337884306907654, "step": 81625 }, { "epoch": 0.7682823529411764, "grad_norm": 0.5418446149469321, "learning_rate": 2.282308721395117e-06, "loss": 0.013698002696037293, "step": 81630 }, { "epoch": 0.7683294117647059, "grad_norm": 0.5668004119808158, "learning_rate": 2.2822388257735093e-06, "loss": 0.015348580479621888, "step": 81635 }, { "epoch": 0.7683764705882353, "grad_norm": 0.3689760018053639, "learning_rate": 2.2821689365731604e-06, "loss": 0.0158807635307312, "step": 81640 }, { "epoch": 0.7684235294117647, "grad_norm": 0.5740542192475686, "learning_rate": 2.282099053793088e-06, "loss": 0.016976362466812132, "step": 81645 }, { "epoch": 0.7684705882352941, "grad_norm": 0.5053703408172162, "learning_rate": 2.2820291774323085e-06, "loss": 0.015065804123878479, "step": 81650 }, { "epoch": 0.7685176470588235, "grad_norm": 0.46625962932608106, "learning_rate": 2.28195930748984e-06, "loss": 0.016387033462524413, "step": 81655 }, { "epoch": 0.7685647058823529, "grad_norm": 0.40168880483165065, "learning_rate": 2.281889443964699e-06, "loss": 0.020943735539913178, "step": 81660 }, { "epoch": 0.7686117647058823, "grad_norm": 0.6427511922929674, "learning_rate": 2.281819586855904e-06, "loss": 0.018187156319618224, "step": 81665 }, { "epoch": 0.7686588235294117, "grad_norm": 0.5885819000597134, "learning_rate": 2.2817497361624725e-06, "loss": 0.014188703894615174, "step": 81670 }, { "epoch": 0.7687058823529411, "grad_norm": 0.4047915588433722, "learning_rate": 2.2816798918834233e-06, "loss": 0.01742728054523468, "step": 81675 }, { "epoch": 0.7687529411764706, "grad_norm": 0.4518717115506205, "learning_rate": 2.2816100540177743e-06, "loss": 0.014782445132732391, "step": 81680 }, { "epoch": 0.7688, "grad_norm": 0.687400753169558, "learning_rate": 2.281540222564543e-06, "loss": 0.01656412035226822, "step": 81685 }, { "epoch": 0.7688470588235294, "grad_norm": 0.4682841050261574, "learning_rate": 2.28147039752275e-06, "loss": 0.01609833836555481, "step": 81690 }, { "epoch": 0.7688941176470588, "grad_norm": 0.29209689853741583, "learning_rate": 2.281400578891413e-06, "loss": 0.01329438090324402, "step": 81695 }, { "epoch": 0.7689411764705882, "grad_norm": 0.5098304940274548, "learning_rate": 2.2813307666695517e-06, "loss": 0.01208624616265297, "step": 81700 }, { "epoch": 0.7689882352941176, "grad_norm": 0.7440380656434427, "learning_rate": 2.2812609608561855e-06, "loss": 0.015574142336845398, "step": 81705 }, { "epoch": 0.769035294117647, "grad_norm": 0.5702903841517661, "learning_rate": 2.281191161450334e-06, "loss": 0.014145588874816895, "step": 81710 }, { "epoch": 0.7690823529411764, "grad_norm": 0.7455623667110599, "learning_rate": 2.2811213684510167e-06, "loss": 0.013637804985046386, "step": 81715 }, { "epoch": 0.7691294117647058, "grad_norm": 0.5638842354811706, "learning_rate": 2.2810515818572537e-06, "loss": 0.01817850172519684, "step": 81720 }, { "epoch": 0.7691764705882352, "grad_norm": 0.6507360722648182, "learning_rate": 2.2809818016680653e-06, "loss": 0.014639931917190551, "step": 81725 }, { "epoch": 0.7692235294117648, "grad_norm": 1.002362889361704, "learning_rate": 2.2809120278824725e-06, "loss": 0.014823856949806213, "step": 81730 }, { "epoch": 0.7692705882352942, "grad_norm": 0.3499950752594395, "learning_rate": 2.280842260499495e-06, "loss": 0.011343605816364288, "step": 81735 }, { "epoch": 0.7693176470588236, "grad_norm": 0.40756809371658664, "learning_rate": 2.2807724995181543e-06, "loss": 0.012567096948623657, "step": 81740 }, { "epoch": 0.769364705882353, "grad_norm": 0.44515886050994374, "learning_rate": 2.2807027449374713e-06, "loss": 0.016523724794387816, "step": 81745 }, { "epoch": 0.7694117647058824, "grad_norm": 0.5115917945637153, "learning_rate": 2.2806329967564674e-06, "loss": 0.013175253570079804, "step": 81750 }, { "epoch": 0.7694588235294118, "grad_norm": 0.32612404704588216, "learning_rate": 2.2805632549741643e-06, "loss": 0.013110017776489258, "step": 81755 }, { "epoch": 0.7695058823529412, "grad_norm": 0.3906125337758128, "learning_rate": 2.2804935195895823e-06, "loss": 0.012835356593132018, "step": 81760 }, { "epoch": 0.7695529411764706, "grad_norm": 0.8147956747041437, "learning_rate": 2.2804237906017444e-06, "loss": 0.019891974329948426, "step": 81765 }, { "epoch": 0.7696, "grad_norm": 0.42843738507237034, "learning_rate": 2.2803540680096736e-06, "loss": 0.01942625343799591, "step": 81770 }, { "epoch": 0.7696470588235295, "grad_norm": 0.41082150266929235, "learning_rate": 2.2802843518123902e-06, "loss": 0.015015725791454316, "step": 81775 }, { "epoch": 0.7696941176470589, "grad_norm": 0.6050458185698379, "learning_rate": 2.280214642008919e-06, "loss": 0.01623207926750183, "step": 81780 }, { "epoch": 0.7697411764705883, "grad_norm": 0.5129423223222429, "learning_rate": 2.2801449385982807e-06, "loss": 0.015718239545822143, "step": 81785 }, { "epoch": 0.7697882352941177, "grad_norm": 0.7300621986838948, "learning_rate": 2.2800752415794996e-06, "loss": 0.017510583996772765, "step": 81790 }, { "epoch": 0.7698352941176471, "grad_norm": 0.8676752276628816, "learning_rate": 2.280005550951598e-06, "loss": 0.016200438141822815, "step": 81795 }, { "epoch": 0.7698823529411765, "grad_norm": 0.4696356104352799, "learning_rate": 2.2799358667136005e-06, "loss": 0.015038782358169555, "step": 81800 }, { "epoch": 0.7699294117647059, "grad_norm": 0.3761883913458274, "learning_rate": 2.279866188864529e-06, "loss": 0.016558563709259032, "step": 81805 }, { "epoch": 0.7699764705882353, "grad_norm": 0.571872615323255, "learning_rate": 2.2797965174034085e-06, "loss": 0.01641874611377716, "step": 81810 }, { "epoch": 0.7700235294117647, "grad_norm": 0.5194792951874136, "learning_rate": 2.2797268523292624e-06, "loss": 0.012007507681846618, "step": 81815 }, { "epoch": 0.7700705882352941, "grad_norm": 0.6388084313616563, "learning_rate": 2.279657193641115e-06, "loss": 0.01330522894859314, "step": 81820 }, { "epoch": 0.7701176470588236, "grad_norm": 0.4008909170746848, "learning_rate": 2.2795875413379913e-06, "loss": 0.014013896882534026, "step": 81825 }, { "epoch": 0.770164705882353, "grad_norm": 0.5113451750808278, "learning_rate": 2.279517895418915e-06, "loss": 0.01586799919605255, "step": 81830 }, { "epoch": 0.7702117647058824, "grad_norm": 0.5641909402892316, "learning_rate": 2.2794482558829115e-06, "loss": 0.01614478826522827, "step": 81835 }, { "epoch": 0.7702588235294118, "grad_norm": 0.5577063618470124, "learning_rate": 2.279378622729006e-06, "loss": 0.015876249969005586, "step": 81840 }, { "epoch": 0.7703058823529412, "grad_norm": 0.34124680310105737, "learning_rate": 2.2793089959562236e-06, "loss": 0.011897116899490356, "step": 81845 }, { "epoch": 0.7703529411764706, "grad_norm": 0.4658014995087131, "learning_rate": 2.27923937556359e-06, "loss": 0.014909675717353821, "step": 81850 }, { "epoch": 0.7704, "grad_norm": 0.7321414080206595, "learning_rate": 2.2791697615501297e-06, "loss": 0.016278399527072905, "step": 81855 }, { "epoch": 0.7704470588235294, "grad_norm": 0.329671170428005, "learning_rate": 2.2791001539148692e-06, "loss": 0.012905049324035644, "step": 81860 }, { "epoch": 0.7704941176470588, "grad_norm": 0.9438357936594807, "learning_rate": 2.2790305526568355e-06, "loss": 0.01587417870759964, "step": 81865 }, { "epoch": 0.7705411764705883, "grad_norm": 0.6714887206955269, "learning_rate": 2.278960957775054e-06, "loss": 0.01306394636631012, "step": 81870 }, { "epoch": 0.7705882352941177, "grad_norm": 0.5759181366424818, "learning_rate": 2.2788913692685516e-06, "loss": 0.018143674731254576, "step": 81875 }, { "epoch": 0.7706352941176471, "grad_norm": 0.5453370997895689, "learning_rate": 2.2788217871363546e-06, "loss": 0.014881381392478943, "step": 81880 }, { "epoch": 0.7706823529411765, "grad_norm": 0.45827768828524135, "learning_rate": 2.27875221137749e-06, "loss": 0.016788360476493836, "step": 81885 }, { "epoch": 0.7707294117647059, "grad_norm": 0.4310211185705401, "learning_rate": 2.278682641990985e-06, "loss": 0.018383944034576417, "step": 81890 }, { "epoch": 0.7707764705882353, "grad_norm": 0.6585025086013009, "learning_rate": 2.278613078975867e-06, "loss": 0.01446879506111145, "step": 81895 }, { "epoch": 0.7708235294117647, "grad_norm": 0.3796906618662447, "learning_rate": 2.278543522331163e-06, "loss": 0.015151235461235046, "step": 81900 }, { "epoch": 0.7708705882352941, "grad_norm": 0.5089960488304938, "learning_rate": 2.278473972055902e-06, "loss": 0.015016943216323853, "step": 81905 }, { "epoch": 0.7709176470588235, "grad_norm": 0.49433423288556255, "learning_rate": 2.278404428149111e-06, "loss": 0.01803426295518875, "step": 81910 }, { "epoch": 0.770964705882353, "grad_norm": 0.3567623191922325, "learning_rate": 2.2783348906098184e-06, "loss": 0.015686817467212677, "step": 81915 }, { "epoch": 0.7710117647058824, "grad_norm": 0.5601552119053076, "learning_rate": 2.2782653594370526e-06, "loss": 0.024380892515182495, "step": 81920 }, { "epoch": 0.7710588235294118, "grad_norm": 0.5221910216451531, "learning_rate": 2.278195834629842e-06, "loss": 0.01647413372993469, "step": 81925 }, { "epoch": 0.7711058823529412, "grad_norm": 0.6952544733563656, "learning_rate": 2.278126316187215e-06, "loss": 0.01640598475933075, "step": 81930 }, { "epoch": 0.7711529411764706, "grad_norm": 0.478368273311954, "learning_rate": 2.2780568041082015e-06, "loss": 0.014893095195293426, "step": 81935 }, { "epoch": 0.7712, "grad_norm": 0.536351553881591, "learning_rate": 2.27798729839183e-06, "loss": 0.01578054130077362, "step": 81940 }, { "epoch": 0.7712470588235294, "grad_norm": 0.6612887384553205, "learning_rate": 2.2779177990371307e-06, "loss": 0.014635145664215088, "step": 81945 }, { "epoch": 0.7712941176470588, "grad_norm": 0.35638619819958306, "learning_rate": 2.2778483060431323e-06, "loss": 0.011780835688114166, "step": 81950 }, { "epoch": 0.7713411764705882, "grad_norm": 0.530716484471057, "learning_rate": 2.2777788194088653e-06, "loss": 0.017256897687911988, "step": 81955 }, { "epoch": 0.7713882352941176, "grad_norm": 2.1673592734481044, "learning_rate": 2.2777093391333593e-06, "loss": 0.016900938749313355, "step": 81960 }, { "epoch": 0.7714352941176471, "grad_norm": 0.4007593202599856, "learning_rate": 2.277639865215645e-06, "loss": 0.016685059666633605, "step": 81965 }, { "epoch": 0.7714823529411765, "grad_norm": 0.40777583460641065, "learning_rate": 2.2775703976547523e-06, "loss": 0.012045939266681672, "step": 81970 }, { "epoch": 0.7715294117647059, "grad_norm": 0.3104148213971777, "learning_rate": 2.277500936449712e-06, "loss": 0.012001663446426392, "step": 81975 }, { "epoch": 0.7715764705882353, "grad_norm": 0.6169966723503881, "learning_rate": 2.2774314815995555e-06, "loss": 0.010975942015647888, "step": 81980 }, { "epoch": 0.7716235294117647, "grad_norm": 0.3943614123997249, "learning_rate": 2.2773620331033124e-06, "loss": 0.01718711405992508, "step": 81985 }, { "epoch": 0.7716705882352941, "grad_norm": 0.4355366659170938, "learning_rate": 2.277292590960016e-06, "loss": 0.015152890980243684, "step": 81990 }, { "epoch": 0.7717176470588235, "grad_norm": 0.6311948838735304, "learning_rate": 2.277223155168696e-06, "loss": 0.018161417543888093, "step": 81995 }, { "epoch": 0.7717647058823529, "grad_norm": 0.45104344063103, "learning_rate": 2.2771537257283854e-06, "loss": 0.015698114037513734, "step": 82000 }, { "epoch": 0.7718117647058823, "grad_norm": 0.4698096726348246, "learning_rate": 2.2770843026381154e-06, "loss": 0.015021038055419923, "step": 82005 }, { "epoch": 0.7718588235294118, "grad_norm": 0.5698313200115431, "learning_rate": 2.277014885896918e-06, "loss": 0.02463441789150238, "step": 82010 }, { "epoch": 0.7719058823529412, "grad_norm": 0.5899213885701436, "learning_rate": 2.276945475503826e-06, "loss": 0.01697799563407898, "step": 82015 }, { "epoch": 0.7719529411764706, "grad_norm": 0.5267791661459295, "learning_rate": 2.2768760714578716e-06, "loss": 0.018189631402492523, "step": 82020 }, { "epoch": 0.772, "grad_norm": 0.4370318510265888, "learning_rate": 2.276806673758087e-06, "loss": 0.016877952218055724, "step": 82025 }, { "epoch": 0.7720470588235294, "grad_norm": 0.5399018965511709, "learning_rate": 2.2767372824035067e-06, "loss": 0.016306591033935548, "step": 82030 }, { "epoch": 0.7720941176470588, "grad_norm": 0.6084046831695817, "learning_rate": 2.2766678973931623e-06, "loss": 0.015091034770011901, "step": 82035 }, { "epoch": 0.7721411764705882, "grad_norm": 0.3981162579171577, "learning_rate": 2.276598518726087e-06, "loss": 0.01795065701007843, "step": 82040 }, { "epoch": 0.7721882352941176, "grad_norm": 0.40256954012653945, "learning_rate": 2.2765291464013164e-06, "loss": 0.014111629128456116, "step": 82045 }, { "epoch": 0.772235294117647, "grad_norm": 0.7248903443845728, "learning_rate": 2.276459780417882e-06, "loss": 0.016819149255752563, "step": 82050 }, { "epoch": 0.7722823529411764, "grad_norm": 0.45343434083585493, "learning_rate": 2.276390420774818e-06, "loss": 0.01746070683002472, "step": 82055 }, { "epoch": 0.7723294117647059, "grad_norm": 0.4566190481521477, "learning_rate": 2.2763210674711604e-06, "loss": 0.013374470174312592, "step": 82060 }, { "epoch": 0.7723764705882353, "grad_norm": 0.3592836286462983, "learning_rate": 2.2762517205059416e-06, "loss": 0.013974374532699585, "step": 82065 }, { "epoch": 0.7724235294117647, "grad_norm": 0.3939298070989909, "learning_rate": 2.276182379878197e-06, "loss": 0.01509021818637848, "step": 82070 }, { "epoch": 0.7724705882352941, "grad_norm": 0.4239264367106519, "learning_rate": 2.2761130455869615e-06, "loss": 0.01873636245727539, "step": 82075 }, { "epoch": 0.7725176470588235, "grad_norm": 0.418872773992709, "learning_rate": 2.2760437176312695e-06, "loss": 0.01262291669845581, "step": 82080 }, { "epoch": 0.7725647058823529, "grad_norm": 0.6295727976360052, "learning_rate": 2.2759743960101565e-06, "loss": 0.014680179953575134, "step": 82085 }, { "epoch": 0.7726117647058823, "grad_norm": 0.5035969152630535, "learning_rate": 2.2759050807226583e-06, "loss": 0.012575934827327728, "step": 82090 }, { "epoch": 0.7726588235294117, "grad_norm": 0.6974884906495347, "learning_rate": 2.27583577176781e-06, "loss": 0.012485899031162262, "step": 82095 }, { "epoch": 0.7727058823529411, "grad_norm": 0.5704977704597676, "learning_rate": 2.2757664691446475e-06, "loss": 0.01672501564025879, "step": 82100 }, { "epoch": 0.7727529411764706, "grad_norm": 0.4532817994705222, "learning_rate": 2.2756971728522066e-06, "loss": 0.015404506027698517, "step": 82105 }, { "epoch": 0.7728, "grad_norm": 0.6691185483150303, "learning_rate": 2.2756278828895244e-06, "loss": 0.01469811499118805, "step": 82110 }, { "epoch": 0.7728470588235294, "grad_norm": 0.3247618418010437, "learning_rate": 2.275558599255636e-06, "loss": 0.015890702605247498, "step": 82115 }, { "epoch": 0.7728941176470588, "grad_norm": 0.4568497875581984, "learning_rate": 2.275489321949579e-06, "loss": 0.015885236859321594, "step": 82120 }, { "epoch": 0.7729411764705882, "grad_norm": 0.7520949197453836, "learning_rate": 2.27542005097039e-06, "loss": 0.01718648076057434, "step": 82125 }, { "epoch": 0.7729882352941176, "grad_norm": 0.5419846942162079, "learning_rate": 2.2753507863171065e-06, "loss": 0.014490054547786712, "step": 82130 }, { "epoch": 0.773035294117647, "grad_norm": 0.42517110975034933, "learning_rate": 2.2752815279887645e-06, "loss": 0.014680303633213043, "step": 82135 }, { "epoch": 0.7730823529411764, "grad_norm": 0.7749304575097717, "learning_rate": 2.275212275984402e-06, "loss": 0.018200090527534483, "step": 82140 }, { "epoch": 0.7731294117647058, "grad_norm": 0.5636131762143172, "learning_rate": 2.275143030303058e-06, "loss": 0.0176794171333313, "step": 82145 }, { "epoch": 0.7731764705882352, "grad_norm": 0.5593398972502915, "learning_rate": 2.275073790943769e-06, "loss": 0.011715862900018692, "step": 82150 }, { "epoch": 0.7732235294117648, "grad_norm": 0.4737866599287606, "learning_rate": 2.275004557905572e-06, "loss": 0.016309718787670135, "step": 82155 }, { "epoch": 0.7732705882352942, "grad_norm": 0.393481987396268, "learning_rate": 2.2749353311875072e-06, "loss": 0.01248793676495552, "step": 82160 }, { "epoch": 0.7733176470588236, "grad_norm": 0.5778395460994312, "learning_rate": 2.2748661107886124e-06, "loss": 0.014165592193603516, "step": 82165 }, { "epoch": 0.773364705882353, "grad_norm": 0.4965985673330174, "learning_rate": 2.274796896707927e-06, "loss": 0.013058091700077056, "step": 82170 }, { "epoch": 0.7734117647058824, "grad_norm": 0.4461591515597521, "learning_rate": 2.2747276889444884e-06, "loss": 0.0169424369931221, "step": 82175 }, { "epoch": 0.7734588235294118, "grad_norm": 0.6091994088131233, "learning_rate": 2.2746584874973365e-06, "loss": 0.011260069906711578, "step": 82180 }, { "epoch": 0.7735058823529412, "grad_norm": 0.5366588793757539, "learning_rate": 2.2745892923655107e-06, "loss": 0.01633780300617218, "step": 82185 }, { "epoch": 0.7735529411764706, "grad_norm": 0.38511823587342525, "learning_rate": 2.2745201035480504e-06, "loss": 0.01112213134765625, "step": 82190 }, { "epoch": 0.7736, "grad_norm": 0.3682736334872447, "learning_rate": 2.274450921043995e-06, "loss": 0.022550797462463378, "step": 82195 }, { "epoch": 0.7736470588235295, "grad_norm": 0.39166260593954333, "learning_rate": 2.274381744852385e-06, "loss": 0.013404764235019684, "step": 82200 }, { "epoch": 0.7736941176470589, "grad_norm": 0.4711821706886015, "learning_rate": 2.27431257497226e-06, "loss": 0.01100437119603157, "step": 82205 }, { "epoch": 0.7737411764705883, "grad_norm": 0.3789103683769093, "learning_rate": 2.2742434114026606e-06, "loss": 0.013858462870121002, "step": 82210 }, { "epoch": 0.7737882352941177, "grad_norm": 0.44842220169656627, "learning_rate": 2.274174254142627e-06, "loss": 0.014188045263290405, "step": 82215 }, { "epoch": 0.7738352941176471, "grad_norm": 0.41912013810888993, "learning_rate": 2.2741051031912003e-06, "loss": 0.01745734512805939, "step": 82220 }, { "epoch": 0.7738823529411765, "grad_norm": 0.5551896236657919, "learning_rate": 2.274035958547421e-06, "loss": 0.014027485251426696, "step": 82225 }, { "epoch": 0.7739294117647059, "grad_norm": 0.7146516988371787, "learning_rate": 2.2739668202103306e-06, "loss": 0.017083173990249632, "step": 82230 }, { "epoch": 0.7739764705882353, "grad_norm": 0.5276169245041881, "learning_rate": 2.27389768817897e-06, "loss": 0.0158852219581604, "step": 82235 }, { "epoch": 0.7740235294117647, "grad_norm": 0.39265987793933704, "learning_rate": 2.2738285624523814e-06, "loss": 0.010471229255199433, "step": 82240 }, { "epoch": 0.7740705882352941, "grad_norm": 0.4701988730106008, "learning_rate": 2.2737594430296064e-06, "loss": 0.0154347226023674, "step": 82245 }, { "epoch": 0.7741176470588236, "grad_norm": 0.49433004732931873, "learning_rate": 2.273690329909686e-06, "loss": 0.01424117386341095, "step": 82250 }, { "epoch": 0.774164705882353, "grad_norm": 0.39790194382359906, "learning_rate": 2.273621223091664e-06, "loss": 0.011506050825119019, "step": 82255 }, { "epoch": 0.7742117647058824, "grad_norm": 0.4963111671092845, "learning_rate": 2.2735521225745817e-06, "loss": 0.012751838564872742, "step": 82260 }, { "epoch": 0.7742588235294118, "grad_norm": 0.4005764331035659, "learning_rate": 2.2734830283574815e-06, "loss": 0.013704365491867066, "step": 82265 }, { "epoch": 0.7743058823529412, "grad_norm": 0.5037252770013401, "learning_rate": 2.2734139404394067e-06, "loss": 0.01431901752948761, "step": 82270 }, { "epoch": 0.7743529411764706, "grad_norm": 0.24779741802727265, "learning_rate": 2.2733448588194e-06, "loss": 0.014664950966835021, "step": 82275 }, { "epoch": 0.7744, "grad_norm": 0.28841803114075437, "learning_rate": 2.2732757834965043e-06, "loss": 0.011885316669940948, "step": 82280 }, { "epoch": 0.7744470588235294, "grad_norm": 0.6931468822899071, "learning_rate": 2.2732067144697637e-06, "loss": 0.016670167446136475, "step": 82285 }, { "epoch": 0.7744941176470588, "grad_norm": 0.33945757751676325, "learning_rate": 2.2731376517382216e-06, "loss": 0.0124330073595047, "step": 82290 }, { "epoch": 0.7745411764705883, "grad_norm": 0.3200888393301454, "learning_rate": 2.2730685953009207e-06, "loss": 0.01109597533941269, "step": 82295 }, { "epoch": 0.7745882352941177, "grad_norm": 0.4882210414131534, "learning_rate": 2.2729995451569065e-06, "loss": 0.014091843366622924, "step": 82300 }, { "epoch": 0.7746352941176471, "grad_norm": 0.6303324026102195, "learning_rate": 2.272930501305222e-06, "loss": 0.018728940188884734, "step": 82305 }, { "epoch": 0.7746823529411765, "grad_norm": 0.40272972240208044, "learning_rate": 2.2728614637449125e-06, "loss": 0.014647594094276429, "step": 82310 }, { "epoch": 0.7747294117647059, "grad_norm": 0.3675714490232888, "learning_rate": 2.272792432475022e-06, "loss": 0.012002348899841309, "step": 82315 }, { "epoch": 0.7747764705882353, "grad_norm": 0.45800843249593404, "learning_rate": 2.2727234074945963e-06, "loss": 0.01951904594898224, "step": 82320 }, { "epoch": 0.7748235294117647, "grad_norm": 0.4722290503309804, "learning_rate": 2.2726543888026785e-06, "loss": 0.014169590175151825, "step": 82325 }, { "epoch": 0.7748705882352941, "grad_norm": 0.5113202955065704, "learning_rate": 2.272585376398315e-06, "loss": 0.01502135992050171, "step": 82330 }, { "epoch": 0.7749176470588235, "grad_norm": 0.5293478036531943, "learning_rate": 2.2725163702805514e-06, "loss": 0.01329108327627182, "step": 82335 }, { "epoch": 0.7749647058823529, "grad_norm": 0.4895273017091548, "learning_rate": 2.2724473704484334e-06, "loss": 0.016207939386367796, "step": 82340 }, { "epoch": 0.7750117647058824, "grad_norm": 0.4103829091380923, "learning_rate": 2.2723783769010054e-06, "loss": 0.01466713547706604, "step": 82345 }, { "epoch": 0.7750588235294118, "grad_norm": 0.7036977503945641, "learning_rate": 2.272309389637315e-06, "loss": 0.02061464488506317, "step": 82350 }, { "epoch": 0.7751058823529412, "grad_norm": 0.3188941642300644, "learning_rate": 2.272240408656408e-06, "loss": 0.01363469809293747, "step": 82355 }, { "epoch": 0.7751529411764706, "grad_norm": 0.542899161741682, "learning_rate": 2.2721714339573302e-06, "loss": 0.014181782305240632, "step": 82360 }, { "epoch": 0.7752, "grad_norm": 0.37380446968551406, "learning_rate": 2.272102465539129e-06, "loss": 0.01492893397808075, "step": 82365 }, { "epoch": 0.7752470588235294, "grad_norm": 0.4346494170609753, "learning_rate": 2.2720335034008512e-06, "loss": 0.014000701904296874, "step": 82370 }, { "epoch": 0.7752941176470588, "grad_norm": 0.5853060163592522, "learning_rate": 2.2719645475415436e-06, "loss": 0.011864306777715683, "step": 82375 }, { "epoch": 0.7753411764705882, "grad_norm": 0.5382189761602653, "learning_rate": 2.2718955979602527e-06, "loss": 0.017268443107604982, "step": 82380 }, { "epoch": 0.7753882352941176, "grad_norm": 0.493052008532085, "learning_rate": 2.2718266546560265e-06, "loss": 0.023009198904037475, "step": 82385 }, { "epoch": 0.7754352941176471, "grad_norm": 0.48481705667131575, "learning_rate": 2.2717577176279134e-06, "loss": 0.016186654567718506, "step": 82390 }, { "epoch": 0.7754823529411765, "grad_norm": 0.3557721862884742, "learning_rate": 2.27168878687496e-06, "loss": 0.011143562197685242, "step": 82395 }, { "epoch": 0.7755294117647059, "grad_norm": 0.4294666590143717, "learning_rate": 2.2716198623962156e-06, "loss": 0.015894851088523863, "step": 82400 }, { "epoch": 0.7755764705882353, "grad_norm": 0.7346126761822783, "learning_rate": 2.271550944190727e-06, "loss": 0.01536129117012024, "step": 82405 }, { "epoch": 0.7756235294117647, "grad_norm": 0.6102532957058252, "learning_rate": 2.271482032257544e-06, "loss": 0.015288141369819642, "step": 82410 }, { "epoch": 0.7756705882352941, "grad_norm": 0.332171857330329, "learning_rate": 2.2714131265957144e-06, "loss": 0.020269522070884706, "step": 82415 }, { "epoch": 0.7757176470588235, "grad_norm": 0.7756368365964396, "learning_rate": 2.271344227204287e-06, "loss": 0.016181723773479463, "step": 82420 }, { "epoch": 0.7757647058823529, "grad_norm": 0.6806095379798218, "learning_rate": 2.271275334082312e-06, "loss": 0.012654779851436615, "step": 82425 }, { "epoch": 0.7758117647058823, "grad_norm": 0.43218576621356014, "learning_rate": 2.2712064472288367e-06, "loss": 0.016910526156425475, "step": 82430 }, { "epoch": 0.7758588235294117, "grad_norm": 0.5692759387497669, "learning_rate": 2.271137566642912e-06, "loss": 0.015007439255714416, "step": 82435 }, { "epoch": 0.7759058823529412, "grad_norm": 0.5169865473906048, "learning_rate": 2.2710686923235874e-06, "loss": 0.021829411387443542, "step": 82440 }, { "epoch": 0.7759529411764706, "grad_norm": 0.5176277507707093, "learning_rate": 2.2709998242699123e-06, "loss": 0.014860445261001587, "step": 82445 }, { "epoch": 0.776, "grad_norm": 0.42601633583433074, "learning_rate": 2.2709309624809377e-06, "loss": 0.015319328010082244, "step": 82450 }, { "epoch": 0.7760470588235294, "grad_norm": 0.4181896498951742, "learning_rate": 2.270862106955712e-06, "loss": 0.015323927998542786, "step": 82455 }, { "epoch": 0.7760941176470588, "grad_norm": 0.6414869266749235, "learning_rate": 2.2707932576932875e-06, "loss": 0.012697336077690125, "step": 82460 }, { "epoch": 0.7761411764705882, "grad_norm": 0.4299700845261801, "learning_rate": 2.2707244146927137e-06, "loss": 0.01573035418987274, "step": 82465 }, { "epoch": 0.7761882352941176, "grad_norm": 0.47746857999685643, "learning_rate": 2.2706555779530427e-06, "loss": 0.008966987580060959, "step": 82470 }, { "epoch": 0.776235294117647, "grad_norm": 0.33270657027439854, "learning_rate": 2.2705867474733244e-06, "loss": 0.015429189801216126, "step": 82475 }, { "epoch": 0.7762823529411764, "grad_norm": 0.6777267428212701, "learning_rate": 2.2705179232526107e-06, "loss": 0.015963256359100342, "step": 82480 }, { "epoch": 0.7763294117647059, "grad_norm": 0.40002913406176926, "learning_rate": 2.2704491052899525e-06, "loss": 0.016642661392688753, "step": 82485 }, { "epoch": 0.7763764705882353, "grad_norm": 0.40248517521070326, "learning_rate": 2.270380293584402e-06, "loss": 0.01478331834077835, "step": 82490 }, { "epoch": 0.7764235294117647, "grad_norm": 0.485044180862633, "learning_rate": 2.27031148813501e-06, "loss": 0.013143163919448853, "step": 82495 }, { "epoch": 0.7764705882352941, "grad_norm": 0.4972788393693498, "learning_rate": 2.270242688940831e-06, "loss": 0.012941089272499085, "step": 82500 }, { "epoch": 0.7765176470588235, "grad_norm": 0.4758279294621441, "learning_rate": 2.270173896000915e-06, "loss": 0.01968929320573807, "step": 82505 }, { "epoch": 0.7765647058823529, "grad_norm": 0.5116451491513067, "learning_rate": 2.270105109314315e-06, "loss": 0.014118362963199616, "step": 82510 }, { "epoch": 0.7766117647058823, "grad_norm": 0.3782251212876619, "learning_rate": 2.270036328880084e-06, "loss": 0.0134578138589859, "step": 82515 }, { "epoch": 0.7766588235294117, "grad_norm": 0.42251559179946147, "learning_rate": 2.269967554697275e-06, "loss": 0.01113406866788864, "step": 82520 }, { "epoch": 0.7767058823529411, "grad_norm": 0.5033662656446403, "learning_rate": 2.2698987867649403e-06, "loss": 0.01107572615146637, "step": 82525 }, { "epoch": 0.7767529411764705, "grad_norm": 0.49036769767470717, "learning_rate": 2.269830025082134e-06, "loss": 0.01863398253917694, "step": 82530 }, { "epoch": 0.7768, "grad_norm": 0.5529794624943651, "learning_rate": 2.2697612696479098e-06, "loss": 0.013464030623435975, "step": 82535 }, { "epoch": 0.7768470588235294, "grad_norm": 0.3999044384475526, "learning_rate": 2.2696925204613204e-06, "loss": 0.012323644757270814, "step": 82540 }, { "epoch": 0.7768941176470588, "grad_norm": 0.5111521554894232, "learning_rate": 2.26962377752142e-06, "loss": 0.013692611455917358, "step": 82545 }, { "epoch": 0.7769411764705882, "grad_norm": 0.6197895814138813, "learning_rate": 2.269555040827263e-06, "loss": 0.015257130563259124, "step": 82550 }, { "epoch": 0.7769882352941176, "grad_norm": 0.430126868644032, "learning_rate": 2.2694863103779035e-06, "loss": 0.01391029953956604, "step": 82555 }, { "epoch": 0.777035294117647, "grad_norm": 0.28483174529435745, "learning_rate": 2.2694175861723956e-06, "loss": 0.012804725766181945, "step": 82560 }, { "epoch": 0.7770823529411764, "grad_norm": 0.5611664661185704, "learning_rate": 2.2693488682097947e-06, "loss": 0.013039124011993409, "step": 82565 }, { "epoch": 0.7771294117647058, "grad_norm": 0.6019296929603607, "learning_rate": 2.2692801564891552e-06, "loss": 0.019303563237190246, "step": 82570 }, { "epoch": 0.7771764705882352, "grad_norm": 0.34498405465389786, "learning_rate": 2.2692114510095325e-06, "loss": 0.017353734374046324, "step": 82575 }, { "epoch": 0.7772235294117648, "grad_norm": 0.5201657287835922, "learning_rate": 2.2691427517699815e-06, "loss": 0.01505821943283081, "step": 82580 }, { "epoch": 0.7772705882352942, "grad_norm": 0.5323600933690912, "learning_rate": 2.269074058769558e-06, "loss": 0.01654234230518341, "step": 82585 }, { "epoch": 0.7773176470588236, "grad_norm": 0.61910435012244, "learning_rate": 2.2690053720073177e-06, "loss": 0.018645861744880678, "step": 82590 }, { "epoch": 0.777364705882353, "grad_norm": 0.46689465083766757, "learning_rate": 2.268936691482316e-06, "loss": 0.01463681012392044, "step": 82595 }, { "epoch": 0.7774117647058824, "grad_norm": 0.6219856315352128, "learning_rate": 2.2688680171936096e-06, "loss": 0.01866481304168701, "step": 82600 }, { "epoch": 0.7774588235294118, "grad_norm": 0.5979406117449326, "learning_rate": 2.2687993491402545e-06, "loss": 0.013019658625125885, "step": 82605 }, { "epoch": 0.7775058823529412, "grad_norm": 0.5477478285947026, "learning_rate": 2.268730687321307e-06, "loss": 0.015813452005386353, "step": 82610 }, { "epoch": 0.7775529411764706, "grad_norm": 0.3085705256810614, "learning_rate": 2.268662031735824e-06, "loss": 0.017020827531814574, "step": 82615 }, { "epoch": 0.7776, "grad_norm": 0.5542465966127241, "learning_rate": 2.268593382382863e-06, "loss": 0.013619467616081238, "step": 82620 }, { "epoch": 0.7776470588235294, "grad_norm": 0.40846329406021953, "learning_rate": 2.2685247392614795e-06, "loss": 0.012456869333982467, "step": 82625 }, { "epoch": 0.7776941176470589, "grad_norm": 0.7041713646972816, "learning_rate": 2.268456102370732e-06, "loss": 0.01839771866798401, "step": 82630 }, { "epoch": 0.7777411764705883, "grad_norm": 0.9597046525533502, "learning_rate": 2.268387471709678e-06, "loss": 0.016901490092277528, "step": 82635 }, { "epoch": 0.7777882352941177, "grad_norm": 0.4324414298820944, "learning_rate": 2.2683188472773747e-06, "loss": 0.016246691346168518, "step": 82640 }, { "epoch": 0.7778352941176471, "grad_norm": 0.8008021168963129, "learning_rate": 2.26825022907288e-06, "loss": 0.015474216639995575, "step": 82645 }, { "epoch": 0.7778823529411765, "grad_norm": 0.308843808870145, "learning_rate": 2.2681816170952525e-06, "loss": 0.013689881563186646, "step": 82650 }, { "epoch": 0.7779294117647059, "grad_norm": 0.38301474385735595, "learning_rate": 2.26811301134355e-06, "loss": 0.013999837636947631, "step": 82655 }, { "epoch": 0.7779764705882353, "grad_norm": 0.4955910829737011, "learning_rate": 2.268044411816831e-06, "loss": 0.013434988260269166, "step": 82660 }, { "epoch": 0.7780235294117647, "grad_norm": 0.6943399262834054, "learning_rate": 2.2679758185141544e-06, "loss": 0.019138401746749877, "step": 82665 }, { "epoch": 0.7780705882352941, "grad_norm": 0.29367467518333906, "learning_rate": 2.2679072314345783e-06, "loss": 0.01271323412656784, "step": 82670 }, { "epoch": 0.7781176470588236, "grad_norm": 0.4615522411093352, "learning_rate": 2.2678386505771635e-06, "loss": 0.014411036670207978, "step": 82675 }, { "epoch": 0.778164705882353, "grad_norm": 0.7746677335723292, "learning_rate": 2.2677700759409677e-06, "loss": 0.015152421593666077, "step": 82680 }, { "epoch": 0.7782117647058824, "grad_norm": 0.8045479225154907, "learning_rate": 2.2677015075250507e-06, "loss": 0.013843286037445068, "step": 82685 }, { "epoch": 0.7782588235294118, "grad_norm": 0.28781359917470084, "learning_rate": 2.2676329453284723e-06, "loss": 0.014639332890510559, "step": 82690 }, { "epoch": 0.7783058823529412, "grad_norm": 2.2507777397583957, "learning_rate": 2.2675643893502926e-06, "loss": 0.015609869360923767, "step": 82695 }, { "epoch": 0.7783529411764706, "grad_norm": 0.6003387533440795, "learning_rate": 2.2674958395895715e-06, "loss": 0.013284595310688018, "step": 82700 }, { "epoch": 0.7784, "grad_norm": 0.508554615911681, "learning_rate": 2.2674272960453696e-06, "loss": 0.011788851022720337, "step": 82705 }, { "epoch": 0.7784470588235294, "grad_norm": 0.3352957601927915, "learning_rate": 2.267358758716747e-06, "loss": 0.012098640203475952, "step": 82710 }, { "epoch": 0.7784941176470588, "grad_norm": 0.5809173388264008, "learning_rate": 2.267290227602764e-06, "loss": 0.010743946582078934, "step": 82715 }, { "epoch": 0.7785411764705882, "grad_norm": 0.3924779190585513, "learning_rate": 2.267221702702482e-06, "loss": 0.014136752486228943, "step": 82720 }, { "epoch": 0.7785882352941177, "grad_norm": 0.7197547785487347, "learning_rate": 2.267153184014962e-06, "loss": 0.01783352196216583, "step": 82725 }, { "epoch": 0.7786352941176471, "grad_norm": 0.46340565453888466, "learning_rate": 2.267084671539265e-06, "loss": 0.013523201644420623, "step": 82730 }, { "epoch": 0.7786823529411765, "grad_norm": 0.3338494290582553, "learning_rate": 2.267016165274453e-06, "loss": 0.01417265385389328, "step": 82735 }, { "epoch": 0.7787294117647059, "grad_norm": 0.3912084480438689, "learning_rate": 2.2669476652195868e-06, "loss": 0.01619928777217865, "step": 82740 }, { "epoch": 0.7787764705882353, "grad_norm": 0.48831919265632434, "learning_rate": 2.266879171373729e-06, "loss": 0.018596789240837096, "step": 82745 }, { "epoch": 0.7788235294117647, "grad_norm": 0.5876861371252714, "learning_rate": 2.266810683735942e-06, "loss": 0.01503431499004364, "step": 82750 }, { "epoch": 0.7788705882352941, "grad_norm": 0.3363285099135063, "learning_rate": 2.2667422023052866e-06, "loss": 0.014124594628810883, "step": 82755 }, { "epoch": 0.7789176470588235, "grad_norm": 0.6108399141867032, "learning_rate": 2.2666737270808263e-06, "loss": 0.014429429173469543, "step": 82760 }, { "epoch": 0.7789647058823529, "grad_norm": 0.6309968403510625, "learning_rate": 2.266605258061624e-06, "loss": 0.01933795362710953, "step": 82765 }, { "epoch": 0.7790117647058824, "grad_norm": 0.43472838012249493, "learning_rate": 2.266536795246743e-06, "loss": 0.016038811206817626, "step": 82770 }, { "epoch": 0.7790588235294118, "grad_norm": 0.4540835752841497, "learning_rate": 2.2664683386352437e-06, "loss": 0.014171838760375977, "step": 82775 }, { "epoch": 0.7791058823529412, "grad_norm": 0.37817522150601907, "learning_rate": 2.266399888226192e-06, "loss": 0.013135193288326264, "step": 82780 }, { "epoch": 0.7791529411764706, "grad_norm": 0.5008341129760648, "learning_rate": 2.2663314440186508e-06, "loss": 0.014413638412952423, "step": 82785 }, { "epoch": 0.7792, "grad_norm": 0.5998268002823653, "learning_rate": 2.266263006011683e-06, "loss": 0.014155767858028412, "step": 82790 }, { "epoch": 0.7792470588235294, "grad_norm": 0.7403567241013138, "learning_rate": 2.2661945742043526e-06, "loss": 0.017202720046043396, "step": 82795 }, { "epoch": 0.7792941176470588, "grad_norm": 0.4745009797313362, "learning_rate": 2.2661261485957244e-06, "loss": 0.012639796733856202, "step": 82800 }, { "epoch": 0.7793411764705882, "grad_norm": 0.46492040444416866, "learning_rate": 2.2660577291848625e-06, "loss": 0.013087062537670136, "step": 82805 }, { "epoch": 0.7793882352941176, "grad_norm": 0.335672489600684, "learning_rate": 2.2659893159708305e-06, "loss": 0.011604957282543182, "step": 82810 }, { "epoch": 0.779435294117647, "grad_norm": 0.3058600512284599, "learning_rate": 2.2659209089526933e-06, "loss": 0.01174914687871933, "step": 82815 }, { "epoch": 0.7794823529411765, "grad_norm": 0.7420788174476567, "learning_rate": 2.2658525081295167e-06, "loss": 0.0170762836933136, "step": 82820 }, { "epoch": 0.7795294117647059, "grad_norm": 0.25459765054505984, "learning_rate": 2.2657841135003642e-06, "loss": 0.013334476947784423, "step": 82825 }, { "epoch": 0.7795764705882353, "grad_norm": 0.588028023527573, "learning_rate": 2.265715725064302e-06, "loss": 0.014925542473793029, "step": 82830 }, { "epoch": 0.7796235294117647, "grad_norm": 0.5642000773659829, "learning_rate": 2.265647342820396e-06, "loss": 0.01652478575706482, "step": 82835 }, { "epoch": 0.7796705882352941, "grad_norm": 0.4258358390221614, "learning_rate": 2.26557896676771e-06, "loss": 0.01588815599679947, "step": 82840 }, { "epoch": 0.7797176470588235, "grad_norm": 0.2761420952270486, "learning_rate": 2.265510596905312e-06, "loss": 0.010753375291824342, "step": 82845 }, { "epoch": 0.7797647058823529, "grad_norm": 0.7933355305610928, "learning_rate": 2.2654422332322663e-06, "loss": 0.015048182010650635, "step": 82850 }, { "epoch": 0.7798117647058823, "grad_norm": 0.9153249218235809, "learning_rate": 2.2653738757476397e-06, "loss": 0.01976418197154999, "step": 82855 }, { "epoch": 0.7798588235294117, "grad_norm": 0.3844701437355436, "learning_rate": 2.2653055244504994e-06, "loss": 0.01461191326379776, "step": 82860 }, { "epoch": 0.7799058823529412, "grad_norm": 0.4528292647977298, "learning_rate": 2.265237179339911e-06, "loss": 0.014089727401733398, "step": 82865 }, { "epoch": 0.7799529411764706, "grad_norm": 0.5184740005237451, "learning_rate": 2.2651688404149415e-06, "loss": 0.017905250191688538, "step": 82870 }, { "epoch": 0.78, "grad_norm": 0.581432100501558, "learning_rate": 2.265100507674658e-06, "loss": 0.01642262488603592, "step": 82875 }, { "epoch": 0.7800470588235294, "grad_norm": 0.49580196886832806, "learning_rate": 2.2650321811181277e-06, "loss": 0.014294973015785218, "step": 82880 }, { "epoch": 0.7800941176470588, "grad_norm": 0.5665859524374317, "learning_rate": 2.2649638607444184e-06, "loss": 0.015653030574321748, "step": 82885 }, { "epoch": 0.7801411764705882, "grad_norm": 0.56801011749381, "learning_rate": 2.2648955465525965e-06, "loss": 0.01764487326145172, "step": 82890 }, { "epoch": 0.7801882352941176, "grad_norm": 0.5943062754918965, "learning_rate": 2.264827238541731e-06, "loss": 0.0130377396941185, "step": 82895 }, { "epoch": 0.780235294117647, "grad_norm": 0.37241819851431235, "learning_rate": 2.2647589367108898e-06, "loss": 0.01502050757408142, "step": 82900 }, { "epoch": 0.7802823529411764, "grad_norm": 0.6344842191078303, "learning_rate": 2.26469064105914e-06, "loss": 0.014753174781799317, "step": 82905 }, { "epoch": 0.7803294117647058, "grad_norm": 0.34648222621691344, "learning_rate": 2.264622351585551e-06, "loss": 0.01241009756922722, "step": 82910 }, { "epoch": 0.7803764705882353, "grad_norm": 0.39946867471747133, "learning_rate": 2.2645540682891916e-06, "loss": 0.014550289511680603, "step": 82915 }, { "epoch": 0.7804235294117647, "grad_norm": 0.3263806492812355, "learning_rate": 2.26448579116913e-06, "loss": 0.012655377388000488, "step": 82920 }, { "epoch": 0.7804705882352941, "grad_norm": 0.5668497111467471, "learning_rate": 2.2644175202244344e-06, "loss": 0.010678699612617493, "step": 82925 }, { "epoch": 0.7805176470588235, "grad_norm": 0.4544761641366169, "learning_rate": 2.264349255454176e-06, "loss": 0.015892453491687775, "step": 82930 }, { "epoch": 0.7805647058823529, "grad_norm": 0.554081246451228, "learning_rate": 2.264280996857422e-06, "loss": 0.014963982999324799, "step": 82935 }, { "epoch": 0.7806117647058823, "grad_norm": 0.5466723276208596, "learning_rate": 2.2642127444332437e-06, "loss": 0.014244911074638367, "step": 82940 }, { "epoch": 0.7806588235294117, "grad_norm": 0.3124638927546531, "learning_rate": 2.264144498180709e-06, "loss": 0.009072432667016983, "step": 82945 }, { "epoch": 0.7807058823529411, "grad_norm": 0.44413559827471083, "learning_rate": 2.26407625809889e-06, "loss": 0.012076833844184875, "step": 82950 }, { "epoch": 0.7807529411764705, "grad_norm": 0.3039327657371069, "learning_rate": 2.264008024186855e-06, "loss": 0.015691547095775603, "step": 82955 }, { "epoch": 0.7808, "grad_norm": 0.7324505422773976, "learning_rate": 2.263939796443676e-06, "loss": 0.018734723329544067, "step": 82960 }, { "epoch": 0.7808470588235294, "grad_norm": 0.4173854440549616, "learning_rate": 2.263871574868422e-06, "loss": 0.016432049870491027, "step": 82965 }, { "epoch": 0.7808941176470588, "grad_norm": 0.9838345620248438, "learning_rate": 2.263803359460164e-06, "loss": 0.013219507038593292, "step": 82970 }, { "epoch": 0.7809411764705882, "grad_norm": 0.4178418939492055, "learning_rate": 2.263735150217974e-06, "loss": 0.01375662088394165, "step": 82975 }, { "epoch": 0.7809882352941176, "grad_norm": 0.4600842831600164, "learning_rate": 2.2636669471409227e-06, "loss": 0.011743679642677307, "step": 82980 }, { "epoch": 0.781035294117647, "grad_norm": 0.4409356727600542, "learning_rate": 2.2635987502280803e-06, "loss": 0.017737075686454773, "step": 82985 }, { "epoch": 0.7810823529411764, "grad_norm": 0.491336681585623, "learning_rate": 2.26353055947852e-06, "loss": 0.01094595193862915, "step": 82990 }, { "epoch": 0.7811294117647058, "grad_norm": 0.7147928319747434, "learning_rate": 2.263462374891312e-06, "loss": 0.013632068037986755, "step": 82995 }, { "epoch": 0.7811764705882352, "grad_norm": 0.6027153410724682, "learning_rate": 2.2633941964655292e-06, "loss": 0.012109342962503433, "step": 83000 }, { "epoch": 0.7812235294117648, "grad_norm": 0.4633527280716973, "learning_rate": 2.263326024200243e-06, "loss": 0.011629971861839294, "step": 83005 }, { "epoch": 0.7812705882352942, "grad_norm": 0.548216521527408, "learning_rate": 2.263257858094527e-06, "loss": 0.01474919617176056, "step": 83010 }, { "epoch": 0.7813176470588236, "grad_norm": 0.35369017917553547, "learning_rate": 2.2631896981474527e-06, "loss": 0.012040059268474578, "step": 83015 }, { "epoch": 0.781364705882353, "grad_norm": 0.563297890911351, "learning_rate": 2.2631215443580923e-06, "loss": 0.013901521265506745, "step": 83020 }, { "epoch": 0.7814117647058824, "grad_norm": 0.48008493526118695, "learning_rate": 2.2630533967255196e-06, "loss": 0.015574461221694947, "step": 83025 }, { "epoch": 0.7814588235294118, "grad_norm": 0.7186952345778247, "learning_rate": 2.262985255248807e-06, "loss": 0.017579355835914613, "step": 83030 }, { "epoch": 0.7815058823529412, "grad_norm": 0.5594250271387866, "learning_rate": 2.2629171199270288e-06, "loss": 0.01517798900604248, "step": 83035 }, { "epoch": 0.7815529411764706, "grad_norm": 0.5725106084257134, "learning_rate": 2.2628489907592577e-06, "loss": 0.013363274931907653, "step": 83040 }, { "epoch": 0.7816, "grad_norm": 0.4500347455867965, "learning_rate": 2.262780867744567e-06, "loss": 0.012237155437469482, "step": 83045 }, { "epoch": 0.7816470588235294, "grad_norm": 0.4040360045250071, "learning_rate": 2.2627127508820318e-06, "loss": 0.015975505113601685, "step": 83050 }, { "epoch": 0.7816941176470589, "grad_norm": 0.510052772493655, "learning_rate": 2.262644640170725e-06, "loss": 0.012547394633293152, "step": 83055 }, { "epoch": 0.7817411764705883, "grad_norm": 0.5702464223572172, "learning_rate": 2.262576535609721e-06, "loss": 0.015782906115055083, "step": 83060 }, { "epoch": 0.7817882352941177, "grad_norm": 0.5762748132561056, "learning_rate": 2.2625084371980947e-06, "loss": 0.01670840084552765, "step": 83065 }, { "epoch": 0.7818352941176471, "grad_norm": 0.4600775153132789, "learning_rate": 2.262440344934921e-06, "loss": 0.014705045521259308, "step": 83070 }, { "epoch": 0.7818823529411765, "grad_norm": 0.5346157250275634, "learning_rate": 2.262372258819274e-06, "loss": 0.019096614420413972, "step": 83075 }, { "epoch": 0.7819294117647059, "grad_norm": 0.3526584246724549, "learning_rate": 2.2623041788502284e-06, "loss": 0.015106789767742157, "step": 83080 }, { "epoch": 0.7819764705882353, "grad_norm": 0.26721515996941814, "learning_rate": 2.262236105026861e-06, "loss": 0.017139826714992524, "step": 83085 }, { "epoch": 0.7820235294117647, "grad_norm": 0.5578230819420326, "learning_rate": 2.262168037348246e-06, "loss": 0.018452876806259157, "step": 83090 }, { "epoch": 0.7820705882352941, "grad_norm": 0.6263860845280562, "learning_rate": 2.262099975813459e-06, "loss": 0.016242878139019014, "step": 83095 }, { "epoch": 0.7821176470588236, "grad_norm": 0.4674319332290937, "learning_rate": 2.2620319204215766e-06, "loss": 0.015053004026412964, "step": 83100 }, { "epoch": 0.782164705882353, "grad_norm": 0.45321413936244986, "learning_rate": 2.261963871171674e-06, "loss": 0.01578163206577301, "step": 83105 }, { "epoch": 0.7822117647058824, "grad_norm": 0.34527741065673534, "learning_rate": 2.2618958280628283e-06, "loss": 0.012651157379150391, "step": 83110 }, { "epoch": 0.7822588235294118, "grad_norm": 0.6050933333149588, "learning_rate": 2.261827791094115e-06, "loss": 0.01606452465057373, "step": 83115 }, { "epoch": 0.7823058823529412, "grad_norm": 0.645007636531767, "learning_rate": 2.2617597602646104e-06, "loss": 0.01296372413635254, "step": 83120 }, { "epoch": 0.7823529411764706, "grad_norm": 0.413466004215327, "learning_rate": 2.2616917355733927e-06, "loss": 0.01609131395816803, "step": 83125 }, { "epoch": 0.7824, "grad_norm": 0.6790771413345095, "learning_rate": 2.261623717019538e-06, "loss": 0.015266454219818116, "step": 83130 }, { "epoch": 0.7824470588235294, "grad_norm": 0.3635846520149262, "learning_rate": 2.2615557046021226e-06, "loss": 0.014802908897399903, "step": 83135 }, { "epoch": 0.7824941176470588, "grad_norm": 0.4178359928859499, "learning_rate": 2.2614876983202262e-06, "loss": 0.012256868183612823, "step": 83140 }, { "epoch": 0.7825411764705882, "grad_norm": 0.37514369716924645, "learning_rate": 2.261419698172924e-06, "loss": 0.014180076122283936, "step": 83145 }, { "epoch": 0.7825882352941177, "grad_norm": 0.306853535131661, "learning_rate": 2.2613517041592958e-06, "loss": 0.012559354305267334, "step": 83150 }, { "epoch": 0.7826352941176471, "grad_norm": 0.4987281359266835, "learning_rate": 2.2612837162784173e-06, "loss": 0.012510858476161957, "step": 83155 }, { "epoch": 0.7826823529411765, "grad_norm": 0.4097033105925199, "learning_rate": 2.2612157345293682e-06, "loss": 0.013353723287582397, "step": 83160 }, { "epoch": 0.7827294117647059, "grad_norm": 0.3063049695986295, "learning_rate": 2.2611477589112267e-06, "loss": 0.016722874343395235, "step": 83165 }, { "epoch": 0.7827764705882353, "grad_norm": 0.2939879515563358, "learning_rate": 2.2610797894230715e-06, "loss": 0.013147282600402831, "step": 83170 }, { "epoch": 0.7828235294117647, "grad_norm": 0.33358071986716503, "learning_rate": 2.2610118260639797e-06, "loss": 0.015475921332836151, "step": 83175 }, { "epoch": 0.7828705882352941, "grad_norm": 0.5759325857108237, "learning_rate": 2.260943868833033e-06, "loss": 0.015496188402175903, "step": 83180 }, { "epoch": 0.7829176470588235, "grad_norm": 0.4997873702752533, "learning_rate": 2.260875917729308e-06, "loss": 0.014506883919239044, "step": 83185 }, { "epoch": 0.7829647058823529, "grad_norm": 0.31120907660252484, "learning_rate": 2.2608079727518855e-06, "loss": 0.01905716359615326, "step": 83190 }, { "epoch": 0.7830117647058824, "grad_norm": 0.7442653423850615, "learning_rate": 2.260740033899844e-06, "loss": 0.01738225370645523, "step": 83195 }, { "epoch": 0.7830588235294118, "grad_norm": 0.6253630364160767, "learning_rate": 2.2606721011722638e-06, "loss": 0.015516966581344604, "step": 83200 }, { "epoch": 0.7831058823529412, "grad_norm": 0.6627323317789171, "learning_rate": 2.260604174568225e-06, "loss": 0.016772694885730743, "step": 83205 }, { "epoch": 0.7831529411764706, "grad_norm": 0.422694874021583, "learning_rate": 2.260536254086807e-06, "loss": 0.012162640690803528, "step": 83210 }, { "epoch": 0.7832, "grad_norm": 0.4153577020921761, "learning_rate": 2.2604683397270908e-06, "loss": 0.012714731693267822, "step": 83215 }, { "epoch": 0.7832470588235294, "grad_norm": 0.5192692552753247, "learning_rate": 2.260400431488156e-06, "loss": 0.014474889636039734, "step": 83220 }, { "epoch": 0.7832941176470588, "grad_norm": 0.5935590321031752, "learning_rate": 2.2603325293690844e-06, "loss": 0.01665411740541458, "step": 83225 }, { "epoch": 0.7833411764705882, "grad_norm": 0.3451971438546452, "learning_rate": 2.260264633368956e-06, "loss": 0.01355276107788086, "step": 83230 }, { "epoch": 0.7833882352941176, "grad_norm": 0.4479500705661043, "learning_rate": 2.260196743486852e-06, "loss": 0.01395595520734787, "step": 83235 }, { "epoch": 0.783435294117647, "grad_norm": 0.6146618937626226, "learning_rate": 2.2601288597218533e-06, "loss": 0.016508150100708007, "step": 83240 }, { "epoch": 0.7834823529411765, "grad_norm": 0.5436249285743423, "learning_rate": 2.260060982073042e-06, "loss": 0.01814844012260437, "step": 83245 }, { "epoch": 0.7835294117647059, "grad_norm": 0.34055809951396276, "learning_rate": 2.2599931105394996e-06, "loss": 0.010019199550151825, "step": 83250 }, { "epoch": 0.7835764705882353, "grad_norm": 0.7459068842292591, "learning_rate": 2.259925245120308e-06, "loss": 0.018681088089942934, "step": 83255 }, { "epoch": 0.7836235294117647, "grad_norm": 0.5524240979139587, "learning_rate": 2.2598573858145484e-06, "loss": 0.016096505522727966, "step": 83260 }, { "epoch": 0.7836705882352941, "grad_norm": 0.6952796410908848, "learning_rate": 2.2597895326213043e-06, "loss": 0.01692318618297577, "step": 83265 }, { "epoch": 0.7837176470588235, "grad_norm": 0.3951213409509914, "learning_rate": 2.259721685539657e-06, "loss": 0.015696480870246887, "step": 83270 }, { "epoch": 0.7837647058823529, "grad_norm": 0.4459781391477532, "learning_rate": 2.25965384456869e-06, "loss": 0.020004263520240782, "step": 83275 }, { "epoch": 0.7838117647058823, "grad_norm": 0.4033514117058265, "learning_rate": 2.259586009707485e-06, "loss": 0.014260087907314301, "step": 83280 }, { "epoch": 0.7838588235294117, "grad_norm": 0.46598367090760057, "learning_rate": 2.259518180955126e-06, "loss": 0.01976983845233917, "step": 83285 }, { "epoch": 0.7839058823529412, "grad_norm": 0.5296765494870503, "learning_rate": 2.2594503583106953e-06, "loss": 0.014916548132896423, "step": 83290 }, { "epoch": 0.7839529411764706, "grad_norm": 0.3476897291073947, "learning_rate": 2.2593825417732767e-06, "loss": 0.014167511463165283, "step": 83295 }, { "epoch": 0.784, "grad_norm": 0.3560232595562518, "learning_rate": 2.2593147313419543e-06, "loss": 0.015554344654083252, "step": 83300 }, { "epoch": 0.7840470588235294, "grad_norm": 0.537567400830658, "learning_rate": 2.259246927015811e-06, "loss": 0.015708129107952117, "step": 83305 }, { "epoch": 0.7840941176470588, "grad_norm": 0.5335348137918914, "learning_rate": 2.259179128793931e-06, "loss": 0.01777857542037964, "step": 83310 }, { "epoch": 0.7841411764705882, "grad_norm": 0.5160047601597012, "learning_rate": 2.259111336675399e-06, "loss": 0.016583281755447387, "step": 83315 }, { "epoch": 0.7841882352941176, "grad_norm": 0.4772363626029191, "learning_rate": 2.2590435506592984e-06, "loss": 0.015423479676246642, "step": 83320 }, { "epoch": 0.784235294117647, "grad_norm": 0.7233494601209063, "learning_rate": 2.258975770744714e-06, "loss": 0.01652381122112274, "step": 83325 }, { "epoch": 0.7842823529411764, "grad_norm": 0.48229678561578726, "learning_rate": 2.258907996930731e-06, "loss": 0.013454413414001465, "step": 83330 }, { "epoch": 0.7843294117647058, "grad_norm": 0.5526319191230235, "learning_rate": 2.2588402292164343e-06, "loss": 0.01359800398349762, "step": 83335 }, { "epoch": 0.7843764705882353, "grad_norm": 0.22398244406992088, "learning_rate": 2.258772467600908e-06, "loss": 0.011282722651958465, "step": 83340 }, { "epoch": 0.7844235294117647, "grad_norm": 0.5886706940681303, "learning_rate": 2.2587047120832377e-06, "loss": 0.016801774501800537, "step": 83345 }, { "epoch": 0.7844705882352941, "grad_norm": 0.42421176792490367, "learning_rate": 2.25863696266251e-06, "loss": 0.014294371008872986, "step": 83350 }, { "epoch": 0.7845176470588235, "grad_norm": 0.820591853220021, "learning_rate": 2.2585692193378096e-06, "loss": 0.02178107500076294, "step": 83355 }, { "epoch": 0.7845647058823529, "grad_norm": 0.377648442252082, "learning_rate": 2.2585014821082226e-06, "loss": 0.01554441899061203, "step": 83360 }, { "epoch": 0.7846117647058823, "grad_norm": 0.6323433208840452, "learning_rate": 2.258433750972835e-06, "loss": 0.016419735550880433, "step": 83365 }, { "epoch": 0.7846588235294117, "grad_norm": 0.2816496398702484, "learning_rate": 2.258366025930733e-06, "loss": 0.013823209702968598, "step": 83370 }, { "epoch": 0.7847058823529411, "grad_norm": 0.5161132463814225, "learning_rate": 2.258298306981003e-06, "loss": 0.017851778864860536, "step": 83375 }, { "epoch": 0.7847529411764705, "grad_norm": 0.3453119088090421, "learning_rate": 2.2582305941227317e-06, "loss": 0.01990375965833664, "step": 83380 }, { "epoch": 0.7848, "grad_norm": 0.4640645129086288, "learning_rate": 2.258162887355006e-06, "loss": 0.012846285104751587, "step": 83385 }, { "epoch": 0.7848470588235295, "grad_norm": 0.5719180038880513, "learning_rate": 2.258095186676913e-06, "loss": 0.01636545956134796, "step": 83390 }, { "epoch": 0.7848941176470589, "grad_norm": 0.47528550315963786, "learning_rate": 2.2580274920875396e-06, "loss": 0.015099230408668517, "step": 83395 }, { "epoch": 0.7849411764705883, "grad_norm": 0.3640640826625271, "learning_rate": 2.2579598035859734e-06, "loss": 0.012578141689300538, "step": 83400 }, { "epoch": 0.7849882352941177, "grad_norm": 0.47429380582875935, "learning_rate": 2.257892121171302e-06, "loss": 0.020850060880184172, "step": 83405 }, { "epoch": 0.785035294117647, "grad_norm": 0.473907255237291, "learning_rate": 2.2578244448426135e-06, "loss": 0.012327136099338531, "step": 83410 }, { "epoch": 0.7850823529411765, "grad_norm": 0.5007040823001035, "learning_rate": 2.257756774598995e-06, "loss": 0.012750691175460816, "step": 83415 }, { "epoch": 0.7851294117647059, "grad_norm": 0.5467483088969047, "learning_rate": 2.2576891104395352e-06, "loss": 0.021988479793071745, "step": 83420 }, { "epoch": 0.7851764705882353, "grad_norm": 0.46003145736333717, "learning_rate": 2.2576214523633226e-06, "loss": 0.013279806077480315, "step": 83425 }, { "epoch": 0.7852235294117647, "grad_norm": 0.491896160040431, "learning_rate": 2.2575538003694456e-06, "loss": 0.01946282386779785, "step": 83430 }, { "epoch": 0.7852705882352942, "grad_norm": 0.3756666441205893, "learning_rate": 2.2574861544569933e-06, "loss": 0.016938506066799162, "step": 83435 }, { "epoch": 0.7853176470588236, "grad_norm": 0.7482059155104468, "learning_rate": 2.2574185146250534e-06, "loss": 0.0181170254945755, "step": 83440 }, { "epoch": 0.785364705882353, "grad_norm": 0.26373388181759067, "learning_rate": 2.2573508808727166e-06, "loss": 0.012253949046134948, "step": 83445 }, { "epoch": 0.7854117647058824, "grad_norm": 0.6151027010841129, "learning_rate": 2.257283253199071e-06, "loss": 0.0142516627907753, "step": 83450 }, { "epoch": 0.7854588235294118, "grad_norm": 0.545624837858551, "learning_rate": 2.2572156316032067e-06, "loss": 0.016080662608146667, "step": 83455 }, { "epoch": 0.7855058823529412, "grad_norm": 0.5854132109454994, "learning_rate": 2.257148016084213e-06, "loss": 0.01665700078010559, "step": 83460 }, { "epoch": 0.7855529411764706, "grad_norm": 0.6105613761302585, "learning_rate": 2.2570804066411805e-06, "loss": 0.01559084951877594, "step": 83465 }, { "epoch": 0.7856, "grad_norm": 0.32263033381994544, "learning_rate": 2.2570128032731987e-06, "loss": 0.014162853360176086, "step": 83470 }, { "epoch": 0.7856470588235294, "grad_norm": 0.39532085659777405, "learning_rate": 2.2569452059793576e-06, "loss": 0.010201908648014069, "step": 83475 }, { "epoch": 0.7856941176470589, "grad_norm": 0.7169038329120809, "learning_rate": 2.2568776147587482e-06, "loss": 0.01882089376449585, "step": 83480 }, { "epoch": 0.7857411764705883, "grad_norm": 0.6510472947208837, "learning_rate": 2.2568100296104613e-06, "loss": 0.015818828344345094, "step": 83485 }, { "epoch": 0.7857882352941177, "grad_norm": 0.6272945911286986, "learning_rate": 2.256742450533587e-06, "loss": 0.016268768906593324, "step": 83490 }, { "epoch": 0.7858352941176471, "grad_norm": 0.5955726004582164, "learning_rate": 2.256674877527217e-06, "loss": 0.017655393481254576, "step": 83495 }, { "epoch": 0.7858823529411765, "grad_norm": 0.26803462557212515, "learning_rate": 2.2566073105904427e-06, "loss": 0.010402702540159226, "step": 83500 }, { "epoch": 0.7859294117647059, "grad_norm": 0.5237438712695116, "learning_rate": 2.2565397497223548e-06, "loss": 0.013645121455192566, "step": 83505 }, { "epoch": 0.7859764705882353, "grad_norm": 0.4116183195414975, "learning_rate": 2.2564721949220445e-06, "loss": 0.016799396276473998, "step": 83510 }, { "epoch": 0.7860235294117647, "grad_norm": 0.6633497773845886, "learning_rate": 2.2564046461886053e-06, "loss": 0.017487743496894838, "step": 83515 }, { "epoch": 0.7860705882352941, "grad_norm": 0.4832043456651185, "learning_rate": 2.2563371035211276e-06, "loss": 0.01479727029800415, "step": 83520 }, { "epoch": 0.7861176470588235, "grad_norm": 0.3454559519975048, "learning_rate": 2.2562695669187036e-06, "loss": 0.013900727033615112, "step": 83525 }, { "epoch": 0.786164705882353, "grad_norm": 0.34641486902451707, "learning_rate": 2.256202036380427e-06, "loss": 0.012296921014785767, "step": 83530 }, { "epoch": 0.7862117647058824, "grad_norm": 2.854777614277282, "learning_rate": 2.256134511905389e-06, "loss": 0.017100876569747923, "step": 83535 }, { "epoch": 0.7862588235294118, "grad_norm": 0.4068498868858539, "learning_rate": 2.2560669934926834e-06, "loss": 0.013556286692619324, "step": 83540 }, { "epoch": 0.7863058823529412, "grad_norm": 0.5439603401695864, "learning_rate": 2.255999481141402e-06, "loss": 0.013005167245864868, "step": 83545 }, { "epoch": 0.7863529411764706, "grad_norm": 0.5388829806638721, "learning_rate": 2.2559319748506388e-06, "loss": 0.017534723877906798, "step": 83550 }, { "epoch": 0.7864, "grad_norm": 0.6223001131902873, "learning_rate": 2.2558644746194872e-06, "loss": 0.012700532376766206, "step": 83555 }, { "epoch": 0.7864470588235294, "grad_norm": 0.4281647492575546, "learning_rate": 2.2557969804470396e-06, "loss": 0.011428475379943848, "step": 83560 }, { "epoch": 0.7864941176470588, "grad_norm": 0.6536066204861976, "learning_rate": 2.2557294923323907e-06, "loss": 0.01457454264163971, "step": 83565 }, { "epoch": 0.7865411764705882, "grad_norm": 0.3493539595184174, "learning_rate": 2.255662010274634e-06, "loss": 0.013665379583835601, "step": 83570 }, { "epoch": 0.7865882352941177, "grad_norm": 0.5185673393345961, "learning_rate": 2.2555945342728635e-06, "loss": 0.013700558245182038, "step": 83575 }, { "epoch": 0.7866352941176471, "grad_norm": 0.6361222683033977, "learning_rate": 2.2555270643261744e-06, "loss": 0.01649772673845291, "step": 83580 }, { "epoch": 0.7866823529411765, "grad_norm": 0.6647150174683595, "learning_rate": 2.255459600433659e-06, "loss": 0.016587769985198973, "step": 83585 }, { "epoch": 0.7867294117647059, "grad_norm": 0.32706317907955734, "learning_rate": 2.2553921425944144e-06, "loss": 0.017416587471961974, "step": 83590 }, { "epoch": 0.7867764705882353, "grad_norm": 0.5252151519362737, "learning_rate": 2.255324690807534e-06, "loss": 0.013286645710468292, "step": 83595 }, { "epoch": 0.7868235294117647, "grad_norm": 0.29963015770442875, "learning_rate": 2.255257245072113e-06, "loss": 0.010001379251480102, "step": 83600 }, { "epoch": 0.7868705882352941, "grad_norm": 0.5471449782264765, "learning_rate": 2.255189805387247e-06, "loss": 0.014779040217399597, "step": 83605 }, { "epoch": 0.7869176470588235, "grad_norm": 0.3232791575536828, "learning_rate": 2.255122371752031e-06, "loss": 0.014490893483161927, "step": 83610 }, { "epoch": 0.7869647058823529, "grad_norm": 0.5020372744232295, "learning_rate": 2.2550549441655605e-06, "loss": 0.014843320846557618, "step": 83615 }, { "epoch": 0.7870117647058823, "grad_norm": 1.0279781570029256, "learning_rate": 2.2549875226269326e-06, "loss": 0.012110660970211028, "step": 83620 }, { "epoch": 0.7870588235294118, "grad_norm": 0.4409501751060254, "learning_rate": 2.2549201071352407e-06, "loss": 0.013600495457649232, "step": 83625 }, { "epoch": 0.7871058823529412, "grad_norm": 0.45084936823425226, "learning_rate": 2.254852697689583e-06, "loss": 0.013438594341278077, "step": 83630 }, { "epoch": 0.7871529411764706, "grad_norm": 0.545203207435933, "learning_rate": 2.2547852942890553e-06, "loss": 0.014981016516685486, "step": 83635 }, { "epoch": 0.7872, "grad_norm": 0.5686350128143376, "learning_rate": 2.2547178969327543e-06, "loss": 0.014201214909553528, "step": 83640 }, { "epoch": 0.7872470588235294, "grad_norm": 0.3054633518760955, "learning_rate": 2.254650505619776e-06, "loss": 0.012986266613006591, "step": 83645 }, { "epoch": 0.7872941176470588, "grad_norm": 0.3787549429251356, "learning_rate": 2.2545831203492187e-06, "loss": 0.014727695286273957, "step": 83650 }, { "epoch": 0.7873411764705882, "grad_norm": 0.4739049695529895, "learning_rate": 2.2545157411201775e-06, "loss": 0.016581419110298156, "step": 83655 }, { "epoch": 0.7873882352941176, "grad_norm": 0.4688379811934785, "learning_rate": 2.2544483679317513e-06, "loss": 0.011986840516328812, "step": 83660 }, { "epoch": 0.787435294117647, "grad_norm": 0.579179364561393, "learning_rate": 2.254381000783037e-06, "loss": 0.016604584455490113, "step": 83665 }, { "epoch": 0.7874823529411765, "grad_norm": 0.42445578864754, "learning_rate": 2.2543136396731326e-06, "loss": 0.01429368257522583, "step": 83670 }, { "epoch": 0.7875294117647059, "grad_norm": 0.6652109933647493, "learning_rate": 2.254246284601136e-06, "loss": 0.02031199187040329, "step": 83675 }, { "epoch": 0.7875764705882353, "grad_norm": 1.4593989203324989, "learning_rate": 2.254178935566144e-06, "loss": 0.014466239511966706, "step": 83680 }, { "epoch": 0.7876235294117647, "grad_norm": 0.5460362690240969, "learning_rate": 2.254111592567256e-06, "loss": 0.014432360231876374, "step": 83685 }, { "epoch": 0.7876705882352941, "grad_norm": 0.42709247203867234, "learning_rate": 2.254044255603571e-06, "loss": 0.013410744071006776, "step": 83690 }, { "epoch": 0.7877176470588235, "grad_norm": 0.6144010820470803, "learning_rate": 2.2539769246741862e-06, "loss": 0.014709113538265229, "step": 83695 }, { "epoch": 0.7877647058823529, "grad_norm": 0.5575287377955455, "learning_rate": 2.253909599778201e-06, "loss": 0.014535075426101685, "step": 83700 }, { "epoch": 0.7878117647058823, "grad_norm": 0.48556529724954267, "learning_rate": 2.2538422809147142e-06, "loss": 0.015969130396842956, "step": 83705 }, { "epoch": 0.7878588235294117, "grad_norm": 0.3762861550142146, "learning_rate": 2.2537749680828256e-06, "loss": 0.016479101777076722, "step": 83710 }, { "epoch": 0.7879058823529411, "grad_norm": 0.5838122443287878, "learning_rate": 2.253707661281634e-06, "loss": 0.015862807631492615, "step": 83715 }, { "epoch": 0.7879529411764706, "grad_norm": 0.3741014651119936, "learning_rate": 2.253640360510239e-06, "loss": 0.013433802127838134, "step": 83720 }, { "epoch": 0.788, "grad_norm": 0.6251221661185425, "learning_rate": 2.25357306576774e-06, "loss": 0.01607281416654587, "step": 83725 }, { "epoch": 0.7880470588235294, "grad_norm": 0.3171303622802679, "learning_rate": 2.253505777053238e-06, "loss": 0.011113781481981277, "step": 83730 }, { "epoch": 0.7880941176470588, "grad_norm": 0.587845841210236, "learning_rate": 2.253438494365832e-06, "loss": 0.014681723713874818, "step": 83735 }, { "epoch": 0.7881411764705882, "grad_norm": 0.20203533641530258, "learning_rate": 2.2533712177046225e-06, "loss": 0.012778198719024659, "step": 83740 }, { "epoch": 0.7881882352941176, "grad_norm": 0.3822180841319474, "learning_rate": 2.2533039470687106e-06, "loss": 0.016964396834373473, "step": 83745 }, { "epoch": 0.788235294117647, "grad_norm": 0.3625681835966031, "learning_rate": 2.2532366824571968e-06, "loss": 0.01242060661315918, "step": 83750 }, { "epoch": 0.7882823529411764, "grad_norm": 0.33131959340237266, "learning_rate": 2.2531694238691808e-06, "loss": 0.01524764597415924, "step": 83755 }, { "epoch": 0.7883294117647058, "grad_norm": 0.3459832449628339, "learning_rate": 2.2531021713037655e-06, "loss": 0.010423216223716735, "step": 83760 }, { "epoch": 0.7883764705882353, "grad_norm": 0.6138845511618357, "learning_rate": 2.2530349247600515e-06, "loss": 0.016091585159301758, "step": 83765 }, { "epoch": 0.7884235294117647, "grad_norm": 0.4021417685776906, "learning_rate": 2.2529676842371395e-06, "loss": 0.014815208315849305, "step": 83770 }, { "epoch": 0.7884705882352941, "grad_norm": 0.4934810876439133, "learning_rate": 2.252900449734132e-06, "loss": 0.017127540707588196, "step": 83775 }, { "epoch": 0.7885176470588235, "grad_norm": 0.5901788016378701, "learning_rate": 2.25283322125013e-06, "loss": 0.014985063672065735, "step": 83780 }, { "epoch": 0.788564705882353, "grad_norm": 0.374309818724141, "learning_rate": 2.252765998784236e-06, "loss": 0.011574208736419678, "step": 83785 }, { "epoch": 0.7886117647058823, "grad_norm": 0.553146013546805, "learning_rate": 2.252698782335552e-06, "loss": 0.02016497850418091, "step": 83790 }, { "epoch": 0.7886588235294117, "grad_norm": 0.4739656907090261, "learning_rate": 2.2526315719031803e-06, "loss": 0.014607906341552734, "step": 83795 }, { "epoch": 0.7887058823529411, "grad_norm": 0.6354340015686498, "learning_rate": 2.2525643674862243e-06, "loss": 0.016934943199157716, "step": 83800 }, { "epoch": 0.7887529411764705, "grad_norm": 0.5547737591767865, "learning_rate": 2.2524971690837854e-06, "loss": 0.01610236018896103, "step": 83805 }, { "epoch": 0.7888, "grad_norm": 0.5326770441199068, "learning_rate": 2.2524299766949674e-06, "loss": 0.014390289783477783, "step": 83810 }, { "epoch": 0.7888470588235295, "grad_norm": 0.5130063130981581, "learning_rate": 2.2523627903188733e-06, "loss": 0.010119101405143738, "step": 83815 }, { "epoch": 0.7888941176470589, "grad_norm": 0.35926373129170697, "learning_rate": 2.2522956099546058e-06, "loss": 0.010234251618385315, "step": 83820 }, { "epoch": 0.7889411764705883, "grad_norm": 0.3526147072308864, "learning_rate": 2.2522284356012694e-06, "loss": 0.011712226271629333, "step": 83825 }, { "epoch": 0.7889882352941177, "grad_norm": 0.5563508549462937, "learning_rate": 2.2521612672579673e-06, "loss": 0.013299039006233216, "step": 83830 }, { "epoch": 0.7890352941176471, "grad_norm": 0.4705564752788515, "learning_rate": 2.252094104923803e-06, "loss": 0.00987970232963562, "step": 83835 }, { "epoch": 0.7890823529411765, "grad_norm": 0.4052094292509566, "learning_rate": 2.252026948597881e-06, "loss": 0.0152602881193161, "step": 83840 }, { "epoch": 0.7891294117647059, "grad_norm": 0.5245043632502386, "learning_rate": 2.2519597982793056e-06, "loss": 0.018911872804164887, "step": 83845 }, { "epoch": 0.7891764705882353, "grad_norm": 0.42014203702419445, "learning_rate": 2.251892653967181e-06, "loss": 0.014807474613189698, "step": 83850 }, { "epoch": 0.7892235294117647, "grad_norm": 0.4954471176163755, "learning_rate": 2.2518255156606117e-06, "loss": 0.01306910663843155, "step": 83855 }, { "epoch": 0.7892705882352942, "grad_norm": 0.4254496303900178, "learning_rate": 2.251758383358703e-06, "loss": 0.014195472002029419, "step": 83860 }, { "epoch": 0.7893176470588236, "grad_norm": 0.5452783041696477, "learning_rate": 2.2516912570605594e-06, "loss": 0.016005712747573852, "step": 83865 }, { "epoch": 0.789364705882353, "grad_norm": 0.4475960855365549, "learning_rate": 2.2516241367652863e-06, "loss": 0.02124616801738739, "step": 83870 }, { "epoch": 0.7894117647058824, "grad_norm": 0.6803271529822574, "learning_rate": 2.2515570224719883e-06, "loss": 0.014989069104194641, "step": 83875 }, { "epoch": 0.7894588235294118, "grad_norm": 0.5015836345108688, "learning_rate": 2.2514899141797726e-06, "loss": 0.015599001944065095, "step": 83880 }, { "epoch": 0.7895058823529412, "grad_norm": 0.459210046470499, "learning_rate": 2.2514228118877435e-06, "loss": 0.01422801911830902, "step": 83885 }, { "epoch": 0.7895529411764706, "grad_norm": 0.4355852581066968, "learning_rate": 2.2513557155950073e-06, "loss": 0.011598330736160279, "step": 83890 }, { "epoch": 0.7896, "grad_norm": 0.8251299186663203, "learning_rate": 2.2512886253006705e-06, "loss": 0.01686139404773712, "step": 83895 }, { "epoch": 0.7896470588235294, "grad_norm": 0.4610273211181646, "learning_rate": 2.251221541003839e-06, "loss": 0.01313754916191101, "step": 83900 }, { "epoch": 0.7896941176470588, "grad_norm": 0.7774301143623621, "learning_rate": 2.251154462703619e-06, "loss": 0.013353596627712249, "step": 83905 }, { "epoch": 0.7897411764705883, "grad_norm": 0.3743702846164502, "learning_rate": 2.2510873903991182e-06, "loss": 0.014231276512145997, "step": 83910 }, { "epoch": 0.7897882352941177, "grad_norm": 0.7177870150538074, "learning_rate": 2.251020324089442e-06, "loss": 0.014855889976024628, "step": 83915 }, { "epoch": 0.7898352941176471, "grad_norm": 0.5779958642284104, "learning_rate": 2.250953263773699e-06, "loss": 0.01761784553527832, "step": 83920 }, { "epoch": 0.7898823529411765, "grad_norm": 0.4516594299240157, "learning_rate": 2.2508862094509952e-06, "loss": 0.022985224425792695, "step": 83925 }, { "epoch": 0.7899294117647059, "grad_norm": 0.42801917039567855, "learning_rate": 2.2508191611204384e-06, "loss": 0.019364088773727417, "step": 83930 }, { "epoch": 0.7899764705882353, "grad_norm": 0.27609747304241883, "learning_rate": 2.250752118781136e-06, "loss": 0.012928569316864013, "step": 83935 }, { "epoch": 0.7900235294117647, "grad_norm": 0.5673371547899111, "learning_rate": 2.2506850824321966e-06, "loss": 0.015098947286605834, "step": 83940 }, { "epoch": 0.7900705882352941, "grad_norm": 0.3557310382411572, "learning_rate": 2.2506180520727275e-06, "loss": 0.012049703299999237, "step": 83945 }, { "epoch": 0.7901176470588235, "grad_norm": 0.5098881547960525, "learning_rate": 2.250551027701837e-06, "loss": 0.0126572847366333, "step": 83950 }, { "epoch": 0.790164705882353, "grad_norm": 0.612241707273563, "learning_rate": 2.2504840093186337e-06, "loss": 0.011894060671329499, "step": 83955 }, { "epoch": 0.7902117647058824, "grad_norm": 0.42960411008021654, "learning_rate": 2.2504169969222252e-06, "loss": 0.014083415269851685, "step": 83960 }, { "epoch": 0.7902588235294118, "grad_norm": 0.2970731994435227, "learning_rate": 2.250349990511721e-06, "loss": 0.01362941861152649, "step": 83965 }, { "epoch": 0.7903058823529412, "grad_norm": 0.6917388737606177, "learning_rate": 2.2502829900862303e-06, "loss": 0.0221463680267334, "step": 83970 }, { "epoch": 0.7903529411764706, "grad_norm": 0.3743829537844807, "learning_rate": 2.250215995644861e-06, "loss": 0.017620638012886047, "step": 83975 }, { "epoch": 0.7904, "grad_norm": 0.43002977481941224, "learning_rate": 2.2501490071867236e-06, "loss": 0.010365522652864455, "step": 83980 }, { "epoch": 0.7904470588235294, "grad_norm": 0.5459160433484899, "learning_rate": 2.250082024710927e-06, "loss": 0.013936188817024232, "step": 83985 }, { "epoch": 0.7904941176470588, "grad_norm": 0.5339715361121835, "learning_rate": 2.250015048216581e-06, "loss": 0.016508294641971587, "step": 83990 }, { "epoch": 0.7905411764705882, "grad_norm": 0.42532323762938673, "learning_rate": 2.2499480777027947e-06, "loss": 0.014751285314559937, "step": 83995 }, { "epoch": 0.7905882352941176, "grad_norm": 0.9337882061336982, "learning_rate": 2.2498811131686792e-06, "loss": 0.012721502780914306, "step": 84000 }, { "epoch": 0.7906352941176471, "grad_norm": 0.4503678233410306, "learning_rate": 2.2498141546133445e-06, "loss": 0.012017978727817536, "step": 84005 }, { "epoch": 0.7906823529411765, "grad_norm": 0.39686629808419116, "learning_rate": 2.2497472020359004e-06, "loss": 0.016884922981262207, "step": 84010 }, { "epoch": 0.7907294117647059, "grad_norm": 0.6405768054749161, "learning_rate": 2.249680255435458e-06, "loss": 0.01386641263961792, "step": 84015 }, { "epoch": 0.7907764705882353, "grad_norm": 0.5004164089475379, "learning_rate": 2.249613314811127e-06, "loss": 0.015556116402149201, "step": 84020 }, { "epoch": 0.7908235294117647, "grad_norm": 0.42518636006809774, "learning_rate": 2.24954638016202e-06, "loss": 0.01888214945793152, "step": 84025 }, { "epoch": 0.7908705882352941, "grad_norm": 0.4880405046006576, "learning_rate": 2.2494794514872476e-06, "loss": 0.013417604565620422, "step": 84030 }, { "epoch": 0.7909176470588235, "grad_norm": 0.41949729128947794, "learning_rate": 2.2494125287859205e-06, "loss": 0.016697971522808074, "step": 84035 }, { "epoch": 0.7909647058823529, "grad_norm": 0.5232330822487111, "learning_rate": 2.24934561205715e-06, "loss": 0.013075941801071167, "step": 84040 }, { "epoch": 0.7910117647058823, "grad_norm": 0.3580657466930677, "learning_rate": 2.2492787013000482e-06, "loss": 0.015227386355400085, "step": 84045 }, { "epoch": 0.7910588235294118, "grad_norm": 0.438803219138227, "learning_rate": 2.2492117965137276e-06, "loss": 0.015708789229393005, "step": 84050 }, { "epoch": 0.7911058823529412, "grad_norm": 0.49285683948581244, "learning_rate": 2.2491448976972995e-06, "loss": 0.012472379952669144, "step": 84055 }, { "epoch": 0.7911529411764706, "grad_norm": 0.44879597939266536, "learning_rate": 2.249078004849876e-06, "loss": 0.014735208451747894, "step": 84060 }, { "epoch": 0.7912, "grad_norm": 0.7222697524832854, "learning_rate": 2.2490111179705706e-06, "loss": 0.014795354008674622, "step": 84065 }, { "epoch": 0.7912470588235294, "grad_norm": 0.45972187342202264, "learning_rate": 2.2489442370584944e-06, "loss": 0.012358614802360534, "step": 84070 }, { "epoch": 0.7912941176470588, "grad_norm": 0.4544026641077043, "learning_rate": 2.2488773621127613e-06, "loss": 0.01289432793855667, "step": 84075 }, { "epoch": 0.7913411764705882, "grad_norm": 0.4321778583843472, "learning_rate": 2.248810493132484e-06, "loss": 0.013913708925247192, "step": 84080 }, { "epoch": 0.7913882352941176, "grad_norm": 0.35481756152329685, "learning_rate": 2.248743630116775e-06, "loss": 0.017639511823654176, "step": 84085 }, { "epoch": 0.791435294117647, "grad_norm": 0.5336668951861457, "learning_rate": 2.248676773064749e-06, "loss": 0.015552377700805664, "step": 84090 }, { "epoch": 0.7914823529411765, "grad_norm": 0.5496611802460883, "learning_rate": 2.248609921975518e-06, "loss": 0.012301240861415864, "step": 84095 }, { "epoch": 0.7915294117647059, "grad_norm": 0.3545589140003687, "learning_rate": 2.2485430768481965e-06, "loss": 0.01447947919368744, "step": 84100 }, { "epoch": 0.7915764705882353, "grad_norm": 0.7056622923079542, "learning_rate": 2.248476237681899e-06, "loss": 0.02194719612598419, "step": 84105 }, { "epoch": 0.7916235294117647, "grad_norm": 0.4872872712433435, "learning_rate": 2.2484094044757388e-06, "loss": 0.014962029457092286, "step": 84110 }, { "epoch": 0.7916705882352941, "grad_norm": 0.387460822405936, "learning_rate": 2.2483425772288297e-06, "loss": 0.012297070771455764, "step": 84115 }, { "epoch": 0.7917176470588235, "grad_norm": 0.6098442262647487, "learning_rate": 2.248275755940287e-06, "loss": 0.013078130781650543, "step": 84120 }, { "epoch": 0.7917647058823529, "grad_norm": 0.32707274421748933, "learning_rate": 2.248208940609225e-06, "loss": 0.013872267305850982, "step": 84125 }, { "epoch": 0.7918117647058823, "grad_norm": 0.406879794237852, "learning_rate": 2.2481421312347587e-06, "loss": 0.016916343569755556, "step": 84130 }, { "epoch": 0.7918588235294117, "grad_norm": 0.502592575880032, "learning_rate": 2.248075327816003e-06, "loss": 0.013189341127872466, "step": 84135 }, { "epoch": 0.7919058823529411, "grad_norm": 0.4801383947520627, "learning_rate": 2.2480085303520728e-06, "loss": 0.01175302192568779, "step": 84140 }, { "epoch": 0.7919529411764706, "grad_norm": 0.49922096728582904, "learning_rate": 2.247941738842084e-06, "loss": 0.018336760997772216, "step": 84145 }, { "epoch": 0.792, "grad_norm": 0.4999259424303817, "learning_rate": 2.2478749532851517e-06, "loss": 0.01638123095035553, "step": 84150 }, { "epoch": 0.7920470588235294, "grad_norm": 0.450534290360668, "learning_rate": 2.2478081736803917e-06, "loss": 0.013977861404418946, "step": 84155 }, { "epoch": 0.7920941176470588, "grad_norm": 0.726638125578063, "learning_rate": 2.2477414000269203e-06, "loss": 0.01712618172168732, "step": 84160 }, { "epoch": 0.7921411764705882, "grad_norm": 0.42473839736657737, "learning_rate": 2.247674632323853e-06, "loss": 0.015969252586364745, "step": 84165 }, { "epoch": 0.7921882352941176, "grad_norm": 0.435525877287074, "learning_rate": 2.2476078705703067e-06, "loss": 0.016298171877861024, "step": 84170 }, { "epoch": 0.792235294117647, "grad_norm": 0.4573010988410259, "learning_rate": 2.2475411147653975e-06, "loss": 0.011269405484199524, "step": 84175 }, { "epoch": 0.7922823529411764, "grad_norm": 0.5027737213810528, "learning_rate": 2.2474743649082423e-06, "loss": 0.012597192823886872, "step": 84180 }, { "epoch": 0.7923294117647058, "grad_norm": 0.4871589716705852, "learning_rate": 2.2474076209979577e-06, "loss": 0.014972345530986786, "step": 84185 }, { "epoch": 0.7923764705882353, "grad_norm": 0.5392665500244508, "learning_rate": 2.247340883033661e-06, "loss": 0.016198897361755372, "step": 84190 }, { "epoch": 0.7924235294117647, "grad_norm": 0.734489915888876, "learning_rate": 2.247274151014469e-06, "loss": 0.013784533739089966, "step": 84195 }, { "epoch": 0.7924705882352941, "grad_norm": 0.49032220396181153, "learning_rate": 2.2472074249394994e-06, "loss": 0.017333841323852538, "step": 84200 }, { "epoch": 0.7925176470588235, "grad_norm": 0.319346749587699, "learning_rate": 2.2471407048078694e-06, "loss": 0.011983823031187057, "step": 84205 }, { "epoch": 0.792564705882353, "grad_norm": 0.3433526692684478, "learning_rate": 2.2470739906186974e-06, "loss": 0.0152716726064682, "step": 84210 }, { "epoch": 0.7926117647058823, "grad_norm": 0.6818418638953361, "learning_rate": 2.247007282371101e-06, "loss": 0.01438983529806137, "step": 84215 }, { "epoch": 0.7926588235294117, "grad_norm": 0.5318129727153005, "learning_rate": 2.246940580064198e-06, "loss": 0.017474082112312318, "step": 84220 }, { "epoch": 0.7927058823529411, "grad_norm": 0.5114059589467601, "learning_rate": 2.2468738836971075e-06, "loss": 0.016084693372249603, "step": 84225 }, { "epoch": 0.7927529411764705, "grad_norm": 0.45885097388291896, "learning_rate": 2.2468071932689473e-06, "loss": 0.01307048201560974, "step": 84230 }, { "epoch": 0.7928, "grad_norm": 0.3554410071336057, "learning_rate": 2.2467405087788358e-06, "loss": 0.01619393825531006, "step": 84235 }, { "epoch": 0.7928470588235295, "grad_norm": 0.3950649296333548, "learning_rate": 2.2466738302258933e-06, "loss": 0.012118585407733917, "step": 84240 }, { "epoch": 0.7928941176470589, "grad_norm": 0.41299476831694376, "learning_rate": 2.2466071576092367e-06, "loss": 0.014747889339923858, "step": 84245 }, { "epoch": 0.7929411764705883, "grad_norm": 0.48579195954484433, "learning_rate": 2.246540490927987e-06, "loss": 0.011826993525028228, "step": 84250 }, { "epoch": 0.7929882352941177, "grad_norm": 0.3731402647543058, "learning_rate": 2.2464738301812627e-06, "loss": 0.014127486944198608, "step": 84255 }, { "epoch": 0.7930352941176471, "grad_norm": 0.8266930845706548, "learning_rate": 2.246407175368184e-06, "loss": 0.019349367916584016, "step": 84260 }, { "epoch": 0.7930823529411765, "grad_norm": 0.37323079176946583, "learning_rate": 2.2463405264878705e-06, "loss": 0.013884159922599792, "step": 84265 }, { "epoch": 0.7931294117647059, "grad_norm": 0.49226667368511995, "learning_rate": 2.2462738835394416e-06, "loss": 0.013771709799766541, "step": 84270 }, { "epoch": 0.7931764705882353, "grad_norm": 0.4565534385791206, "learning_rate": 2.246207246522018e-06, "loss": 0.01337628960609436, "step": 84275 }, { "epoch": 0.7932235294117647, "grad_norm": 0.47806783805501935, "learning_rate": 2.24614061543472e-06, "loss": 0.01597311645746231, "step": 84280 }, { "epoch": 0.7932705882352942, "grad_norm": 0.41500813121843555, "learning_rate": 2.2460739902766675e-06, "loss": 0.013720965385437012, "step": 84285 }, { "epoch": 0.7933176470588236, "grad_norm": 0.5520251994661365, "learning_rate": 2.246007371046982e-06, "loss": 0.016641995310783385, "step": 84290 }, { "epoch": 0.793364705882353, "grad_norm": 0.6565690431186747, "learning_rate": 2.2459407577447845e-06, "loss": 0.01727881133556366, "step": 84295 }, { "epoch": 0.7934117647058824, "grad_norm": 0.570797259234101, "learning_rate": 2.2458741503691946e-06, "loss": 0.01587620973587036, "step": 84300 }, { "epoch": 0.7934588235294118, "grad_norm": 0.2829303094866281, "learning_rate": 2.245807548919335e-06, "loss": 0.016133622825145723, "step": 84305 }, { "epoch": 0.7935058823529412, "grad_norm": 0.3391862782236627, "learning_rate": 2.2457409533943266e-06, "loss": 0.01313641220331192, "step": 84310 }, { "epoch": 0.7935529411764706, "grad_norm": 0.504371999844526, "learning_rate": 2.2456743637932906e-06, "loss": 0.01638251543045044, "step": 84315 }, { "epoch": 0.7936, "grad_norm": 0.28674425770897466, "learning_rate": 2.2456077801153502e-06, "loss": 0.016467732191085816, "step": 84320 }, { "epoch": 0.7936470588235294, "grad_norm": 0.43993436167522537, "learning_rate": 2.2455412023596253e-06, "loss": 0.01466420739889145, "step": 84325 }, { "epoch": 0.7936941176470588, "grad_norm": 0.38977696892687413, "learning_rate": 2.2454746305252392e-06, "loss": 0.010166692733764648, "step": 84330 }, { "epoch": 0.7937411764705883, "grad_norm": 0.6727262361695165, "learning_rate": 2.245408064611315e-06, "loss": 0.015223126113414764, "step": 84335 }, { "epoch": 0.7937882352941177, "grad_norm": 0.5650953558015996, "learning_rate": 2.2453415046169734e-06, "loss": 0.014120221138000488, "step": 84340 }, { "epoch": 0.7938352941176471, "grad_norm": 0.33049396229643957, "learning_rate": 2.245274950541338e-06, "loss": 0.011520943790674209, "step": 84345 }, { "epoch": 0.7938823529411765, "grad_norm": 0.6754496923730375, "learning_rate": 2.245208402383532e-06, "loss": 0.01290154606103897, "step": 84350 }, { "epoch": 0.7939294117647059, "grad_norm": 0.5333672186546972, "learning_rate": 2.245141860142678e-06, "loss": 0.014786508679389954, "step": 84355 }, { "epoch": 0.7939764705882353, "grad_norm": 0.33603691236598626, "learning_rate": 2.2450753238178995e-06, "loss": 0.011533963680267333, "step": 84360 }, { "epoch": 0.7940235294117647, "grad_norm": 0.5077104159645214, "learning_rate": 2.24500879340832e-06, "loss": 0.016158407926559447, "step": 84365 }, { "epoch": 0.7940705882352941, "grad_norm": 0.4845510944261286, "learning_rate": 2.2449422689130625e-06, "loss": 0.01527792364358902, "step": 84370 }, { "epoch": 0.7941176470588235, "grad_norm": 0.6603068530532417, "learning_rate": 2.244875750331251e-06, "loss": 0.015576770901679993, "step": 84375 }, { "epoch": 0.794164705882353, "grad_norm": 0.36520952566304826, "learning_rate": 2.24480923766201e-06, "loss": 0.016487473249435426, "step": 84380 }, { "epoch": 0.7942117647058824, "grad_norm": 0.5386358584205949, "learning_rate": 2.2447427309044636e-06, "loss": 0.011941885948181153, "step": 84385 }, { "epoch": 0.7942588235294118, "grad_norm": 0.44903469100721344, "learning_rate": 2.244676230057735e-06, "loss": 0.014281612634658814, "step": 84390 }, { "epoch": 0.7943058823529412, "grad_norm": 0.41928933733714, "learning_rate": 2.24460973512095e-06, "loss": 0.009828799217939378, "step": 84395 }, { "epoch": 0.7943529411764706, "grad_norm": 0.47331670434333406, "learning_rate": 2.2445432460932323e-06, "loss": 0.015390637516975402, "step": 84400 }, { "epoch": 0.7944, "grad_norm": 0.5036923603858627, "learning_rate": 2.2444767629737076e-06, "loss": 0.017867374420166015, "step": 84405 }, { "epoch": 0.7944470588235294, "grad_norm": 0.41318216688451065, "learning_rate": 2.244410285761501e-06, "loss": 0.013896486163139344, "step": 84410 }, { "epoch": 0.7944941176470588, "grad_norm": 0.3981592177890311, "learning_rate": 2.2443438144557365e-06, "loss": 0.014691494405269623, "step": 84415 }, { "epoch": 0.7945411764705882, "grad_norm": 0.343929081808088, "learning_rate": 2.244277349055541e-06, "loss": 0.012087533622980118, "step": 84420 }, { "epoch": 0.7945882352941176, "grad_norm": 0.496735974927661, "learning_rate": 2.2442108895600388e-06, "loss": 0.01771382838487625, "step": 84425 }, { "epoch": 0.7946352941176471, "grad_norm": 0.5748456717162058, "learning_rate": 2.244144435968357e-06, "loss": 0.015926721692085265, "step": 84430 }, { "epoch": 0.7946823529411765, "grad_norm": 0.42399511071627966, "learning_rate": 2.244077988279621e-06, "loss": 0.014262549579143524, "step": 84435 }, { "epoch": 0.7947294117647059, "grad_norm": 0.41708140335210936, "learning_rate": 2.2440115464929564e-06, "loss": 0.014984813332557679, "step": 84440 }, { "epoch": 0.7947764705882353, "grad_norm": 0.28408475122419696, "learning_rate": 2.24394511060749e-06, "loss": 0.01240074187517166, "step": 84445 }, { "epoch": 0.7948235294117647, "grad_norm": 0.46503679381859897, "learning_rate": 2.2438786806223485e-06, "loss": 0.01305142641067505, "step": 84450 }, { "epoch": 0.7948705882352941, "grad_norm": 0.5225616616297108, "learning_rate": 2.243812256536658e-06, "loss": 0.019227313995361327, "step": 84455 }, { "epoch": 0.7949176470588235, "grad_norm": 0.4979999943799903, "learning_rate": 2.243745838349546e-06, "loss": 0.01370031237602234, "step": 84460 }, { "epoch": 0.7949647058823529, "grad_norm": 0.4055175803665546, "learning_rate": 2.2436794260601393e-06, "loss": 0.015376105904579163, "step": 84465 }, { "epoch": 0.7950117647058823, "grad_norm": 0.40352563088272714, "learning_rate": 2.2436130196675644e-06, "loss": 0.0144828662276268, "step": 84470 }, { "epoch": 0.7950588235294118, "grad_norm": 0.47608927158540965, "learning_rate": 2.2435466191709503e-06, "loss": 0.015619739890098572, "step": 84475 }, { "epoch": 0.7951058823529412, "grad_norm": 0.6102419317375906, "learning_rate": 2.243480224569423e-06, "loss": 0.015273855626583099, "step": 84480 }, { "epoch": 0.7951529411764706, "grad_norm": 0.6354691540732598, "learning_rate": 2.243413835862111e-06, "loss": 0.015771925449371338, "step": 84485 }, { "epoch": 0.7952, "grad_norm": 0.38197060313472164, "learning_rate": 2.2433474530481427e-06, "loss": 0.01495664268732071, "step": 84490 }, { "epoch": 0.7952470588235294, "grad_norm": 0.5602722739177716, "learning_rate": 2.2432810761266447e-06, "loss": 0.01448158621788025, "step": 84495 }, { "epoch": 0.7952941176470588, "grad_norm": 0.4495380306953589, "learning_rate": 2.2432147050967475e-06, "loss": 0.011409324407577515, "step": 84500 }, { "epoch": 0.7953411764705882, "grad_norm": 0.5368954145315533, "learning_rate": 2.243148339957578e-06, "loss": 0.014562290906906129, "step": 84505 }, { "epoch": 0.7953882352941176, "grad_norm": 0.44872328548600987, "learning_rate": 2.2430819807082644e-06, "loss": 0.01791727840900421, "step": 84510 }, { "epoch": 0.795435294117647, "grad_norm": 0.5238095647661354, "learning_rate": 2.2430156273479374e-06, "loss": 0.01663903295993805, "step": 84515 }, { "epoch": 0.7954823529411764, "grad_norm": 0.37894246533997056, "learning_rate": 2.242949279875725e-06, "loss": 0.0167059525847435, "step": 84520 }, { "epoch": 0.7955294117647059, "grad_norm": 0.5924051893355037, "learning_rate": 2.2428829382907562e-06, "loss": 0.013031128048896789, "step": 84525 }, { "epoch": 0.7955764705882353, "grad_norm": 0.42197061640893396, "learning_rate": 2.242816602592161e-06, "loss": 0.013730329275131226, "step": 84530 }, { "epoch": 0.7956235294117647, "grad_norm": 0.8242384395248299, "learning_rate": 2.2427502727790683e-06, "loss": 0.01373235583305359, "step": 84535 }, { "epoch": 0.7956705882352941, "grad_norm": 0.4623133423584264, "learning_rate": 2.242683948850608e-06, "loss": 0.012352235615253448, "step": 84540 }, { "epoch": 0.7957176470588235, "grad_norm": 0.4680460068263659, "learning_rate": 2.2426176308059105e-06, "loss": 0.020151284337043763, "step": 84545 }, { "epoch": 0.7957647058823529, "grad_norm": 0.6228021080577069, "learning_rate": 2.2425513186441056e-06, "loss": 0.022146591544151308, "step": 84550 }, { "epoch": 0.7958117647058823, "grad_norm": 0.43767062600992546, "learning_rate": 2.2424850123643237e-06, "loss": 0.02064107060432434, "step": 84555 }, { "epoch": 0.7958588235294117, "grad_norm": 0.41017599270254385, "learning_rate": 2.242418711965695e-06, "loss": 0.016783221065998076, "step": 84560 }, { "epoch": 0.7959058823529411, "grad_norm": 0.4282877655790145, "learning_rate": 2.2423524174473503e-06, "loss": 0.014358057081699372, "step": 84565 }, { "epoch": 0.7959529411764706, "grad_norm": 0.3323236809180409, "learning_rate": 2.2422861288084207e-06, "loss": 0.01014457643032074, "step": 84570 }, { "epoch": 0.796, "grad_norm": 0.5002894046013903, "learning_rate": 2.242219846048037e-06, "loss": 0.012135882675647736, "step": 84575 }, { "epoch": 0.7960470588235294, "grad_norm": 0.38942055686685284, "learning_rate": 2.24215356916533e-06, "loss": 0.016538926959037782, "step": 84580 }, { "epoch": 0.7960941176470588, "grad_norm": 0.4780747075103832, "learning_rate": 2.2420872981594323e-06, "loss": 0.016828957200050353, "step": 84585 }, { "epoch": 0.7961411764705882, "grad_norm": 0.5629424465458954, "learning_rate": 2.242021033029474e-06, "loss": 0.011580422520637512, "step": 84590 }, { "epoch": 0.7961882352941176, "grad_norm": 0.704477891011487, "learning_rate": 2.2419547737745875e-06, "loss": 0.017776186764240264, "step": 84595 }, { "epoch": 0.796235294117647, "grad_norm": 0.6826195210831889, "learning_rate": 2.2418885203939046e-06, "loss": 0.01821070909500122, "step": 84600 }, { "epoch": 0.7962823529411764, "grad_norm": 0.5501610378645694, "learning_rate": 2.241822272886558e-06, "loss": 0.01766153275966644, "step": 84605 }, { "epoch": 0.7963294117647058, "grad_norm": 0.3989483814316202, "learning_rate": 2.2417560312516786e-06, "loss": 0.01328597664833069, "step": 84610 }, { "epoch": 0.7963764705882352, "grad_norm": 0.6638993561055153, "learning_rate": 2.2416897954884e-06, "loss": 0.015779057145118715, "step": 84615 }, { "epoch": 0.7964235294117648, "grad_norm": 0.46665231487780384, "learning_rate": 2.2416235655958543e-06, "loss": 0.011285410076379777, "step": 84620 }, { "epoch": 0.7964705882352942, "grad_norm": 0.4231301220782857, "learning_rate": 2.241557341573175e-06, "loss": 0.02307345122098923, "step": 84625 }, { "epoch": 0.7965176470588236, "grad_norm": 0.4684804914238384, "learning_rate": 2.241491123419494e-06, "loss": 0.016898369789123534, "step": 84630 }, { "epoch": 0.796564705882353, "grad_norm": 0.5356355769999641, "learning_rate": 2.2414249111339452e-06, "loss": 0.013325035572052002, "step": 84635 }, { "epoch": 0.7966117647058824, "grad_norm": 0.38830144768229835, "learning_rate": 2.241358704715662e-06, "loss": 0.015831127762794495, "step": 84640 }, { "epoch": 0.7966588235294118, "grad_norm": 0.584927622317091, "learning_rate": 2.2412925041637776e-06, "loss": 0.015482109785079957, "step": 84645 }, { "epoch": 0.7967058823529412, "grad_norm": 0.47883578043965475, "learning_rate": 2.2412263094774257e-06, "loss": 0.014809067547321319, "step": 84650 }, { "epoch": 0.7967529411764706, "grad_norm": 0.44330013124508155, "learning_rate": 2.2411601206557407e-06, "loss": 0.015890969336032866, "step": 84655 }, { "epoch": 0.7968, "grad_norm": 0.602230642309197, "learning_rate": 2.2410939376978556e-06, "loss": 0.014787781238555908, "step": 84660 }, { "epoch": 0.7968470588235295, "grad_norm": 0.47053435406656813, "learning_rate": 2.2410277606029063e-06, "loss": 0.01369350403547287, "step": 84665 }, { "epoch": 0.7968941176470589, "grad_norm": 0.6294767531959793, "learning_rate": 2.2409615893700256e-06, "loss": 0.02259039133787155, "step": 84670 }, { "epoch": 0.7969411764705883, "grad_norm": 0.27644118285035185, "learning_rate": 2.2408954239983486e-06, "loss": 0.014402520656585694, "step": 84675 }, { "epoch": 0.7969882352941177, "grad_norm": 0.3941281514470443, "learning_rate": 2.2408292644870106e-06, "loss": 0.012618449330329896, "step": 84680 }, { "epoch": 0.7970352941176471, "grad_norm": 0.4342295692144506, "learning_rate": 2.240763110835146e-06, "loss": 0.015753906965255738, "step": 84685 }, { "epoch": 0.7970823529411765, "grad_norm": 0.44056423602755646, "learning_rate": 2.2406969630418897e-06, "loss": 0.013168084621429443, "step": 84690 }, { "epoch": 0.7971294117647059, "grad_norm": 0.44400300494082096, "learning_rate": 2.240630821106378e-06, "loss": 0.013925713300704957, "step": 84695 }, { "epoch": 0.7971764705882353, "grad_norm": 0.5635507946249562, "learning_rate": 2.2405646850277457e-06, "loss": 0.014563314616680145, "step": 84700 }, { "epoch": 0.7972235294117647, "grad_norm": 0.29628873540911305, "learning_rate": 2.2404985548051286e-06, "loss": 0.012633928656578064, "step": 84705 }, { "epoch": 0.7972705882352941, "grad_norm": 0.5093905437464791, "learning_rate": 2.2404324304376622e-06, "loss": 0.016232040524482728, "step": 84710 }, { "epoch": 0.7973176470588236, "grad_norm": 0.6223436558955127, "learning_rate": 2.240366311924483e-06, "loss": 0.014534065127372741, "step": 84715 }, { "epoch": 0.797364705882353, "grad_norm": 0.5724301040186096, "learning_rate": 2.2403001992647274e-06, "loss": 0.014115771651268006, "step": 84720 }, { "epoch": 0.7974117647058824, "grad_norm": 0.44035024772943965, "learning_rate": 2.240234092457531e-06, "loss": 0.013261052966117858, "step": 84725 }, { "epoch": 0.7974588235294118, "grad_norm": 0.37910363170891614, "learning_rate": 2.240167991502031e-06, "loss": 0.017192035913467407, "step": 84730 }, { "epoch": 0.7975058823529412, "grad_norm": 0.48334068889240034, "learning_rate": 2.240101896397364e-06, "loss": 0.01249801516532898, "step": 84735 }, { "epoch": 0.7975529411764706, "grad_norm": 0.5986357145947678, "learning_rate": 2.240035807142667e-06, "loss": 0.014270713925361634, "step": 84740 }, { "epoch": 0.7976, "grad_norm": 0.3457605836729522, "learning_rate": 2.2399697237370768e-06, "loss": 0.015701285004615782, "step": 84745 }, { "epoch": 0.7976470588235294, "grad_norm": 0.5815824495736394, "learning_rate": 2.239903646179731e-06, "loss": 0.01662530153989792, "step": 84750 }, { "epoch": 0.7976941176470588, "grad_norm": 0.713625759697948, "learning_rate": 2.239837574469767e-06, "loss": 0.013486933708190919, "step": 84755 }, { "epoch": 0.7977411764705883, "grad_norm": 0.4920303440701536, "learning_rate": 2.239771508606322e-06, "loss": 0.015549373626708985, "step": 84760 }, { "epoch": 0.7977882352941177, "grad_norm": 0.292207901633946, "learning_rate": 2.2397054485885344e-06, "loss": 0.011985423415899277, "step": 84765 }, { "epoch": 0.7978352941176471, "grad_norm": 0.4484782643395968, "learning_rate": 2.239639394415542e-06, "loss": 0.01728551685810089, "step": 84770 }, { "epoch": 0.7978823529411765, "grad_norm": 0.39687079498153427, "learning_rate": 2.2395733460864827e-06, "loss": 0.01209498792886734, "step": 84775 }, { "epoch": 0.7979294117647059, "grad_norm": 0.4925897521523785, "learning_rate": 2.239507303600495e-06, "loss": 0.01393234133720398, "step": 84780 }, { "epoch": 0.7979764705882353, "grad_norm": 0.5425988375813776, "learning_rate": 2.239441266956718e-06, "loss": 0.014268018305301666, "step": 84785 }, { "epoch": 0.7980235294117647, "grad_norm": 0.3876301004827463, "learning_rate": 2.2393752361542892e-06, "loss": 0.012061402201652527, "step": 84790 }, { "epoch": 0.7980705882352941, "grad_norm": 0.6351468832810621, "learning_rate": 2.239309211192348e-06, "loss": 0.01461944580078125, "step": 84795 }, { "epoch": 0.7981176470588235, "grad_norm": 0.34594422036570827, "learning_rate": 2.239243192070034e-06, "loss": 0.01723046600818634, "step": 84800 }, { "epoch": 0.7981647058823529, "grad_norm": 0.49226043375978895, "learning_rate": 2.2391771787864863e-06, "loss": 0.01615961790084839, "step": 84805 }, { "epoch": 0.7982117647058824, "grad_norm": 0.5724165649485871, "learning_rate": 2.2391111713408438e-06, "loss": 0.014214156568050385, "step": 84810 }, { "epoch": 0.7982588235294118, "grad_norm": 0.4158927520296598, "learning_rate": 2.2390451697322464e-06, "loss": 0.024061110615730286, "step": 84815 }, { "epoch": 0.7983058823529412, "grad_norm": 0.4339272605726752, "learning_rate": 2.238979173959834e-06, "loss": 0.017360299825668335, "step": 84820 }, { "epoch": 0.7983529411764706, "grad_norm": 0.3901290264999537, "learning_rate": 2.238913184022746e-06, "loss": 0.011780153214931487, "step": 84825 }, { "epoch": 0.7984, "grad_norm": 0.49462349857600574, "learning_rate": 2.2388471999201233e-06, "loss": 0.015759405493736268, "step": 84830 }, { "epoch": 0.7984470588235294, "grad_norm": 0.45934182100384613, "learning_rate": 2.2387812216511058e-06, "loss": 0.011692275106906892, "step": 84835 }, { "epoch": 0.7984941176470588, "grad_norm": 0.474331425850753, "learning_rate": 2.238715249214834e-06, "loss": 0.012652531266212463, "step": 84840 }, { "epoch": 0.7985411764705882, "grad_norm": 0.46754415375082276, "learning_rate": 2.2386492826104476e-06, "loss": 0.014625644683837891, "step": 84845 }, { "epoch": 0.7985882352941176, "grad_norm": 0.6525862139098169, "learning_rate": 2.238583321837089e-06, "loss": 0.014517749845981597, "step": 84850 }, { "epoch": 0.7986352941176471, "grad_norm": 0.345948220138917, "learning_rate": 2.2385173668938988e-06, "loss": 0.01332319676876068, "step": 84855 }, { "epoch": 0.7986823529411765, "grad_norm": 0.5520055135987693, "learning_rate": 2.2384514177800175e-06, "loss": 0.023197036981582642, "step": 84860 }, { "epoch": 0.7987294117647059, "grad_norm": 0.4536176892419956, "learning_rate": 2.2383854744945873e-06, "loss": 0.01265462338924408, "step": 84865 }, { "epoch": 0.7987764705882353, "grad_norm": 0.3217827122238342, "learning_rate": 2.238319537036749e-06, "loss": 0.012762372195720673, "step": 84870 }, { "epoch": 0.7988235294117647, "grad_norm": 0.2495639056138323, "learning_rate": 2.238253605405644e-06, "loss": 0.010331287980079651, "step": 84875 }, { "epoch": 0.7988705882352941, "grad_norm": 0.3379489700170347, "learning_rate": 2.238187679600416e-06, "loss": 0.011941250413656235, "step": 84880 }, { "epoch": 0.7989176470588235, "grad_norm": 0.39915931787584386, "learning_rate": 2.238121759620205e-06, "loss": 0.013325551152229309, "step": 84885 }, { "epoch": 0.7989647058823529, "grad_norm": 0.489045263762905, "learning_rate": 2.238055845464155e-06, "loss": 0.015222124755382538, "step": 84890 }, { "epoch": 0.7990117647058823, "grad_norm": 0.4141805274401321, "learning_rate": 2.2379899371314072e-06, "loss": 0.011513682454824448, "step": 84895 }, { "epoch": 0.7990588235294117, "grad_norm": 0.5541638519018066, "learning_rate": 2.2379240346211046e-06, "loss": 0.015371263027191162, "step": 84900 }, { "epoch": 0.7991058823529412, "grad_norm": 0.4196723642299563, "learning_rate": 2.2378581379323896e-06, "loss": 0.017321795225143433, "step": 84905 }, { "epoch": 0.7991529411764706, "grad_norm": 0.4509097180986011, "learning_rate": 2.237792247064406e-06, "loss": 0.014917175471782684, "step": 84910 }, { "epoch": 0.7992, "grad_norm": 0.3818421866885619, "learning_rate": 2.237726362016297e-06, "loss": 0.016004417836666108, "step": 84915 }, { "epoch": 0.7992470588235294, "grad_norm": 0.38423553459329157, "learning_rate": 2.2376604827872047e-06, "loss": 0.015192413330078125, "step": 84920 }, { "epoch": 0.7992941176470588, "grad_norm": 0.5428074779554946, "learning_rate": 2.237594609376273e-06, "loss": 0.013835284113883971, "step": 84925 }, { "epoch": 0.7993411764705882, "grad_norm": 0.4869235995614345, "learning_rate": 2.237528741782646e-06, "loss": 0.013034467399120332, "step": 84930 }, { "epoch": 0.7993882352941176, "grad_norm": 0.5142297032464666, "learning_rate": 2.2374628800054674e-06, "loss": 0.01762067973613739, "step": 84935 }, { "epoch": 0.799435294117647, "grad_norm": 0.5728127042281957, "learning_rate": 2.237397024043881e-06, "loss": 0.017294177412986757, "step": 84940 }, { "epoch": 0.7994823529411764, "grad_norm": 0.7782566480880486, "learning_rate": 2.2373311738970312e-06, "loss": 0.018984054028987885, "step": 84945 }, { "epoch": 0.7995294117647059, "grad_norm": 0.5435119386695187, "learning_rate": 2.237265329564062e-06, "loss": 0.012038156390190125, "step": 84950 }, { "epoch": 0.7995764705882353, "grad_norm": 0.38668489759437863, "learning_rate": 2.2371994910441193e-06, "loss": 0.012829041481018067, "step": 84955 }, { "epoch": 0.7996235294117647, "grad_norm": 0.5304744412384081, "learning_rate": 2.237133658336346e-06, "loss": 0.011557596921920776, "step": 84960 }, { "epoch": 0.7996705882352941, "grad_norm": 0.424538456598029, "learning_rate": 2.237067831439888e-06, "loss": 0.01225694864988327, "step": 84965 }, { "epoch": 0.7997176470588235, "grad_norm": 0.5891845311797178, "learning_rate": 2.2370020103538897e-06, "loss": 0.013805583119392395, "step": 84970 }, { "epoch": 0.7997647058823529, "grad_norm": 0.49450651654291744, "learning_rate": 2.2369361950774974e-06, "loss": 0.01418745219707489, "step": 84975 }, { "epoch": 0.7998117647058823, "grad_norm": 0.4434134850059383, "learning_rate": 2.236870385609855e-06, "loss": 0.013906502723693847, "step": 84980 }, { "epoch": 0.7998588235294117, "grad_norm": 0.7187013890977202, "learning_rate": 2.2368045819501096e-06, "loss": 0.01607625186443329, "step": 84985 }, { "epoch": 0.7999058823529411, "grad_norm": 0.455918749677916, "learning_rate": 2.236738784097406e-06, "loss": 0.01239590123295784, "step": 84990 }, { "epoch": 0.7999529411764705, "grad_norm": 0.6015009209511254, "learning_rate": 2.236672992050891e-06, "loss": 0.014037995040416718, "step": 84995 }, { "epoch": 0.8, "grad_norm": 0.6408438722047319, "learning_rate": 2.23660720580971e-06, "loss": 0.018147936463356017, "step": 85000 }, { "epoch": 0.8000470588235294, "grad_norm": 0.2214162207077125, "learning_rate": 2.23654142537301e-06, "loss": 0.012538526952266694, "step": 85005 }, { "epoch": 0.8000941176470588, "grad_norm": 0.7095609755156348, "learning_rate": 2.2364756507399367e-06, "loss": 0.023755164444446565, "step": 85010 }, { "epoch": 0.8001411764705882, "grad_norm": 0.5607042610324786, "learning_rate": 2.2364098819096368e-06, "loss": 0.010477468371391296, "step": 85015 }, { "epoch": 0.8001882352941176, "grad_norm": 0.3841330931710934, "learning_rate": 2.2363441188812576e-06, "loss": 0.013365092873573303, "step": 85020 }, { "epoch": 0.800235294117647, "grad_norm": 0.4333828640321765, "learning_rate": 2.236278361653946e-06, "loss": 0.013974379003047942, "step": 85025 }, { "epoch": 0.8002823529411764, "grad_norm": 0.5120814477062862, "learning_rate": 2.236212610226849e-06, "loss": 0.01383136361837387, "step": 85030 }, { "epoch": 0.8003294117647058, "grad_norm": 0.6060576382418131, "learning_rate": 2.2361468645991143e-06, "loss": 0.016075325012207032, "step": 85035 }, { "epoch": 0.8003764705882352, "grad_norm": 0.2871004599548171, "learning_rate": 2.2360811247698888e-06, "loss": 0.014324790239334107, "step": 85040 }, { "epoch": 0.8004235294117648, "grad_norm": 0.4576928661889814, "learning_rate": 2.236015390738321e-06, "loss": 0.015483894944190979, "step": 85045 }, { "epoch": 0.8004705882352942, "grad_norm": 0.5520327010840859, "learning_rate": 2.2359496625035577e-06, "loss": 0.011659836769104004, "step": 85050 }, { "epoch": 0.8005176470588236, "grad_norm": 0.6563468888814452, "learning_rate": 2.2358839400647483e-06, "loss": 0.013886131346225739, "step": 85055 }, { "epoch": 0.800564705882353, "grad_norm": 1.4392702324190065, "learning_rate": 2.23581822342104e-06, "loss": 0.014340505003929138, "step": 85060 }, { "epoch": 0.8006117647058824, "grad_norm": 0.40167349480916525, "learning_rate": 2.2357525125715816e-06, "loss": 0.012841521203517914, "step": 85065 }, { "epoch": 0.8006588235294118, "grad_norm": 0.2645497513520411, "learning_rate": 2.235686807515522e-06, "loss": 0.017988765239715578, "step": 85070 }, { "epoch": 0.8007058823529412, "grad_norm": 0.510124889715577, "learning_rate": 2.2356211082520094e-06, "loss": 0.014199818670749664, "step": 85075 }, { "epoch": 0.8007529411764706, "grad_norm": 0.41685800203230783, "learning_rate": 2.235555414780193e-06, "loss": 0.013290172815322876, "step": 85080 }, { "epoch": 0.8008, "grad_norm": 0.5620563375598637, "learning_rate": 2.2354897270992216e-06, "loss": 0.012751506268978119, "step": 85085 }, { "epoch": 0.8008470588235294, "grad_norm": 0.4906520757745231, "learning_rate": 2.235424045208245e-06, "loss": 0.013658063113689422, "step": 85090 }, { "epoch": 0.8008941176470589, "grad_norm": 0.4632004352477166, "learning_rate": 2.2353583691064118e-06, "loss": 0.013622128963470459, "step": 85095 }, { "epoch": 0.8009411764705883, "grad_norm": 0.5120687447547504, "learning_rate": 2.2352926987928732e-06, "loss": 0.016798433661460877, "step": 85100 }, { "epoch": 0.8009882352941177, "grad_norm": 0.47838147727850117, "learning_rate": 2.235227034266777e-06, "loss": 0.014858803153038025, "step": 85105 }, { "epoch": 0.8010352941176471, "grad_norm": 0.6655052102912682, "learning_rate": 2.2351613755272754e-06, "loss": 0.013598379492759705, "step": 85110 }, { "epoch": 0.8010823529411765, "grad_norm": 0.25624330384138505, "learning_rate": 2.235095722573516e-06, "loss": 0.014332515001296998, "step": 85115 }, { "epoch": 0.8011294117647059, "grad_norm": 0.49896667024918645, "learning_rate": 2.2350300754046515e-06, "loss": 0.01735255867242813, "step": 85120 }, { "epoch": 0.8011764705882353, "grad_norm": 0.8169535671120118, "learning_rate": 2.2349644340198313e-06, "loss": 0.01631505787372589, "step": 85125 }, { "epoch": 0.8012235294117647, "grad_norm": 0.5646859066916009, "learning_rate": 2.234898798418206e-06, "loss": 0.015990108251571655, "step": 85130 }, { "epoch": 0.8012705882352941, "grad_norm": 0.37463061847176865, "learning_rate": 2.2348331685989266e-06, "loss": 0.01454513967037201, "step": 85135 }, { "epoch": 0.8013176470588236, "grad_norm": 0.3936583647449063, "learning_rate": 2.2347675445611443e-06, "loss": 0.010751909017562867, "step": 85140 }, { "epoch": 0.801364705882353, "grad_norm": 0.5547919404557659, "learning_rate": 2.2347019263040103e-06, "loss": 0.015878117084503172, "step": 85145 }, { "epoch": 0.8014117647058824, "grad_norm": 0.641733265192045, "learning_rate": 2.234636313826676e-06, "loss": 0.016172096133232117, "step": 85150 }, { "epoch": 0.8014588235294118, "grad_norm": 0.5302590140661284, "learning_rate": 2.2345707071282924e-06, "loss": 0.015492329001426696, "step": 85155 }, { "epoch": 0.8015058823529412, "grad_norm": 0.3194259682855182, "learning_rate": 2.234505106208012e-06, "loss": 0.01221017986536026, "step": 85160 }, { "epoch": 0.8015529411764706, "grad_norm": 0.28838563541577666, "learning_rate": 2.234439511064986e-06, "loss": 0.01337834894657135, "step": 85165 }, { "epoch": 0.8016, "grad_norm": 0.41779953550449295, "learning_rate": 2.2343739216983667e-06, "loss": 0.01917058676481247, "step": 85170 }, { "epoch": 0.8016470588235294, "grad_norm": 0.6776350715657387, "learning_rate": 2.234308338107307e-06, "loss": 0.01486228108406067, "step": 85175 }, { "epoch": 0.8016941176470588, "grad_norm": 0.2514076128930533, "learning_rate": 2.234242760290958e-06, "loss": 0.011576396971940994, "step": 85180 }, { "epoch": 0.8017411764705883, "grad_norm": 0.2798437089415463, "learning_rate": 2.234177188248474e-06, "loss": 0.011391930282115936, "step": 85185 }, { "epoch": 0.8017882352941177, "grad_norm": 0.35245866812720195, "learning_rate": 2.2341116219790067e-06, "loss": 0.012995532155036927, "step": 85190 }, { "epoch": 0.8018352941176471, "grad_norm": 0.5306560394084066, "learning_rate": 2.234046061481709e-06, "loss": 0.013138076663017273, "step": 85195 }, { "epoch": 0.8018823529411765, "grad_norm": 0.41951781072061894, "learning_rate": 2.233980506755734e-06, "loss": 0.011962345242500306, "step": 85200 }, { "epoch": 0.8019294117647059, "grad_norm": 0.5052994200710146, "learning_rate": 2.2339149578002363e-06, "loss": 0.01780380606651306, "step": 85205 }, { "epoch": 0.8019764705882353, "grad_norm": 0.7170468031388713, "learning_rate": 2.2338494146143674e-06, "loss": 0.012123891711235046, "step": 85210 }, { "epoch": 0.8020235294117647, "grad_norm": 0.5317437475116683, "learning_rate": 2.233783877197282e-06, "loss": 0.015481686592102051, "step": 85215 }, { "epoch": 0.8020705882352941, "grad_norm": 0.4422187006065717, "learning_rate": 2.233718345548134e-06, "loss": 0.015526849031448364, "step": 85220 }, { "epoch": 0.8021176470588235, "grad_norm": 0.3710170827240237, "learning_rate": 2.233652819666077e-06, "loss": 0.014522027969360352, "step": 85225 }, { "epoch": 0.8021647058823529, "grad_norm": 0.6169034123407501, "learning_rate": 2.233587299550265e-06, "loss": 0.014022809267044068, "step": 85230 }, { "epoch": 0.8022117647058824, "grad_norm": 0.6632605340231263, "learning_rate": 2.233521785199853e-06, "loss": 0.01705460250377655, "step": 85235 }, { "epoch": 0.8022588235294118, "grad_norm": 0.4469509272446183, "learning_rate": 2.2334562766139956e-06, "loss": 0.017431725561618806, "step": 85240 }, { "epoch": 0.8023058823529412, "grad_norm": 1.0131884549727095, "learning_rate": 2.2333907737918466e-06, "loss": 0.018941891193389893, "step": 85245 }, { "epoch": 0.8023529411764706, "grad_norm": 0.28384290780508276, "learning_rate": 2.233325276732561e-06, "loss": 0.013528218865394593, "step": 85250 }, { "epoch": 0.8024, "grad_norm": 0.4899722035180238, "learning_rate": 2.233259785435295e-06, "loss": 0.015240910649299621, "step": 85255 }, { "epoch": 0.8024470588235294, "grad_norm": 0.3011851671603086, "learning_rate": 2.233194299899203e-06, "loss": 0.01343272477388382, "step": 85260 }, { "epoch": 0.8024941176470588, "grad_norm": 0.48864483755503996, "learning_rate": 2.2331288201234396e-06, "loss": 0.015638861060142516, "step": 85265 }, { "epoch": 0.8025411764705882, "grad_norm": 0.4862625263821264, "learning_rate": 2.233063346107161e-06, "loss": 0.011657318472862244, "step": 85270 }, { "epoch": 0.8025882352941176, "grad_norm": 0.5621477406300648, "learning_rate": 2.2329978778495235e-06, "loss": 0.015693354606628417, "step": 85275 }, { "epoch": 0.8026352941176471, "grad_norm": 0.3901333256435683, "learning_rate": 2.232932415349683e-06, "loss": 0.012576419115066528, "step": 85280 }, { "epoch": 0.8026823529411765, "grad_norm": 0.4663823463708826, "learning_rate": 2.2328669586067937e-06, "loss": 0.013267163932323457, "step": 85285 }, { "epoch": 0.8027294117647059, "grad_norm": 0.8489191059433253, "learning_rate": 2.2328015076200142e-06, "loss": 0.01607087254524231, "step": 85290 }, { "epoch": 0.8027764705882353, "grad_norm": 0.3650209852861441, "learning_rate": 2.2327360623885e-06, "loss": 0.01406865417957306, "step": 85295 }, { "epoch": 0.8028235294117647, "grad_norm": 0.5109666123619985, "learning_rate": 2.2326706229114072e-06, "loss": 0.019971728324890137, "step": 85300 }, { "epoch": 0.8028705882352941, "grad_norm": 0.6634090411696129, "learning_rate": 2.232605189187893e-06, "loss": 0.01200818344950676, "step": 85305 }, { "epoch": 0.8029176470588235, "grad_norm": 0.6266735908110224, "learning_rate": 2.232539761217115e-06, "loss": 0.011173036694526673, "step": 85310 }, { "epoch": 0.8029647058823529, "grad_norm": 0.6764752913210044, "learning_rate": 2.2324743389982297e-06, "loss": 0.013776981830596923, "step": 85315 }, { "epoch": 0.8030117647058823, "grad_norm": 0.4694217028446498, "learning_rate": 2.232408922530394e-06, "loss": 0.013474591076374054, "step": 85320 }, { "epoch": 0.8030588235294117, "grad_norm": 0.5476565485329747, "learning_rate": 2.2323435118127656e-06, "loss": 0.011704519391059875, "step": 85325 }, { "epoch": 0.8031058823529412, "grad_norm": 1.0816944949074851, "learning_rate": 2.232278106844502e-06, "loss": 0.015458941459655762, "step": 85330 }, { "epoch": 0.8031529411764706, "grad_norm": 0.5482125447869187, "learning_rate": 2.2322127076247614e-06, "loss": 0.012751518189907074, "step": 85335 }, { "epoch": 0.8032, "grad_norm": 0.7745983338980289, "learning_rate": 2.2321473141527015e-06, "loss": 0.0171986922621727, "step": 85340 }, { "epoch": 0.8032470588235294, "grad_norm": 0.38439486581203824, "learning_rate": 2.232081926427481e-06, "loss": 0.013115517795085907, "step": 85345 }, { "epoch": 0.8032941176470588, "grad_norm": 0.4046724798512837, "learning_rate": 2.2320165444482577e-06, "loss": 0.013664948940277099, "step": 85350 }, { "epoch": 0.8033411764705882, "grad_norm": 0.3949802851163306, "learning_rate": 2.2319511682141904e-06, "loss": 0.0153265118598938, "step": 85355 }, { "epoch": 0.8033882352941176, "grad_norm": 0.47862038959741293, "learning_rate": 2.231885797724437e-06, "loss": 0.014954873919487, "step": 85360 }, { "epoch": 0.803435294117647, "grad_norm": 0.5635117021156081, "learning_rate": 2.2318204329781566e-06, "loss": 0.0163083091378212, "step": 85365 }, { "epoch": 0.8034823529411764, "grad_norm": 0.5707046220144961, "learning_rate": 2.2317550739745096e-06, "loss": 0.014942064881324768, "step": 85370 }, { "epoch": 0.8035294117647059, "grad_norm": 0.5977730859132637, "learning_rate": 2.231689720712653e-06, "loss": 0.011893634498119355, "step": 85375 }, { "epoch": 0.8035764705882353, "grad_norm": 0.5456353418262548, "learning_rate": 2.2316243731917477e-06, "loss": 0.02624564468860626, "step": 85380 }, { "epoch": 0.8036235294117647, "grad_norm": 0.7159565642053453, "learning_rate": 2.2315590314109525e-06, "loss": 0.016090011596679686, "step": 85385 }, { "epoch": 0.8036705882352941, "grad_norm": 0.3719652470012778, "learning_rate": 2.231493695369428e-06, "loss": 0.01502261459827423, "step": 85390 }, { "epoch": 0.8037176470588235, "grad_norm": 0.37634082731715424, "learning_rate": 2.2314283650663326e-06, "loss": 0.0121017187833786, "step": 85395 }, { "epoch": 0.8037647058823529, "grad_norm": 0.43261247991808827, "learning_rate": 2.231363040500828e-06, "loss": 0.011975647509098053, "step": 85400 }, { "epoch": 0.8038117647058823, "grad_norm": 0.43353891634668623, "learning_rate": 2.2312977216720726e-06, "loss": 0.019311973452568056, "step": 85405 }, { "epoch": 0.8038588235294117, "grad_norm": 0.41159474891833453, "learning_rate": 2.231232408579229e-06, "loss": 0.015231457352638245, "step": 85410 }, { "epoch": 0.8039058823529411, "grad_norm": 0.29422891790073585, "learning_rate": 2.2311671012214554e-06, "loss": 0.016581402719020845, "step": 85415 }, { "epoch": 0.8039529411764705, "grad_norm": 0.37268562607801403, "learning_rate": 2.2311017995979138e-06, "loss": 0.014660671353340149, "step": 85420 }, { "epoch": 0.804, "grad_norm": 0.46786242141149803, "learning_rate": 2.2310365037077653e-06, "loss": 0.013555625081062317, "step": 85425 }, { "epoch": 0.8040470588235294, "grad_norm": 0.8674728790182505, "learning_rate": 2.230971213550171e-06, "loss": 0.01695106625556946, "step": 85430 }, { "epoch": 0.8040941176470588, "grad_norm": 0.6517079322177729, "learning_rate": 2.2309059291242906e-06, "loss": 0.01723991185426712, "step": 85435 }, { "epoch": 0.8041411764705882, "grad_norm": 0.4588054510451504, "learning_rate": 2.230840650429288e-06, "loss": 0.015367263555526733, "step": 85440 }, { "epoch": 0.8041882352941176, "grad_norm": 0.4674965728631202, "learning_rate": 2.230775377464323e-06, "loss": 0.012734255194664002, "step": 85445 }, { "epoch": 0.804235294117647, "grad_norm": 0.4755785576740366, "learning_rate": 2.230710110228557e-06, "loss": 0.014611177146434784, "step": 85450 }, { "epoch": 0.8042823529411764, "grad_norm": 0.5694517087179518, "learning_rate": 2.230644848721154e-06, "loss": 0.015898546576499938, "step": 85455 }, { "epoch": 0.8043294117647058, "grad_norm": 0.42205104388572035, "learning_rate": 2.2305795929412746e-06, "loss": 0.01507968008518219, "step": 85460 }, { "epoch": 0.8043764705882352, "grad_norm": 0.5254102136188809, "learning_rate": 2.2305143428880812e-06, "loss": 0.014234663546085357, "step": 85465 }, { "epoch": 0.8044235294117648, "grad_norm": 0.3642706237138145, "learning_rate": 2.2304490985607365e-06, "loss": 0.011801423132419586, "step": 85470 }, { "epoch": 0.8044705882352942, "grad_norm": 0.6640849715156298, "learning_rate": 2.230383859958403e-06, "loss": 0.014051932096481323, "step": 85475 }, { "epoch": 0.8045176470588236, "grad_norm": 0.4117217128664376, "learning_rate": 2.230318627080244e-06, "loss": 0.015224677324295045, "step": 85480 }, { "epoch": 0.804564705882353, "grad_norm": 0.5627318933029568, "learning_rate": 2.2302533999254213e-06, "loss": 0.017623528838157654, "step": 85485 }, { "epoch": 0.8046117647058824, "grad_norm": 0.403480392197227, "learning_rate": 2.2301881784930993e-06, "loss": 0.013149355351924897, "step": 85490 }, { "epoch": 0.8046588235294118, "grad_norm": 0.4473769802418351, "learning_rate": 2.2301229627824405e-06, "loss": 0.015223437547683715, "step": 85495 }, { "epoch": 0.8047058823529412, "grad_norm": 0.6818893890321974, "learning_rate": 2.2300577527926086e-06, "loss": 0.016376671195030213, "step": 85500 }, { "epoch": 0.8047529411764706, "grad_norm": 0.5474277392902853, "learning_rate": 2.229992548522767e-06, "loss": 0.01612983047962189, "step": 85505 }, { "epoch": 0.8048, "grad_norm": 0.31426334948875423, "learning_rate": 2.2299273499720805e-06, "loss": 0.011188492178916931, "step": 85510 }, { "epoch": 0.8048470588235294, "grad_norm": 0.4098052692329531, "learning_rate": 2.229862157139712e-06, "loss": 0.012683092057704926, "step": 85515 }, { "epoch": 0.8048941176470589, "grad_norm": 0.6704784108411469, "learning_rate": 2.2297969700248257e-06, "loss": 0.018465855717658998, "step": 85520 }, { "epoch": 0.8049411764705883, "grad_norm": 0.538074378225855, "learning_rate": 2.2297317886265872e-06, "loss": 0.01488049328327179, "step": 85525 }, { "epoch": 0.8049882352941177, "grad_norm": 0.2977319895756671, "learning_rate": 2.229666612944159e-06, "loss": 0.021095246076583862, "step": 85530 }, { "epoch": 0.8050352941176471, "grad_norm": 0.2837172653724621, "learning_rate": 2.2296014429767075e-06, "loss": 0.012936244904994964, "step": 85535 }, { "epoch": 0.8050823529411765, "grad_norm": 0.5593356127125104, "learning_rate": 2.2295362787233967e-06, "loss": 0.016547492146492003, "step": 85540 }, { "epoch": 0.8051294117647059, "grad_norm": 0.6818886751754564, "learning_rate": 2.2294711201833914e-06, "loss": 0.01921258270740509, "step": 85545 }, { "epoch": 0.8051764705882353, "grad_norm": 0.6416263225971655, "learning_rate": 2.2294059673558584e-06, "loss": 0.014238117635250092, "step": 85550 }, { "epoch": 0.8052235294117647, "grad_norm": 0.42245654590645515, "learning_rate": 2.229340820239961e-06, "loss": 0.016351547837257386, "step": 85555 }, { "epoch": 0.8052705882352941, "grad_norm": 0.4508354645990607, "learning_rate": 2.2292756788348654e-06, "loss": 0.014549365639686585, "step": 85560 }, { "epoch": 0.8053176470588236, "grad_norm": 0.4475414155341027, "learning_rate": 2.229210543139738e-06, "loss": 0.01458938717842102, "step": 85565 }, { "epoch": 0.805364705882353, "grad_norm": 0.33415349508363684, "learning_rate": 2.229145413153744e-06, "loss": 0.016816943883895874, "step": 85570 }, { "epoch": 0.8054117647058824, "grad_norm": 0.4943689971277906, "learning_rate": 2.22908028887605e-06, "loss": 0.013778452575206757, "step": 85575 }, { "epoch": 0.8054588235294118, "grad_norm": 0.6128567677565242, "learning_rate": 2.229015170305821e-06, "loss": 0.012408971786499023, "step": 85580 }, { "epoch": 0.8055058823529412, "grad_norm": 0.3080957061333838, "learning_rate": 2.2289500574422246e-06, "loss": 0.01359969824552536, "step": 85585 }, { "epoch": 0.8055529411764706, "grad_norm": 0.492414900104748, "learning_rate": 2.228884950284427e-06, "loss": 0.013451875746250152, "step": 85590 }, { "epoch": 0.8056, "grad_norm": 0.35711872061645755, "learning_rate": 2.2288198488315947e-06, "loss": 0.013314300775527954, "step": 85595 }, { "epoch": 0.8056470588235294, "grad_norm": 0.3300807257397457, "learning_rate": 2.2287547530828947e-06, "loss": 0.011490831524133683, "step": 85600 }, { "epoch": 0.8056941176470588, "grad_norm": 0.3739628503691808, "learning_rate": 2.228689663037495e-06, "loss": 0.015836134552955627, "step": 85605 }, { "epoch": 0.8057411764705882, "grad_norm": 0.6757717477774919, "learning_rate": 2.228624578694561e-06, "loss": 0.015444788336753845, "step": 85610 }, { "epoch": 0.8057882352941177, "grad_norm": 0.4004782174733761, "learning_rate": 2.228559500053261e-06, "loss": 0.016805595159530638, "step": 85615 }, { "epoch": 0.8058352941176471, "grad_norm": 0.7558289141468112, "learning_rate": 2.228494427112763e-06, "loss": 0.016632437705993652, "step": 85620 }, { "epoch": 0.8058823529411765, "grad_norm": 0.5002805137904668, "learning_rate": 2.2284293598722347e-06, "loss": 0.012431757152080536, "step": 85625 }, { "epoch": 0.8059294117647059, "grad_norm": 0.4365970325171879, "learning_rate": 2.2283642983308434e-06, "loss": 0.010380712151527405, "step": 85630 }, { "epoch": 0.8059764705882353, "grad_norm": 0.2761200484195817, "learning_rate": 2.228299242487758e-06, "loss": 0.012820830941200257, "step": 85635 }, { "epoch": 0.8060235294117647, "grad_norm": 0.28900146904209223, "learning_rate": 2.2282341923421455e-06, "loss": 0.01378052830696106, "step": 85640 }, { "epoch": 0.8060705882352941, "grad_norm": 0.9190651079285802, "learning_rate": 2.2281691478931748e-06, "loss": 0.01579930931329727, "step": 85645 }, { "epoch": 0.8061176470588235, "grad_norm": 0.6215893098115464, "learning_rate": 2.2281041091400153e-06, "loss": 0.016790685057640076, "step": 85650 }, { "epoch": 0.8061647058823529, "grad_norm": 0.5709200980196503, "learning_rate": 2.2280390760818356e-06, "loss": 0.01835222840309143, "step": 85655 }, { "epoch": 0.8062117647058824, "grad_norm": 0.5038882497602171, "learning_rate": 2.2279740487178036e-06, "loss": 0.014498209953308106, "step": 85660 }, { "epoch": 0.8062588235294118, "grad_norm": 0.4204019720430289, "learning_rate": 2.2279090270470896e-06, "loss": 0.014149056375026703, "step": 85665 }, { "epoch": 0.8063058823529412, "grad_norm": 0.4331361437562269, "learning_rate": 2.2278440110688623e-06, "loss": 0.011550544202327729, "step": 85670 }, { "epoch": 0.8063529411764706, "grad_norm": 0.41757359616134654, "learning_rate": 2.227779000782291e-06, "loss": 0.016219398379325865, "step": 85675 }, { "epoch": 0.8064, "grad_norm": 0.472122759600301, "learning_rate": 2.2277139961865444e-06, "loss": 0.014250479638576508, "step": 85680 }, { "epoch": 0.8064470588235294, "grad_norm": 0.27933196384878445, "learning_rate": 2.227648997280795e-06, "loss": 0.009435062110424042, "step": 85685 }, { "epoch": 0.8064941176470588, "grad_norm": 0.7357348233899188, "learning_rate": 2.2275840040642107e-06, "loss": 0.012458826601505279, "step": 85690 }, { "epoch": 0.8065411764705882, "grad_norm": 0.30968122856373875, "learning_rate": 2.227519016535962e-06, "loss": 0.011511844396591187, "step": 85695 }, { "epoch": 0.8065882352941176, "grad_norm": 0.5766618692139941, "learning_rate": 2.2274540346952186e-06, "loss": 0.016070187091827393, "step": 85700 }, { "epoch": 0.806635294117647, "grad_norm": 0.39318746995588194, "learning_rate": 2.2273890585411524e-06, "loss": 0.011403441429138184, "step": 85705 }, { "epoch": 0.8066823529411765, "grad_norm": 0.3346941085350329, "learning_rate": 2.227324088072933e-06, "loss": 0.01587582230567932, "step": 85710 }, { "epoch": 0.8067294117647059, "grad_norm": 0.5246442046145547, "learning_rate": 2.2272591232897313e-06, "loss": 0.013148799538612366, "step": 85715 }, { "epoch": 0.8067764705882353, "grad_norm": 0.5239826785932059, "learning_rate": 2.2271941641907184e-06, "loss": 0.012590637803077698, "step": 85720 }, { "epoch": 0.8068235294117647, "grad_norm": 0.3067187995362327, "learning_rate": 2.2271292107750658e-06, "loss": 0.012444103509187699, "step": 85725 }, { "epoch": 0.8068705882352941, "grad_norm": 0.47715351212988005, "learning_rate": 2.227064263041944e-06, "loss": 0.01690203547477722, "step": 85730 }, { "epoch": 0.8069176470588235, "grad_norm": 0.4882896678503721, "learning_rate": 2.226999320990525e-06, "loss": 0.012918418645858765, "step": 85735 }, { "epoch": 0.8069647058823529, "grad_norm": 0.4843918818771984, "learning_rate": 2.22693438461998e-06, "loss": 0.02017974704504013, "step": 85740 }, { "epoch": 0.8070117647058823, "grad_norm": 0.6054428550841124, "learning_rate": 2.2268694539294817e-06, "loss": 0.012468940019607544, "step": 85745 }, { "epoch": 0.8070588235294117, "grad_norm": 0.7457127519427061, "learning_rate": 2.2268045289182014e-06, "loss": 0.015102696418762208, "step": 85750 }, { "epoch": 0.8071058823529412, "grad_norm": 0.4499861161224022, "learning_rate": 2.226739609585312e-06, "loss": 0.016534647345542906, "step": 85755 }, { "epoch": 0.8071529411764706, "grad_norm": 0.5399575292454251, "learning_rate": 2.226674695929984e-06, "loss": 0.013408815860748291, "step": 85760 }, { "epoch": 0.8072, "grad_norm": 0.7270066930057996, "learning_rate": 2.226609787951392e-06, "loss": 0.012651795148849487, "step": 85765 }, { "epoch": 0.8072470588235294, "grad_norm": 0.3912196594503108, "learning_rate": 2.2265448856487077e-06, "loss": 0.01364288330078125, "step": 85770 }, { "epoch": 0.8072941176470588, "grad_norm": 0.5605909471264938, "learning_rate": 2.2264799890211037e-06, "loss": 0.01357712149620056, "step": 85775 }, { "epoch": 0.8073411764705882, "grad_norm": 0.2749844167822021, "learning_rate": 2.2264150980677536e-06, "loss": 0.0146942600607872, "step": 85780 }, { "epoch": 0.8073882352941176, "grad_norm": 0.6748198634842841, "learning_rate": 2.2263502127878302e-06, "loss": 0.017379581928253174, "step": 85785 }, { "epoch": 0.807435294117647, "grad_norm": 0.6035360446562428, "learning_rate": 2.2262853331805068e-06, "loss": 0.013458096981048584, "step": 85790 }, { "epoch": 0.8074823529411764, "grad_norm": 0.33414297127636217, "learning_rate": 2.226220459244957e-06, "loss": 0.013807642459869384, "step": 85795 }, { "epoch": 0.8075294117647058, "grad_norm": 0.44388996219063975, "learning_rate": 2.2261555909803547e-06, "loss": 0.01609162092208862, "step": 85800 }, { "epoch": 0.8075764705882353, "grad_norm": 1.1010329162708306, "learning_rate": 2.2260907283858735e-06, "loss": 0.020956479012966156, "step": 85805 }, { "epoch": 0.8076235294117647, "grad_norm": 0.7336744302474864, "learning_rate": 2.226025871460687e-06, "loss": 0.01858336478471756, "step": 85810 }, { "epoch": 0.8076705882352941, "grad_norm": 0.4524289382404757, "learning_rate": 2.22596102020397e-06, "loss": 0.015528243780136109, "step": 85815 }, { "epoch": 0.8077176470588235, "grad_norm": 0.5441800668218019, "learning_rate": 2.225896174614897e-06, "loss": 0.014433406293392181, "step": 85820 }, { "epoch": 0.8077647058823529, "grad_norm": 0.36945441378902094, "learning_rate": 2.225831334692642e-06, "loss": 0.013721106946468354, "step": 85825 }, { "epoch": 0.8078117647058823, "grad_norm": 0.6014522852940146, "learning_rate": 2.2257665004363795e-06, "loss": 0.016163228452205657, "step": 85830 }, { "epoch": 0.8078588235294117, "grad_norm": 0.3040483680037699, "learning_rate": 2.2257016718452855e-06, "loss": 0.017271214723587038, "step": 85835 }, { "epoch": 0.8079058823529411, "grad_norm": 0.5111037389799359, "learning_rate": 2.2256368489185333e-06, "loss": 0.01743611544370651, "step": 85840 }, { "epoch": 0.8079529411764705, "grad_norm": 0.4533391704690575, "learning_rate": 2.2255720316552996e-06, "loss": 0.01752137243747711, "step": 85845 }, { "epoch": 0.808, "grad_norm": 0.43406515077883356, "learning_rate": 2.225507220054758e-06, "loss": 0.012141405045986176, "step": 85850 }, { "epoch": 0.8080470588235295, "grad_norm": 0.42498991181691637, "learning_rate": 2.225442414116087e-06, "loss": 0.018254077434539794, "step": 85855 }, { "epoch": 0.8080941176470589, "grad_norm": 0.5114974409516828, "learning_rate": 2.2253776138384596e-06, "loss": 0.012887091934680938, "step": 85860 }, { "epoch": 0.8081411764705883, "grad_norm": 0.44277777733585816, "learning_rate": 2.225312819221052e-06, "loss": 0.018879202008247376, "step": 85865 }, { "epoch": 0.8081882352941177, "grad_norm": 0.7394191261776378, "learning_rate": 2.2252480302630413e-06, "loss": 0.014993083477020264, "step": 85870 }, { "epoch": 0.808235294117647, "grad_norm": 1.1004665986156932, "learning_rate": 2.225183246963603e-06, "loss": 0.019313406944274903, "step": 85875 }, { "epoch": 0.8082823529411765, "grad_norm": 0.5582929431654742, "learning_rate": 2.2251184693219137e-06, "loss": 0.01581413149833679, "step": 85880 }, { "epoch": 0.8083294117647059, "grad_norm": 0.4632336297268582, "learning_rate": 2.2250536973371498e-06, "loss": 0.012240412086248398, "step": 85885 }, { "epoch": 0.8083764705882353, "grad_norm": 0.34021336063281377, "learning_rate": 2.2249889310084878e-06, "loss": 0.012956348061561585, "step": 85890 }, { "epoch": 0.8084235294117647, "grad_norm": 0.4028828661398025, "learning_rate": 2.2249241703351056e-06, "loss": 0.013657182455062866, "step": 85895 }, { "epoch": 0.8084705882352942, "grad_norm": 0.4190613995211627, "learning_rate": 2.2248594153161783e-06, "loss": 0.01776185482740402, "step": 85900 }, { "epoch": 0.8085176470588236, "grad_norm": 0.4411582174701256, "learning_rate": 2.224794665950885e-06, "loss": 0.013192418217658996, "step": 85905 }, { "epoch": 0.808564705882353, "grad_norm": 0.40683803754267256, "learning_rate": 2.224729922238402e-06, "loss": 0.012840145826339721, "step": 85910 }, { "epoch": 0.8086117647058824, "grad_norm": 0.5124613869483943, "learning_rate": 2.224665184177907e-06, "loss": 0.016315117478370667, "step": 85915 }, { "epoch": 0.8086588235294118, "grad_norm": 0.4941223415941537, "learning_rate": 2.2246004517685785e-06, "loss": 0.013082270324230195, "step": 85920 }, { "epoch": 0.8087058823529412, "grad_norm": 0.5573382674074517, "learning_rate": 2.224535725009593e-06, "loss": 0.015765655040740966, "step": 85925 }, { "epoch": 0.8087529411764706, "grad_norm": 0.322323014824542, "learning_rate": 2.22447100390013e-06, "loss": 0.01161469891667366, "step": 85930 }, { "epoch": 0.8088, "grad_norm": 0.5508523855090596, "learning_rate": 2.2244062884393663e-06, "loss": 0.013935226202011108, "step": 85935 }, { "epoch": 0.8088470588235294, "grad_norm": 0.7725186934428707, "learning_rate": 2.2243415786264807e-06, "loss": 0.0210561066865921, "step": 85940 }, { "epoch": 0.8088941176470589, "grad_norm": 0.4361129827816912, "learning_rate": 2.2242768744606523e-06, "loss": 0.016021138429641722, "step": 85945 }, { "epoch": 0.8089411764705883, "grad_norm": 0.4978378233788898, "learning_rate": 2.22421217594106e-06, "loss": 0.016269780695438385, "step": 85950 }, { "epoch": 0.8089882352941177, "grad_norm": 0.4514191822566992, "learning_rate": 2.2241474830668817e-06, "loss": 0.013910260796546937, "step": 85955 }, { "epoch": 0.8090352941176471, "grad_norm": 0.5076312831943282, "learning_rate": 2.2240827958372967e-06, "loss": 0.014960607886314392, "step": 85960 }, { "epoch": 0.8090823529411765, "grad_norm": 0.5081989330498459, "learning_rate": 2.2240181142514846e-06, "loss": 0.013653859496116638, "step": 85965 }, { "epoch": 0.8091294117647059, "grad_norm": 0.5815234999594949, "learning_rate": 2.2239534383086246e-06, "loss": 0.01581767201423645, "step": 85970 }, { "epoch": 0.8091764705882353, "grad_norm": 0.3311726056715745, "learning_rate": 2.2238887680078964e-06, "loss": 0.01617475301027298, "step": 85975 }, { "epoch": 0.8092235294117647, "grad_norm": 0.5182553628710277, "learning_rate": 2.2238241033484787e-06, "loss": 0.01452518105506897, "step": 85980 }, { "epoch": 0.8092705882352941, "grad_norm": 0.5533567417567078, "learning_rate": 2.223759444329553e-06, "loss": 0.017116069793701172, "step": 85985 }, { "epoch": 0.8093176470588235, "grad_norm": 0.4838336690068174, "learning_rate": 2.223694790950299e-06, "loss": 0.016332802176475526, "step": 85990 }, { "epoch": 0.809364705882353, "grad_norm": 0.3659703129910435, "learning_rate": 2.223630143209896e-06, "loss": 0.012477968633174897, "step": 85995 }, { "epoch": 0.8094117647058824, "grad_norm": 0.509873766933616, "learning_rate": 2.2235655011075244e-06, "loss": 0.0160398930311203, "step": 86000 }, { "epoch": 0.8094588235294118, "grad_norm": 0.3183450903930045, "learning_rate": 2.2235008646423654e-06, "loss": 0.014466965198516845, "step": 86005 }, { "epoch": 0.8095058823529412, "grad_norm": 0.5251544531963195, "learning_rate": 2.2234362338135995e-06, "loss": 0.015439468622207641, "step": 86010 }, { "epoch": 0.8095529411764706, "grad_norm": 0.47350029932641036, "learning_rate": 2.223371608620408e-06, "loss": 0.017563241720199584, "step": 86015 }, { "epoch": 0.8096, "grad_norm": 0.3700474065844417, "learning_rate": 2.223306989061971e-06, "loss": 0.015502604842185973, "step": 86020 }, { "epoch": 0.8096470588235294, "grad_norm": 0.5603426636973203, "learning_rate": 2.2232423751374707e-06, "loss": 0.013685101270675659, "step": 86025 }, { "epoch": 0.8096941176470588, "grad_norm": 0.5141596221747855, "learning_rate": 2.223177766846088e-06, "loss": 0.01628592759370804, "step": 86030 }, { "epoch": 0.8097411764705882, "grad_norm": 0.5141307409323587, "learning_rate": 2.223113164187004e-06, "loss": 0.013431257009506226, "step": 86035 }, { "epoch": 0.8097882352941177, "grad_norm": 0.5170141982083574, "learning_rate": 2.223048567159401e-06, "loss": 0.01654045730829239, "step": 86040 }, { "epoch": 0.8098352941176471, "grad_norm": 0.6410900640863936, "learning_rate": 2.222983975762461e-06, "loss": 0.017984208464622498, "step": 86045 }, { "epoch": 0.8098823529411765, "grad_norm": 0.43297634181095684, "learning_rate": 2.222919389995365e-06, "loss": 0.023659402132034303, "step": 86050 }, { "epoch": 0.8099294117647059, "grad_norm": 0.4665281714153648, "learning_rate": 2.222854809857297e-06, "loss": 0.0137461856007576, "step": 86055 }, { "epoch": 0.8099764705882353, "grad_norm": 0.45098640285534475, "learning_rate": 2.222790235347438e-06, "loss": 0.015148496627807618, "step": 86060 }, { "epoch": 0.8100235294117647, "grad_norm": 0.4292175474318929, "learning_rate": 2.222725666464971e-06, "loss": 0.012475116550922394, "step": 86065 }, { "epoch": 0.8100705882352941, "grad_norm": 0.30148968621566935, "learning_rate": 2.222661103209079e-06, "loss": 0.012521427869796754, "step": 86070 }, { "epoch": 0.8101176470588235, "grad_norm": 0.5075772017247275, "learning_rate": 2.222596545578944e-06, "loss": 0.016516545414924623, "step": 86075 }, { "epoch": 0.8101647058823529, "grad_norm": 0.3223399245962034, "learning_rate": 2.222531993573749e-06, "loss": 0.011953012645244598, "step": 86080 }, { "epoch": 0.8102117647058823, "grad_norm": 0.33393587716190604, "learning_rate": 2.2224674471926785e-06, "loss": 0.014214208722114563, "step": 86085 }, { "epoch": 0.8102588235294118, "grad_norm": 0.5272244528430652, "learning_rate": 2.2224029064349153e-06, "loss": 0.018224766850471495, "step": 86090 }, { "epoch": 0.8103058823529412, "grad_norm": 0.6067939241848095, "learning_rate": 2.222338371299643e-06, "loss": 0.016172990202903748, "step": 86095 }, { "epoch": 0.8103529411764706, "grad_norm": 0.4555747370476195, "learning_rate": 2.222273841786044e-06, "loss": 0.008922462165355683, "step": 86100 }, { "epoch": 0.8104, "grad_norm": 0.3286170318885907, "learning_rate": 2.2222093178933037e-06, "loss": 0.015448132157325744, "step": 86105 }, { "epoch": 0.8104470588235294, "grad_norm": 0.4560159949903125, "learning_rate": 2.2221447996206056e-06, "loss": 0.0138319730758667, "step": 86110 }, { "epoch": 0.8104941176470588, "grad_norm": 0.4010732477661993, "learning_rate": 2.2220802869671343e-06, "loss": 0.012521427869796754, "step": 86115 }, { "epoch": 0.8105411764705882, "grad_norm": 0.3726005221614761, "learning_rate": 2.222015779932074e-06, "loss": 0.013449656963348388, "step": 86120 }, { "epoch": 0.8105882352941176, "grad_norm": 0.46556755114610365, "learning_rate": 2.2219512785146087e-06, "loss": 0.014453208446502686, "step": 86125 }, { "epoch": 0.810635294117647, "grad_norm": 0.5986245172081982, "learning_rate": 2.221886782713924e-06, "loss": 0.016907957196235657, "step": 86130 }, { "epoch": 0.8106823529411765, "grad_norm": 0.5713958886161489, "learning_rate": 2.2218222925292033e-06, "loss": 0.017290283739566804, "step": 86135 }, { "epoch": 0.8107294117647059, "grad_norm": 0.6001155053550787, "learning_rate": 2.2217578079596333e-06, "loss": 0.014273449778556824, "step": 86140 }, { "epoch": 0.8107764705882353, "grad_norm": 0.3748153404310669, "learning_rate": 2.2216933290043983e-06, "loss": 0.017025014758110045, "step": 86145 }, { "epoch": 0.8108235294117647, "grad_norm": 0.3345997900564436, "learning_rate": 2.221628855662684e-06, "loss": 0.009930196404457092, "step": 86150 }, { "epoch": 0.8108705882352941, "grad_norm": 0.7761743359194369, "learning_rate": 2.2215643879336757e-06, "loss": 0.013028255105018616, "step": 86155 }, { "epoch": 0.8109176470588235, "grad_norm": 0.5651398673965442, "learning_rate": 2.2214999258165587e-06, "loss": 0.013723385334014893, "step": 86160 }, { "epoch": 0.8109647058823529, "grad_norm": 0.9559489949694754, "learning_rate": 2.2214354693105196e-06, "loss": 0.021789559721946718, "step": 86165 }, { "epoch": 0.8110117647058823, "grad_norm": 0.5831133758641052, "learning_rate": 2.2213710184147443e-06, "loss": 0.014700688421726227, "step": 86170 }, { "epoch": 0.8110588235294117, "grad_norm": 0.3220954550625981, "learning_rate": 2.2213065731284193e-06, "loss": 0.012979066371917725, "step": 86175 }, { "epoch": 0.8111058823529411, "grad_norm": 0.6253546031026661, "learning_rate": 2.22124213345073e-06, "loss": 0.012233635783195496, "step": 86180 }, { "epoch": 0.8111529411764706, "grad_norm": 0.6004266198627531, "learning_rate": 2.2211776993808634e-06, "loss": 0.015200531482696534, "step": 86185 }, { "epoch": 0.8112, "grad_norm": 0.7633393992196023, "learning_rate": 2.2211132709180057e-06, "loss": 0.012619352340698243, "step": 86190 }, { "epoch": 0.8112470588235294, "grad_norm": 0.3158983425893767, "learning_rate": 2.2210488480613453e-06, "loss": 0.013815993070602417, "step": 86195 }, { "epoch": 0.8112941176470588, "grad_norm": 0.3857005041729895, "learning_rate": 2.2209844308100672e-06, "loss": 0.016372407972812652, "step": 86200 }, { "epoch": 0.8113411764705882, "grad_norm": 0.531027362999878, "learning_rate": 2.22092001916336e-06, "loss": 0.013703233003616333, "step": 86205 }, { "epoch": 0.8113882352941176, "grad_norm": 0.560880984474002, "learning_rate": 2.220855613120411e-06, "loss": 0.015032720565795899, "step": 86210 }, { "epoch": 0.811435294117647, "grad_norm": 0.3925310318112188, "learning_rate": 2.220791212680406e-06, "loss": 0.016558438539505005, "step": 86215 }, { "epoch": 0.8114823529411764, "grad_norm": 0.4226280966347048, "learning_rate": 2.220726817842535e-06, "loss": 0.013205091655254363, "step": 86220 }, { "epoch": 0.8115294117647058, "grad_norm": 0.4637380112850814, "learning_rate": 2.2206624286059846e-06, "loss": 0.014491504430770874, "step": 86225 }, { "epoch": 0.8115764705882353, "grad_norm": 0.5970330927599197, "learning_rate": 2.2205980449699434e-06, "loss": 0.01723768413066864, "step": 86230 }, { "epoch": 0.8116235294117647, "grad_norm": 0.48482607154378765, "learning_rate": 2.2205336669335986e-06, "loss": 0.013477122783660889, "step": 86235 }, { "epoch": 0.8116705882352941, "grad_norm": 0.3716253874968174, "learning_rate": 2.2204692944961398e-06, "loss": 0.01440473347902298, "step": 86240 }, { "epoch": 0.8117176470588235, "grad_norm": 0.47705361303202715, "learning_rate": 2.220404927656754e-06, "loss": 0.016142736375331878, "step": 86245 }, { "epoch": 0.8117647058823529, "grad_norm": 0.40878796759052954, "learning_rate": 2.2203405664146312e-06, "loss": 0.014254045486450196, "step": 86250 }, { "epoch": 0.8118117647058823, "grad_norm": 0.5145356177346551, "learning_rate": 2.2202762107689594e-06, "loss": 0.013466159999370574, "step": 86255 }, { "epoch": 0.8118588235294117, "grad_norm": 0.5680812686733816, "learning_rate": 2.2202118607189287e-06, "loss": 0.019292716681957246, "step": 86260 }, { "epoch": 0.8119058823529411, "grad_norm": 0.5054307092519501, "learning_rate": 2.2201475162637264e-06, "loss": 0.012644624710083008, "step": 86265 }, { "epoch": 0.8119529411764705, "grad_norm": 0.4721436893197535, "learning_rate": 2.2200831774025435e-06, "loss": 0.012622992694377898, "step": 86270 }, { "epoch": 0.812, "grad_norm": 0.43341072256463065, "learning_rate": 2.2200188441345686e-06, "loss": 0.014406062662601471, "step": 86275 }, { "epoch": 0.8120470588235295, "grad_norm": 0.4620537164180414, "learning_rate": 2.219954516458992e-06, "loss": 0.013697871565818786, "step": 86280 }, { "epoch": 0.8120941176470589, "grad_norm": 0.5103878339096727, "learning_rate": 2.2198901943750026e-06, "loss": 0.013217157125473023, "step": 86285 }, { "epoch": 0.8121411764705883, "grad_norm": 0.3914237252986897, "learning_rate": 2.2198258778817914e-06, "loss": 0.01687939018011093, "step": 86290 }, { "epoch": 0.8121882352941177, "grad_norm": 0.2513700909110234, "learning_rate": 2.2197615669785476e-06, "loss": 0.011674201488494873, "step": 86295 }, { "epoch": 0.812235294117647, "grad_norm": 0.3771366024049066, "learning_rate": 2.219697261664462e-06, "loss": 0.01215599775314331, "step": 86300 }, { "epoch": 0.8122823529411765, "grad_norm": 0.5382754383832987, "learning_rate": 2.2196329619387255e-06, "loss": 0.015658125281333923, "step": 86305 }, { "epoch": 0.8123294117647059, "grad_norm": 0.4895766429832383, "learning_rate": 2.219568667800528e-06, "loss": 0.013727207481861115, "step": 86310 }, { "epoch": 0.8123764705882353, "grad_norm": 0.5049912738658043, "learning_rate": 2.2195043792490607e-06, "loss": 0.014061987400054932, "step": 86315 }, { "epoch": 0.8124235294117647, "grad_norm": 0.5336897493856528, "learning_rate": 2.2194400962835143e-06, "loss": 0.013625431060791015, "step": 86320 }, { "epoch": 0.8124705882352942, "grad_norm": 0.39824408914517634, "learning_rate": 2.2193758189030803e-06, "loss": 0.015483099222183227, "step": 86325 }, { "epoch": 0.8125176470588236, "grad_norm": 0.4539666277226053, "learning_rate": 2.21931154710695e-06, "loss": 0.013200363516807556, "step": 86330 }, { "epoch": 0.812564705882353, "grad_norm": 0.38374191461740614, "learning_rate": 2.2192472808943137e-06, "loss": 0.01194065809249878, "step": 86335 }, { "epoch": 0.8126117647058824, "grad_norm": 0.4499771381005037, "learning_rate": 2.2191830202643646e-06, "loss": 0.01337917298078537, "step": 86340 }, { "epoch": 0.8126588235294118, "grad_norm": 0.709071721017398, "learning_rate": 2.2191187652162935e-06, "loss": 0.017061489820480346, "step": 86345 }, { "epoch": 0.8127058823529412, "grad_norm": 0.4541785157602962, "learning_rate": 2.2190545157492933e-06, "loss": 0.010695838928222656, "step": 86350 }, { "epoch": 0.8127529411764706, "grad_norm": 0.5400782305404291, "learning_rate": 2.218990271862555e-06, "loss": 0.015042561292648315, "step": 86355 }, { "epoch": 0.8128, "grad_norm": 0.5094357393585826, "learning_rate": 2.2189260335552715e-06, "loss": 0.013604782521724701, "step": 86360 }, { "epoch": 0.8128470588235294, "grad_norm": 0.47920107529045175, "learning_rate": 2.218861800826635e-06, "loss": 0.01474546194076538, "step": 86365 }, { "epoch": 0.8128941176470589, "grad_norm": 0.5515917189331144, "learning_rate": 2.218797573675838e-06, "loss": 0.01491483747959137, "step": 86370 }, { "epoch": 0.8129411764705883, "grad_norm": 0.5476457593239553, "learning_rate": 2.2187333521020733e-06, "loss": 0.014586731791496277, "step": 86375 }, { "epoch": 0.8129882352941177, "grad_norm": 0.587035321170175, "learning_rate": 2.2186691361045343e-06, "loss": 0.013867396116256713, "step": 86380 }, { "epoch": 0.8130352941176471, "grad_norm": 0.5564015019311562, "learning_rate": 2.218604925682415e-06, "loss": 0.014588183164596558, "step": 86385 }, { "epoch": 0.8130823529411765, "grad_norm": 0.3068292981270235, "learning_rate": 2.2185407208349055e-06, "loss": 0.012410731613636016, "step": 86390 }, { "epoch": 0.8131294117647059, "grad_norm": 0.5326243146137749, "learning_rate": 2.2184765215612024e-06, "loss": 0.014231696724891663, "step": 86395 }, { "epoch": 0.8131764705882353, "grad_norm": 0.5210554411781612, "learning_rate": 2.2184123278604975e-06, "loss": 0.013726210594177246, "step": 86400 }, { "epoch": 0.8132235294117647, "grad_norm": 0.3828476987112596, "learning_rate": 2.2183481397319854e-06, "loss": 0.013784302771091462, "step": 86405 }, { "epoch": 0.8132705882352941, "grad_norm": 0.4926355507274792, "learning_rate": 2.2182839571748595e-06, "loss": 0.020697462558746337, "step": 86410 }, { "epoch": 0.8133176470588235, "grad_norm": 0.4842515662975885, "learning_rate": 2.2182197801883144e-06, "loss": 0.016298331320285797, "step": 86415 }, { "epoch": 0.813364705882353, "grad_norm": 0.6788009859601453, "learning_rate": 2.2181556087715434e-06, "loss": 0.018621082603931426, "step": 86420 }, { "epoch": 0.8134117647058824, "grad_norm": 0.4944440018870838, "learning_rate": 2.218091442923742e-06, "loss": 0.013339343667030334, "step": 86425 }, { "epoch": 0.8134588235294118, "grad_norm": 0.4800363122305762, "learning_rate": 2.218027282644104e-06, "loss": 0.022288957238197328, "step": 86430 }, { "epoch": 0.8135058823529412, "grad_norm": 0.45906525218109867, "learning_rate": 2.2179631279318247e-06, "loss": 0.01503559798002243, "step": 86435 }, { "epoch": 0.8135529411764706, "grad_norm": 0.4687957510115805, "learning_rate": 2.217898978786098e-06, "loss": 0.011299006640911102, "step": 86440 }, { "epoch": 0.8136, "grad_norm": 0.5439746753836388, "learning_rate": 2.217834835206121e-06, "loss": 0.012878811359405518, "step": 86445 }, { "epoch": 0.8136470588235294, "grad_norm": 0.5009639566336547, "learning_rate": 2.217770697191086e-06, "loss": 0.01361297070980072, "step": 86450 }, { "epoch": 0.8136941176470588, "grad_norm": 0.40271656058896105, "learning_rate": 2.2177065647401912e-06, "loss": 0.012337053567171097, "step": 86455 }, { "epoch": 0.8137411764705882, "grad_norm": 0.6598017601979568, "learning_rate": 2.217642437852631e-06, "loss": 0.017584407329559328, "step": 86460 }, { "epoch": 0.8137882352941177, "grad_norm": 0.8023619409033957, "learning_rate": 2.2175783165275997e-06, "loss": 0.014752334356307984, "step": 86465 }, { "epoch": 0.8138352941176471, "grad_norm": 0.4280210877722993, "learning_rate": 2.2175142007642954e-06, "loss": 0.019384294748306274, "step": 86470 }, { "epoch": 0.8138823529411765, "grad_norm": 0.5008480463819658, "learning_rate": 2.2174500905619127e-06, "loss": 0.011631502211093903, "step": 86475 }, { "epoch": 0.8139294117647059, "grad_norm": 0.5741050772300313, "learning_rate": 2.2173859859196487e-06, "loss": 0.013771182298660279, "step": 86480 }, { "epoch": 0.8139764705882353, "grad_norm": 0.3180759402458999, "learning_rate": 2.217321886836699e-06, "loss": 0.010194630920886993, "step": 86485 }, { "epoch": 0.8140235294117647, "grad_norm": 0.48137919834870846, "learning_rate": 2.2172577933122604e-06, "loss": 0.012055454403162002, "step": 86490 }, { "epoch": 0.8140705882352941, "grad_norm": 0.5461761051615956, "learning_rate": 2.2171937053455302e-06, "loss": 0.014923515915870666, "step": 86495 }, { "epoch": 0.8141176470588235, "grad_norm": 0.6085439963371438, "learning_rate": 2.217129622935704e-06, "loss": 0.015695832669734955, "step": 86500 }, { "epoch": 0.8141647058823529, "grad_norm": 0.4240489121942049, "learning_rate": 2.2170655460819797e-06, "loss": 0.011983875185251236, "step": 86505 }, { "epoch": 0.8142117647058823, "grad_norm": 0.3806978524206269, "learning_rate": 2.2170014747835544e-06, "loss": 0.014070338010787964, "step": 86510 }, { "epoch": 0.8142588235294118, "grad_norm": 0.5364716942154614, "learning_rate": 2.216937409039625e-06, "loss": 0.01545591801404953, "step": 86515 }, { "epoch": 0.8143058823529412, "grad_norm": 0.2071452326672018, "learning_rate": 2.216873348849389e-06, "loss": 0.012160184979438781, "step": 86520 }, { "epoch": 0.8143529411764706, "grad_norm": 0.6579634384739778, "learning_rate": 2.216809294212045e-06, "loss": 0.01757446825504303, "step": 86525 }, { "epoch": 0.8144, "grad_norm": 0.4881249373366022, "learning_rate": 2.21674524512679e-06, "loss": 0.017035150527954103, "step": 86530 }, { "epoch": 0.8144470588235294, "grad_norm": 0.44834819476446686, "learning_rate": 2.216681201592822e-06, "loss": 0.013474985957145691, "step": 86535 }, { "epoch": 0.8144941176470588, "grad_norm": 0.4619748964896077, "learning_rate": 2.216617163609339e-06, "loss": 0.01640353798866272, "step": 86540 }, { "epoch": 0.8145411764705882, "grad_norm": 0.5538262074336994, "learning_rate": 2.2165531311755395e-06, "loss": 0.013804829120635987, "step": 86545 }, { "epoch": 0.8145882352941176, "grad_norm": 0.7115672858275702, "learning_rate": 2.2164891042906227e-06, "loss": 0.01605566740036011, "step": 86550 }, { "epoch": 0.814635294117647, "grad_norm": 0.4294729513384574, "learning_rate": 2.2164250829537864e-06, "loss": 0.013375806808471679, "step": 86555 }, { "epoch": 0.8146823529411765, "grad_norm": 0.5378540957950498, "learning_rate": 2.2163610671642287e-06, "loss": 0.01100313663482666, "step": 86560 }, { "epoch": 0.8147294117647059, "grad_norm": 0.5304382961361029, "learning_rate": 2.21629705692115e-06, "loss": 0.014728358387947083, "step": 86565 }, { "epoch": 0.8147764705882353, "grad_norm": 0.31365305204124594, "learning_rate": 2.216233052223749e-06, "loss": 0.02036275267601013, "step": 86570 }, { "epoch": 0.8148235294117647, "grad_norm": 0.3524723012611923, "learning_rate": 2.216169053071225e-06, "loss": 0.01263478398323059, "step": 86575 }, { "epoch": 0.8148705882352941, "grad_norm": 0.4119462147235041, "learning_rate": 2.216105059462776e-06, "loss": 0.011125537753105163, "step": 86580 }, { "epoch": 0.8149176470588235, "grad_norm": 0.3469905426305227, "learning_rate": 2.2160410713976037e-06, "loss": 0.012552274763584137, "step": 86585 }, { "epoch": 0.8149647058823529, "grad_norm": 0.564120469282066, "learning_rate": 2.215977088874907e-06, "loss": 0.012075820565223694, "step": 86590 }, { "epoch": 0.8150117647058823, "grad_norm": 0.3675220939920206, "learning_rate": 2.2159131118938854e-06, "loss": 0.015094035863876342, "step": 86595 }, { "epoch": 0.8150588235294117, "grad_norm": 0.614956544415691, "learning_rate": 2.2158491404537394e-06, "loss": 0.016499838232994078, "step": 86600 }, { "epoch": 0.8151058823529411, "grad_norm": 0.46197408081097596, "learning_rate": 2.2157851745536693e-06, "loss": 0.012676361203193664, "step": 86605 }, { "epoch": 0.8151529411764706, "grad_norm": 0.4455297133461502, "learning_rate": 2.215721214192876e-06, "loss": 0.014065127074718475, "step": 86610 }, { "epoch": 0.8152, "grad_norm": 0.5541414215345009, "learning_rate": 2.215657259370559e-06, "loss": 0.0148101344704628, "step": 86615 }, { "epoch": 0.8152470588235294, "grad_norm": 0.7453914426894153, "learning_rate": 2.2155933100859193e-06, "loss": 0.01475985050201416, "step": 86620 }, { "epoch": 0.8152941176470588, "grad_norm": 0.26883305715620986, "learning_rate": 2.2155293663381582e-06, "loss": 0.009874506294727326, "step": 86625 }, { "epoch": 0.8153411764705882, "grad_norm": 0.3477640430403637, "learning_rate": 2.215465428126477e-06, "loss": 0.01281644105911255, "step": 86630 }, { "epoch": 0.8153882352941176, "grad_norm": 0.43626542117803785, "learning_rate": 2.215401495450076e-06, "loss": 0.016471248865127564, "step": 86635 }, { "epoch": 0.815435294117647, "grad_norm": 0.7866240884634821, "learning_rate": 2.2153375683081566e-06, "loss": 0.016587093472480774, "step": 86640 }, { "epoch": 0.8154823529411764, "grad_norm": 0.4917038730225363, "learning_rate": 2.2152736466999216e-06, "loss": 0.014655527472496033, "step": 86645 }, { "epoch": 0.8155294117647058, "grad_norm": 0.44460404642507934, "learning_rate": 2.215209730624572e-06, "loss": 0.012228309363126754, "step": 86650 }, { "epoch": 0.8155764705882353, "grad_norm": 0.38797515625575196, "learning_rate": 2.215145820081309e-06, "loss": 0.013728645443916321, "step": 86655 }, { "epoch": 0.8156235294117647, "grad_norm": 0.5660189535488621, "learning_rate": 2.2150819150693355e-06, "loss": 0.01468704640865326, "step": 86660 }, { "epoch": 0.8156705882352941, "grad_norm": 0.3827659399194892, "learning_rate": 2.215018015587853e-06, "loss": 0.018474353849887847, "step": 86665 }, { "epoch": 0.8157176470588235, "grad_norm": 0.37373990716420696, "learning_rate": 2.2149541216360644e-06, "loss": 0.011141812056303024, "step": 86670 }, { "epoch": 0.815764705882353, "grad_norm": 0.6610940621926007, "learning_rate": 2.214890233213172e-06, "loss": 0.015895543992519377, "step": 86675 }, { "epoch": 0.8158117647058823, "grad_norm": 0.3290696490441586, "learning_rate": 2.2148263503183785e-06, "loss": 0.012650589644908904, "step": 86680 }, { "epoch": 0.8158588235294117, "grad_norm": 0.3323696951800186, "learning_rate": 2.214762472950887e-06, "loss": 0.012619408965110778, "step": 86685 }, { "epoch": 0.8159058823529411, "grad_norm": 0.7795364632222642, "learning_rate": 2.2146986011098996e-06, "loss": 0.016818246245384215, "step": 86690 }, { "epoch": 0.8159529411764705, "grad_norm": 0.6808477039034007, "learning_rate": 2.2146347347946206e-06, "loss": 0.017059586942195892, "step": 86695 }, { "epoch": 0.816, "grad_norm": 0.4096925949938097, "learning_rate": 2.2145708740042525e-06, "loss": 0.013534742593765258, "step": 86700 }, { "epoch": 0.8160470588235295, "grad_norm": 0.4449784183362262, "learning_rate": 2.214507018737999e-06, "loss": 0.01509113609790802, "step": 86705 }, { "epoch": 0.8160941176470589, "grad_norm": 0.6740994959414024, "learning_rate": 2.2144431689950636e-06, "loss": 0.01697908192873001, "step": 86710 }, { "epoch": 0.8161411764705883, "grad_norm": 0.5053990651970602, "learning_rate": 2.2143793247746505e-06, "loss": 0.014561036229133606, "step": 86715 }, { "epoch": 0.8161882352941177, "grad_norm": 0.44656080798101017, "learning_rate": 2.2143154860759634e-06, "loss": 0.011085355281829834, "step": 86720 }, { "epoch": 0.8162352941176471, "grad_norm": 0.6693453790354951, "learning_rate": 2.214251652898206e-06, "loss": 0.014102134108543395, "step": 86725 }, { "epoch": 0.8162823529411765, "grad_norm": 0.309124033316342, "learning_rate": 2.2141878252405833e-06, "loss": 0.011315390467643738, "step": 86730 }, { "epoch": 0.8163294117647059, "grad_norm": 0.6108376099509811, "learning_rate": 2.2141240031022996e-06, "loss": 0.014277936518192291, "step": 86735 }, { "epoch": 0.8163764705882353, "grad_norm": 0.28184771658366414, "learning_rate": 2.214060186482559e-06, "loss": 0.016218115389347077, "step": 86740 }, { "epoch": 0.8164235294117647, "grad_norm": 0.4998869622490311, "learning_rate": 2.2139963753805664e-06, "loss": 0.015121793746948243, "step": 86745 }, { "epoch": 0.8164705882352942, "grad_norm": 0.521740517926695, "learning_rate": 2.213932569795527e-06, "loss": 0.017087167501449584, "step": 86750 }, { "epoch": 0.8165176470588236, "grad_norm": 0.5335745206911165, "learning_rate": 2.2138687697266456e-06, "loss": 0.012932822108268738, "step": 86755 }, { "epoch": 0.816564705882353, "grad_norm": 0.34616731387621164, "learning_rate": 2.213804975173128e-06, "loss": 0.013922673463821412, "step": 86760 }, { "epoch": 0.8166117647058824, "grad_norm": 0.37999948140110346, "learning_rate": 2.2137411861341787e-06, "loss": 0.01251130849123001, "step": 86765 }, { "epoch": 0.8166588235294118, "grad_norm": 0.25842295088423506, "learning_rate": 2.2136774026090036e-06, "loss": 0.011798937618732453, "step": 86770 }, { "epoch": 0.8167058823529412, "grad_norm": 0.6752288279668307, "learning_rate": 2.2136136245968094e-06, "loss": 0.01907054930925369, "step": 86775 }, { "epoch": 0.8167529411764706, "grad_norm": 0.38269885478478216, "learning_rate": 2.2135498520968003e-06, "loss": 0.013139489293098449, "step": 86780 }, { "epoch": 0.8168, "grad_norm": 0.5821466910771697, "learning_rate": 2.2134860851081834e-06, "loss": 0.012976467609405518, "step": 86785 }, { "epoch": 0.8168470588235294, "grad_norm": 0.34674493880549384, "learning_rate": 2.2134223236301646e-06, "loss": 0.014213547110557556, "step": 86790 }, { "epoch": 0.8168941176470588, "grad_norm": 0.5798270257416381, "learning_rate": 2.2133585676619507e-06, "loss": 0.015545380115509034, "step": 86795 }, { "epoch": 0.8169411764705883, "grad_norm": 0.5831582821915752, "learning_rate": 2.2132948172027473e-06, "loss": 0.015428313612937927, "step": 86800 }, { "epoch": 0.8169882352941177, "grad_norm": 0.3025911948137943, "learning_rate": 2.213231072251762e-06, "loss": 0.013982442021369935, "step": 86805 }, { "epoch": 0.8170352941176471, "grad_norm": 0.47005878975277043, "learning_rate": 2.213167332808201e-06, "loss": 0.011102955788373947, "step": 86810 }, { "epoch": 0.8170823529411765, "grad_norm": 0.31476791733245096, "learning_rate": 2.2131035988712714e-06, "loss": 0.011296974122524261, "step": 86815 }, { "epoch": 0.8171294117647059, "grad_norm": 0.4350554834778771, "learning_rate": 2.213039870440181e-06, "loss": 0.01904626190662384, "step": 86820 }, { "epoch": 0.8171764705882353, "grad_norm": 0.5014930426762123, "learning_rate": 2.2129761475141365e-06, "loss": 0.012998354434967042, "step": 86825 }, { "epoch": 0.8172235294117647, "grad_norm": 0.3071523819864054, "learning_rate": 2.212912430092345e-06, "loss": 0.019897088408470154, "step": 86830 }, { "epoch": 0.8172705882352941, "grad_norm": 0.4668049418402163, "learning_rate": 2.2128487181740146e-06, "loss": 0.014848113059997559, "step": 86835 }, { "epoch": 0.8173176470588235, "grad_norm": 0.4460690489085286, "learning_rate": 2.212785011758354e-06, "loss": 0.014604434370994568, "step": 86840 }, { "epoch": 0.817364705882353, "grad_norm": 0.49709447460938977, "learning_rate": 2.2127213108445694e-06, "loss": 0.017995427548885345, "step": 86845 }, { "epoch": 0.8174117647058824, "grad_norm": 0.4281083137840979, "learning_rate": 2.21265761543187e-06, "loss": 0.017197494208812714, "step": 86850 }, { "epoch": 0.8174588235294118, "grad_norm": 0.5007806454651746, "learning_rate": 2.2125939255194642e-06, "loss": 0.015428154170513153, "step": 86855 }, { "epoch": 0.8175058823529412, "grad_norm": 0.7195497189946142, "learning_rate": 2.21253024110656e-06, "loss": 0.017377927899360657, "step": 86860 }, { "epoch": 0.8175529411764706, "grad_norm": 0.5145203349732104, "learning_rate": 2.2124665621923655e-06, "loss": 0.011369875818490981, "step": 86865 }, { "epoch": 0.8176, "grad_norm": 0.8130923650101193, "learning_rate": 2.2124028887760906e-06, "loss": 0.017439582943916322, "step": 86870 }, { "epoch": 0.8176470588235294, "grad_norm": 0.5721376467038982, "learning_rate": 2.2123392208569436e-06, "loss": 0.015364545583724975, "step": 86875 }, { "epoch": 0.8176941176470588, "grad_norm": 0.41082605000807493, "learning_rate": 2.2122755584341336e-06, "loss": 0.015588000416755676, "step": 86880 }, { "epoch": 0.8177411764705882, "grad_norm": 0.5702711907195975, "learning_rate": 2.2122119015068696e-06, "loss": 0.013440196216106415, "step": 86885 }, { "epoch": 0.8177882352941176, "grad_norm": 0.48596984412534466, "learning_rate": 2.2121482500743617e-06, "loss": 0.01144343614578247, "step": 86890 }, { "epoch": 0.8178352941176471, "grad_norm": 0.47650455083390475, "learning_rate": 2.2120846041358184e-06, "loss": 0.018563762307167053, "step": 86895 }, { "epoch": 0.8178823529411765, "grad_norm": 0.44115551627858474, "learning_rate": 2.21202096369045e-06, "loss": 0.01551627516746521, "step": 86900 }, { "epoch": 0.8179294117647059, "grad_norm": 0.5498087036653704, "learning_rate": 2.2119573287374667e-06, "loss": 0.015871921181678773, "step": 86905 }, { "epoch": 0.8179764705882353, "grad_norm": 0.5998502154611376, "learning_rate": 2.2118936992760786e-06, "loss": 0.016081631183624268, "step": 86910 }, { "epoch": 0.8180235294117647, "grad_norm": 0.44537352813216374, "learning_rate": 2.211830075305495e-06, "loss": 0.016740745306015013, "step": 86915 }, { "epoch": 0.8180705882352941, "grad_norm": 0.5301995419231353, "learning_rate": 2.2117664568249267e-06, "loss": 0.021223951876163483, "step": 86920 }, { "epoch": 0.8181176470588235, "grad_norm": 0.4979770934274535, "learning_rate": 2.2117028438335844e-06, "loss": 0.015164582431316376, "step": 86925 }, { "epoch": 0.8181647058823529, "grad_norm": 0.6415466926580771, "learning_rate": 2.2116392363306784e-06, "loss": 0.01114729791879654, "step": 86930 }, { "epoch": 0.8182117647058823, "grad_norm": 0.3376413714833204, "learning_rate": 2.21157563431542e-06, "loss": 0.01406971514225006, "step": 86935 }, { "epoch": 0.8182588235294118, "grad_norm": 0.47345101154732167, "learning_rate": 2.2115120377870192e-06, "loss": 0.012826581299304963, "step": 86940 }, { "epoch": 0.8183058823529412, "grad_norm": 0.6021719030930756, "learning_rate": 2.211448446744689e-06, "loss": 0.018811962008476256, "step": 86945 }, { "epoch": 0.8183529411764706, "grad_norm": 0.48051759012937073, "learning_rate": 2.2113848611876387e-06, "loss": 0.015135517716407776, "step": 86950 }, { "epoch": 0.8184, "grad_norm": 0.8570095665262402, "learning_rate": 2.2113212811150808e-06, "loss": 0.01611292064189911, "step": 86955 }, { "epoch": 0.8184470588235294, "grad_norm": 0.4953674858627482, "learning_rate": 2.211257706526227e-06, "loss": 0.01186319813132286, "step": 86960 }, { "epoch": 0.8184941176470588, "grad_norm": 0.6734101970642653, "learning_rate": 2.2111941374202883e-06, "loss": 0.012874317169189454, "step": 86965 }, { "epoch": 0.8185411764705882, "grad_norm": 0.308372451000309, "learning_rate": 2.211130573796477e-06, "loss": 0.015938781201839447, "step": 86970 }, { "epoch": 0.8185882352941176, "grad_norm": 0.5620862219479809, "learning_rate": 2.2110670156540053e-06, "loss": 0.016691525280475617, "step": 86975 }, { "epoch": 0.818635294117647, "grad_norm": 0.7756563820404375, "learning_rate": 2.211003462992086e-06, "loss": 0.016707552969455718, "step": 86980 }, { "epoch": 0.8186823529411764, "grad_norm": 0.3878343832526474, "learning_rate": 2.2109399158099303e-06, "loss": 0.013227669894695282, "step": 86985 }, { "epoch": 0.8187294117647059, "grad_norm": 0.3513767594503693, "learning_rate": 2.2108763741067523e-06, "loss": 0.016840484738349915, "step": 86990 }, { "epoch": 0.8187764705882353, "grad_norm": 0.5207077464822162, "learning_rate": 2.2108128378817634e-06, "loss": 0.012209173291921616, "step": 86995 }, { "epoch": 0.8188235294117647, "grad_norm": 0.4862285468897839, "learning_rate": 2.2107493071341765e-06, "loss": 0.013878163695335389, "step": 87000 }, { "epoch": 0.8188705882352941, "grad_norm": 0.6330095377421132, "learning_rate": 2.2106857818632056e-06, "loss": 0.014876073598861695, "step": 87005 }, { "epoch": 0.8189176470588235, "grad_norm": 0.4533874570459852, "learning_rate": 2.2106222620680635e-06, "loss": 0.012242990732192992, "step": 87010 }, { "epoch": 0.8189647058823529, "grad_norm": 0.5025787086175801, "learning_rate": 2.2105587477479627e-06, "loss": 0.01627824306488037, "step": 87015 }, { "epoch": 0.8190117647058823, "grad_norm": 0.5053913674336772, "learning_rate": 2.2104952389021178e-06, "loss": 0.014124256372451783, "step": 87020 }, { "epoch": 0.8190588235294117, "grad_norm": 0.6431689431240862, "learning_rate": 2.2104317355297425e-06, "loss": 0.011876089870929718, "step": 87025 }, { "epoch": 0.8191058823529411, "grad_norm": 0.4270954378241352, "learning_rate": 2.21036823763005e-06, "loss": 0.01395616978406906, "step": 87030 }, { "epoch": 0.8191529411764706, "grad_norm": 0.31670682079702694, "learning_rate": 2.2103047452022545e-06, "loss": 0.014342659711837768, "step": 87035 }, { "epoch": 0.8192, "grad_norm": 0.5017927593793308, "learning_rate": 2.2102412582455705e-06, "loss": 0.014369653165340423, "step": 87040 }, { "epoch": 0.8192470588235294, "grad_norm": 0.43072821601871286, "learning_rate": 2.2101777767592116e-06, "loss": 0.015328028798103332, "step": 87045 }, { "epoch": 0.8192941176470588, "grad_norm": 0.4550977043969822, "learning_rate": 2.210114300742393e-06, "loss": 0.013969945907592773, "step": 87050 }, { "epoch": 0.8193411764705882, "grad_norm": 0.5859019547449897, "learning_rate": 2.2100508301943284e-06, "loss": 0.01910809278488159, "step": 87055 }, { "epoch": 0.8193882352941176, "grad_norm": 0.5912187372949331, "learning_rate": 2.2099873651142336e-06, "loss": 0.017517223954200745, "step": 87060 }, { "epoch": 0.819435294117647, "grad_norm": 0.6296338908437847, "learning_rate": 2.209923905501323e-06, "loss": 0.015523169934749604, "step": 87065 }, { "epoch": 0.8194823529411764, "grad_norm": 0.4190559113857082, "learning_rate": 2.2098604513548124e-06, "loss": 0.014388185739517213, "step": 87070 }, { "epoch": 0.8195294117647058, "grad_norm": 0.41012255727699715, "learning_rate": 2.209797002673916e-06, "loss": 0.015389478206634522, "step": 87075 }, { "epoch": 0.8195764705882352, "grad_norm": 0.5843493577700294, "learning_rate": 2.2097335594578493e-06, "loss": 0.01549486517906189, "step": 87080 }, { "epoch": 0.8196235294117648, "grad_norm": 0.3832903795812621, "learning_rate": 2.2096701217058285e-06, "loss": 0.011544285714626313, "step": 87085 }, { "epoch": 0.8196705882352942, "grad_norm": 0.4344659006485508, "learning_rate": 2.209606689417069e-06, "loss": 0.011544305086135864, "step": 87090 }, { "epoch": 0.8197176470588236, "grad_norm": 0.7115378927931757, "learning_rate": 2.209543262590787e-06, "loss": 0.020699293911457063, "step": 87095 }, { "epoch": 0.819764705882353, "grad_norm": 0.7201520924058904, "learning_rate": 2.2094798412261983e-06, "loss": 0.015094515681266785, "step": 87100 }, { "epoch": 0.8198117647058824, "grad_norm": 0.49476255488107146, "learning_rate": 2.2094164253225194e-06, "loss": 0.013209055364131927, "step": 87105 }, { "epoch": 0.8198588235294118, "grad_norm": 0.5855128774763682, "learning_rate": 2.2093530148789652e-06, "loss": 0.015426467359066009, "step": 87110 }, { "epoch": 0.8199058823529412, "grad_norm": 0.6545396870720129, "learning_rate": 2.209289609894755e-06, "loss": 0.018121686577796937, "step": 87115 }, { "epoch": 0.8199529411764706, "grad_norm": 0.5521682768460864, "learning_rate": 2.209226210369102e-06, "loss": 0.01647656559944153, "step": 87120 }, { "epoch": 0.82, "grad_norm": 0.5393666406117936, "learning_rate": 2.2091628163012263e-06, "loss": 0.014193037152290344, "step": 87125 }, { "epoch": 0.8200470588235295, "grad_norm": 0.4173697136616706, "learning_rate": 2.209099427690343e-06, "loss": 0.013281041383743286, "step": 87130 }, { "epoch": 0.8200941176470589, "grad_norm": 0.4586482869107235, "learning_rate": 2.20903604453567e-06, "loss": 0.01691671311855316, "step": 87135 }, { "epoch": 0.8201411764705883, "grad_norm": 0.3350333535865609, "learning_rate": 2.2089726668364235e-06, "loss": 0.012825459241867065, "step": 87140 }, { "epoch": 0.8201882352941177, "grad_norm": 0.3941551849469146, "learning_rate": 2.2089092945918225e-06, "loss": 0.012066550552845001, "step": 87145 }, { "epoch": 0.8202352941176471, "grad_norm": 0.44199577342058843, "learning_rate": 2.2088459278010833e-06, "loss": 0.01469050794839859, "step": 87150 }, { "epoch": 0.8202823529411765, "grad_norm": 0.6078613965569976, "learning_rate": 2.208782566463425e-06, "loss": 0.01783142387866974, "step": 87155 }, { "epoch": 0.8203294117647059, "grad_norm": 0.6504332274165958, "learning_rate": 2.2087192105780637e-06, "loss": 0.014621537923812867, "step": 87160 }, { "epoch": 0.8203764705882353, "grad_norm": 0.5597236009134017, "learning_rate": 2.2086558601442197e-06, "loss": 0.0137405663728714, "step": 87165 }, { "epoch": 0.8204235294117647, "grad_norm": 0.5685590204674001, "learning_rate": 2.2085925151611094e-06, "loss": 0.012704791128635406, "step": 87170 }, { "epoch": 0.8204705882352941, "grad_norm": 0.2608506834823542, "learning_rate": 2.2085291756279523e-06, "loss": 0.011418107897043228, "step": 87175 }, { "epoch": 0.8205176470588236, "grad_norm": 0.3964003688952634, "learning_rate": 2.208465841543966e-06, "loss": 0.017737424373626708, "step": 87180 }, { "epoch": 0.820564705882353, "grad_norm": 0.5000641575783535, "learning_rate": 2.20840251290837e-06, "loss": 0.016291074454784393, "step": 87185 }, { "epoch": 0.8206117647058824, "grad_norm": 0.45139498091501373, "learning_rate": 2.208339189720383e-06, "loss": 0.014015901088714599, "step": 87190 }, { "epoch": 0.8206588235294118, "grad_norm": 0.34969035643239277, "learning_rate": 2.2082758719792242e-06, "loss": 0.011293420195579528, "step": 87195 }, { "epoch": 0.8207058823529412, "grad_norm": 0.3449159205030897, "learning_rate": 2.2082125596841127e-06, "loss": 0.018527036905288695, "step": 87200 }, { "epoch": 0.8207529411764706, "grad_norm": 0.5069185221687017, "learning_rate": 2.2081492528342673e-06, "loss": 0.014955905079841614, "step": 87205 }, { "epoch": 0.8208, "grad_norm": 0.3617341975735414, "learning_rate": 2.208085951428908e-06, "loss": 0.010290531814098359, "step": 87210 }, { "epoch": 0.8208470588235294, "grad_norm": 0.6180758929758973, "learning_rate": 2.2080226554672543e-06, "loss": 0.013779902458190918, "step": 87215 }, { "epoch": 0.8208941176470588, "grad_norm": 0.5794860136432421, "learning_rate": 2.2079593649485264e-06, "loss": 0.01685284674167633, "step": 87220 }, { "epoch": 0.8209411764705883, "grad_norm": 0.5240801277955313, "learning_rate": 2.2078960798719433e-06, "loss": 0.013793128728866576, "step": 87225 }, { "epoch": 0.8209882352941177, "grad_norm": 0.5206936456606435, "learning_rate": 2.2078328002367265e-06, "loss": 0.013333010673522949, "step": 87230 }, { "epoch": 0.8210352941176471, "grad_norm": 0.8292401928087507, "learning_rate": 2.207769526042095e-06, "loss": 0.01839317977428436, "step": 87235 }, { "epoch": 0.8210823529411765, "grad_norm": 0.4737051192465107, "learning_rate": 2.20770625728727e-06, "loss": 0.014831620454788207, "step": 87240 }, { "epoch": 0.8211294117647059, "grad_norm": 0.6868209331427888, "learning_rate": 2.207642993971472e-06, "loss": 0.01726810187101364, "step": 87245 }, { "epoch": 0.8211764705882353, "grad_norm": 0.7059318720024291, "learning_rate": 2.2075797360939216e-06, "loss": 0.013757939636707305, "step": 87250 }, { "epoch": 0.8212235294117647, "grad_norm": 0.4485263433315796, "learning_rate": 2.2075164836538394e-06, "loss": 0.012422728538513183, "step": 87255 }, { "epoch": 0.8212705882352941, "grad_norm": 0.443898819320045, "learning_rate": 2.207453236650447e-06, "loss": 0.012211918085813522, "step": 87260 }, { "epoch": 0.8213176470588235, "grad_norm": 0.536388447295105, "learning_rate": 2.2073899950829652e-06, "loss": 0.021039193868637084, "step": 87265 }, { "epoch": 0.8213647058823529, "grad_norm": 0.8035319668503751, "learning_rate": 2.2073267589506157e-06, "loss": 0.012618377804756165, "step": 87270 }, { "epoch": 0.8214117647058824, "grad_norm": 0.4694819282127514, "learning_rate": 2.20726352825262e-06, "loss": 0.018259425461292268, "step": 87275 }, { "epoch": 0.8214588235294118, "grad_norm": 0.24456262072950694, "learning_rate": 2.2072003029881993e-06, "loss": 0.014143000543117522, "step": 87280 }, { "epoch": 0.8215058823529412, "grad_norm": 0.49539877059692916, "learning_rate": 2.2071370831565765e-06, "loss": 0.0107060045003891, "step": 87285 }, { "epoch": 0.8215529411764706, "grad_norm": 0.4779413060461829, "learning_rate": 2.2070738687569727e-06, "loss": 0.011478616297245026, "step": 87290 }, { "epoch": 0.8216, "grad_norm": 0.29914995507107894, "learning_rate": 2.20701065978861e-06, "loss": 0.014335730671882629, "step": 87295 }, { "epoch": 0.8216470588235294, "grad_norm": 0.6779094306125091, "learning_rate": 2.2069474562507114e-06, "loss": 0.01228061318397522, "step": 87300 }, { "epoch": 0.8216941176470588, "grad_norm": 0.2619430791076436, "learning_rate": 2.206884258142499e-06, "loss": 0.011773331463336945, "step": 87305 }, { "epoch": 0.8217411764705882, "grad_norm": 0.4053735199338114, "learning_rate": 2.206821065463195e-06, "loss": 0.015285283327102661, "step": 87310 }, { "epoch": 0.8217882352941176, "grad_norm": 0.5573147769280327, "learning_rate": 2.206757878212023e-06, "loss": 0.01593039631843567, "step": 87315 }, { "epoch": 0.8218352941176471, "grad_norm": 0.635294002802553, "learning_rate": 2.2066946963882047e-06, "loss": 0.014746005833148956, "step": 87320 }, { "epoch": 0.8218823529411765, "grad_norm": 0.3945267598297605, "learning_rate": 2.206631519990965e-06, "loss": 0.013270735740661621, "step": 87325 }, { "epoch": 0.8219294117647059, "grad_norm": 0.6755444247006287, "learning_rate": 2.206568349019526e-06, "loss": 0.013238266110420227, "step": 87330 }, { "epoch": 0.8219764705882353, "grad_norm": 0.7478320954113258, "learning_rate": 2.206505183473111e-06, "loss": 0.013704200088977814, "step": 87335 }, { "epoch": 0.8220235294117647, "grad_norm": 0.6314531252588919, "learning_rate": 2.2064420233509434e-06, "loss": 0.0205498069524765, "step": 87340 }, { "epoch": 0.8220705882352941, "grad_norm": 0.572963940218779, "learning_rate": 2.2063788686522477e-06, "loss": 0.012467361986637115, "step": 87345 }, { "epoch": 0.8221176470588235, "grad_norm": 0.3767654515005089, "learning_rate": 2.2063157193762473e-06, "loss": 0.012840229272842407, "step": 87350 }, { "epoch": 0.8221647058823529, "grad_norm": 0.43329912312368524, "learning_rate": 2.206252575522166e-06, "loss": 0.01373913288116455, "step": 87355 }, { "epoch": 0.8222117647058823, "grad_norm": 0.3740155836465664, "learning_rate": 2.206189437089229e-06, "loss": 0.01351657509803772, "step": 87360 }, { "epoch": 0.8222588235294118, "grad_norm": 0.4521234290973745, "learning_rate": 2.2061263040766595e-06, "loss": 0.016895893216133117, "step": 87365 }, { "epoch": 0.8223058823529412, "grad_norm": 0.4747069391359263, "learning_rate": 2.2060631764836827e-06, "loss": 0.014594297111034393, "step": 87370 }, { "epoch": 0.8223529411764706, "grad_norm": 0.5729323886320328, "learning_rate": 2.206000054309522e-06, "loss": 0.016910535097122193, "step": 87375 }, { "epoch": 0.8224, "grad_norm": 0.5234795333956844, "learning_rate": 2.205936937553404e-06, "loss": 0.01624332368373871, "step": 87380 }, { "epoch": 0.8224470588235294, "grad_norm": 0.4219046043685404, "learning_rate": 2.205873826214553e-06, "loss": 0.014724609255790711, "step": 87385 }, { "epoch": 0.8224941176470588, "grad_norm": 0.3943995007078762, "learning_rate": 2.205810720292194e-06, "loss": 0.014280647039413452, "step": 87390 }, { "epoch": 0.8225411764705882, "grad_norm": 0.2952219248712623, "learning_rate": 2.205747619785551e-06, "loss": 0.014418306946754455, "step": 87395 }, { "epoch": 0.8225882352941176, "grad_norm": 0.5702481977547591, "learning_rate": 2.205684524693852e-06, "loss": 0.013584589958190918, "step": 87400 }, { "epoch": 0.822635294117647, "grad_norm": 0.4410582847202089, "learning_rate": 2.2056214350163203e-06, "loss": 0.011801834404468536, "step": 87405 }, { "epoch": 0.8226823529411764, "grad_norm": 0.4976823624814203, "learning_rate": 2.205558350752183e-06, "loss": 0.01607571542263031, "step": 87410 }, { "epoch": 0.8227294117647059, "grad_norm": 0.7358679518097008, "learning_rate": 2.205495271900665e-06, "loss": 0.015653395652770997, "step": 87415 }, { "epoch": 0.8227764705882353, "grad_norm": 0.3708213057929655, "learning_rate": 2.2054321984609932e-06, "loss": 0.011944874376058578, "step": 87420 }, { "epoch": 0.8228235294117647, "grad_norm": 0.6232525794264252, "learning_rate": 2.205369130432393e-06, "loss": 0.014476174116134643, "step": 87425 }, { "epoch": 0.8228705882352941, "grad_norm": 0.4485655820740173, "learning_rate": 2.2053060678140914e-06, "loss": 0.013414442539215088, "step": 87430 }, { "epoch": 0.8229176470588235, "grad_norm": 0.4321673693535195, "learning_rate": 2.2052430106053148e-06, "loss": 0.015067169070243835, "step": 87435 }, { "epoch": 0.8229647058823529, "grad_norm": 0.4491833982296454, "learning_rate": 2.2051799588052903e-06, "loss": 0.013571247458457947, "step": 87440 }, { "epoch": 0.8230117647058823, "grad_norm": 0.6743530863960328, "learning_rate": 2.205116912413243e-06, "loss": 0.01776466965675354, "step": 87445 }, { "epoch": 0.8230588235294117, "grad_norm": 0.4342360850111246, "learning_rate": 2.2050538714284017e-06, "loss": 0.013270270824432374, "step": 87450 }, { "epoch": 0.8231058823529411, "grad_norm": 0.9377863022316535, "learning_rate": 2.204990835849993e-06, "loss": 0.017036136984825135, "step": 87455 }, { "epoch": 0.8231529411764706, "grad_norm": 0.5910705546179489, "learning_rate": 2.204927805677244e-06, "loss": 0.014485487341880798, "step": 87460 }, { "epoch": 0.8232, "grad_norm": 0.7907025663007752, "learning_rate": 2.204864780909382e-06, "loss": 0.015034577250480652, "step": 87465 }, { "epoch": 0.8232470588235294, "grad_norm": 0.5490736952608389, "learning_rate": 2.2048017615456345e-06, "loss": 0.013394108414649964, "step": 87470 }, { "epoch": 0.8232941176470588, "grad_norm": 0.7032472511380161, "learning_rate": 2.20473874758523e-06, "loss": 0.017133677005767824, "step": 87475 }, { "epoch": 0.8233411764705882, "grad_norm": 0.4535156085233402, "learning_rate": 2.204675739027396e-06, "loss": 0.014424806833267212, "step": 87480 }, { "epoch": 0.8233882352941176, "grad_norm": 0.35407192686213923, "learning_rate": 2.20461273587136e-06, "loss": 0.015981581807136536, "step": 87485 }, { "epoch": 0.823435294117647, "grad_norm": 0.3320210242106963, "learning_rate": 2.204549738116351e-06, "loss": 0.012283427268266678, "step": 87490 }, { "epoch": 0.8234823529411764, "grad_norm": 0.5642581422128107, "learning_rate": 2.2044867457615965e-06, "loss": 0.015120859444141387, "step": 87495 }, { "epoch": 0.8235294117647058, "grad_norm": 0.4300682672225031, "learning_rate": 2.204423758806326e-06, "loss": 0.013459798693656922, "step": 87500 }, { "epoch": 0.8235764705882352, "grad_norm": 0.4971494661161941, "learning_rate": 2.2043607772497675e-06, "loss": 0.015956556797027587, "step": 87505 }, { "epoch": 0.8236235294117648, "grad_norm": 0.47073710182151235, "learning_rate": 2.2042978010911503e-06, "loss": 0.017448550462722777, "step": 87510 }, { "epoch": 0.8236705882352942, "grad_norm": 0.32891913790230315, "learning_rate": 2.204234830329703e-06, "loss": 0.012550896406173706, "step": 87515 }, { "epoch": 0.8237176470588236, "grad_norm": 0.47917814718257834, "learning_rate": 2.2041718649646545e-06, "loss": 0.014247065782546997, "step": 87520 }, { "epoch": 0.823764705882353, "grad_norm": 0.5186021554543488, "learning_rate": 2.204108904995235e-06, "loss": 0.016852053999900817, "step": 87525 }, { "epoch": 0.8238117647058824, "grad_norm": 0.27776751808904193, "learning_rate": 2.204045950420673e-06, "loss": 0.011044377833604813, "step": 87530 }, { "epoch": 0.8238588235294118, "grad_norm": 0.6259370488324589, "learning_rate": 2.2039830012401987e-06, "loss": 0.015217271447181702, "step": 87535 }, { "epoch": 0.8239058823529412, "grad_norm": 0.5051768008465606, "learning_rate": 2.2039200574530415e-06, "loss": 0.012628936767578125, "step": 87540 }, { "epoch": 0.8239529411764706, "grad_norm": 0.7156494978453436, "learning_rate": 2.203857119058431e-06, "loss": 0.015914586186408997, "step": 87545 }, { "epoch": 0.824, "grad_norm": 0.5291382761960223, "learning_rate": 2.2037941860555982e-06, "loss": 0.013989238440990448, "step": 87550 }, { "epoch": 0.8240470588235295, "grad_norm": 0.5989222287768189, "learning_rate": 2.2037312584437727e-06, "loss": 0.01775764226913452, "step": 87555 }, { "epoch": 0.8240941176470589, "grad_norm": 0.19548791565271442, "learning_rate": 2.203668336222185e-06, "loss": 0.008847919851541519, "step": 87560 }, { "epoch": 0.8241411764705883, "grad_norm": 0.7073186068306171, "learning_rate": 2.203605419390065e-06, "loss": 0.01452776938676834, "step": 87565 }, { "epoch": 0.8241882352941177, "grad_norm": 0.5764396902609165, "learning_rate": 2.2035425079466443e-06, "loss": 0.01566529870033264, "step": 87570 }, { "epoch": 0.8242352941176471, "grad_norm": 0.7994216127074525, "learning_rate": 2.203479601891154e-06, "loss": 0.015730792284011842, "step": 87575 }, { "epoch": 0.8242823529411765, "grad_norm": 0.5355076055672776, "learning_rate": 2.2034167012228233e-06, "loss": 0.01709800660610199, "step": 87580 }, { "epoch": 0.8243294117647059, "grad_norm": 0.4030660678500889, "learning_rate": 2.203353805940885e-06, "loss": 0.013396164774894715, "step": 87585 }, { "epoch": 0.8243764705882353, "grad_norm": 0.2848309751197564, "learning_rate": 2.2032909160445703e-06, "loss": 0.01343342661857605, "step": 87590 }, { "epoch": 0.8244235294117647, "grad_norm": 0.5768116708011556, "learning_rate": 2.2032280315331095e-06, "loss": 0.016286739706993104, "step": 87595 }, { "epoch": 0.8244705882352941, "grad_norm": 0.4532813092285682, "learning_rate": 2.2031651524057353e-06, "loss": 0.011967174708843231, "step": 87600 }, { "epoch": 0.8245176470588236, "grad_norm": 0.3940842558360113, "learning_rate": 2.2031022786616786e-06, "loss": 0.014359459280967712, "step": 87605 }, { "epoch": 0.824564705882353, "grad_norm": 0.3636081338651623, "learning_rate": 2.2030394103001723e-06, "loss": 0.015305644273757935, "step": 87610 }, { "epoch": 0.8246117647058824, "grad_norm": 0.6202811332552594, "learning_rate": 2.2029765473204475e-06, "loss": 0.009930352866649627, "step": 87615 }, { "epoch": 0.8246588235294118, "grad_norm": 0.40476763563964574, "learning_rate": 2.202913689721737e-06, "loss": 0.014214371144771577, "step": 87620 }, { "epoch": 0.8247058823529412, "grad_norm": 0.4464213855531125, "learning_rate": 2.202850837503273e-06, "loss": 0.016908809542655945, "step": 87625 }, { "epoch": 0.8247529411764706, "grad_norm": 0.4703626747236305, "learning_rate": 2.202787990664288e-06, "loss": 0.016140078008174897, "step": 87630 }, { "epoch": 0.8248, "grad_norm": 0.37003144878716443, "learning_rate": 2.2027251492040144e-06, "loss": 0.0143808513879776, "step": 87635 }, { "epoch": 0.8248470588235294, "grad_norm": 0.5405829125584907, "learning_rate": 2.2026623131216853e-06, "loss": 0.023408207297325134, "step": 87640 }, { "epoch": 0.8248941176470588, "grad_norm": 0.4857082433095435, "learning_rate": 2.202599482416534e-06, "loss": 0.01317189186811447, "step": 87645 }, { "epoch": 0.8249411764705883, "grad_norm": 0.48951283464187983, "learning_rate": 2.202536657087793e-06, "loss": 0.014964506030082703, "step": 87650 }, { "epoch": 0.8249882352941177, "grad_norm": 0.4380089729642455, "learning_rate": 2.2024738371346955e-06, "loss": 0.01863154172897339, "step": 87655 }, { "epoch": 0.8250352941176471, "grad_norm": 0.5362489093799466, "learning_rate": 2.202411022556476e-06, "loss": 0.012628388404846192, "step": 87660 }, { "epoch": 0.8250823529411765, "grad_norm": 0.743290801976303, "learning_rate": 2.2023482133523665e-06, "loss": 0.014322769641876221, "step": 87665 }, { "epoch": 0.8251294117647059, "grad_norm": 0.5377573412426406, "learning_rate": 2.2022854095216024e-06, "loss": 0.013124682009220123, "step": 87670 }, { "epoch": 0.8251764705882353, "grad_norm": 0.3902040037480864, "learning_rate": 2.202222611063416e-06, "loss": 0.012723404169082641, "step": 87675 }, { "epoch": 0.8252235294117647, "grad_norm": 0.5118584358242123, "learning_rate": 2.202159817977043e-06, "loss": 0.014737577736377716, "step": 87680 }, { "epoch": 0.8252705882352941, "grad_norm": 0.6279701706194889, "learning_rate": 2.202097030261716e-06, "loss": 0.016365762054920196, "step": 87685 }, { "epoch": 0.8253176470588235, "grad_norm": 0.4422883458845333, "learning_rate": 2.2020342479166706e-06, "loss": 0.014176487922668457, "step": 87690 }, { "epoch": 0.8253647058823529, "grad_norm": 0.35781814391734623, "learning_rate": 2.2019714709411403e-06, "loss": 0.011234940588474273, "step": 87695 }, { "epoch": 0.8254117647058824, "grad_norm": 0.4326850076243435, "learning_rate": 2.2019086993343607e-06, "loss": 0.015062381327152253, "step": 87700 }, { "epoch": 0.8254588235294118, "grad_norm": 0.7861752731165341, "learning_rate": 2.2018459330955664e-06, "loss": 0.012089738249778747, "step": 87705 }, { "epoch": 0.8255058823529412, "grad_norm": 0.3114833695429571, "learning_rate": 2.2017831722239917e-06, "loss": 0.013254216313362122, "step": 87710 }, { "epoch": 0.8255529411764706, "grad_norm": 0.37613241348212145, "learning_rate": 2.2017204167188718e-06, "loss": 0.016305361688137055, "step": 87715 }, { "epoch": 0.8256, "grad_norm": 0.496450436654423, "learning_rate": 2.201657666579442e-06, "loss": 0.012760362029075623, "step": 87720 }, { "epoch": 0.8256470588235294, "grad_norm": 0.36741538860122475, "learning_rate": 2.201594921804939e-06, "loss": 0.013294699788093566, "step": 87725 }, { "epoch": 0.8256941176470588, "grad_norm": 0.3343899995638741, "learning_rate": 2.201532182394597e-06, "loss": 0.015283030271530152, "step": 87730 }, { "epoch": 0.8257411764705882, "grad_norm": 0.7070113867302319, "learning_rate": 2.201469448347652e-06, "loss": 0.014958585798740386, "step": 87735 }, { "epoch": 0.8257882352941176, "grad_norm": 0.33442077755350064, "learning_rate": 2.2014067196633404e-06, "loss": 0.015099909901618958, "step": 87740 }, { "epoch": 0.8258352941176471, "grad_norm": 0.4385212918721796, "learning_rate": 2.2013439963408977e-06, "loss": 0.01536971926689148, "step": 87745 }, { "epoch": 0.8258823529411765, "grad_norm": 0.4652403740077454, "learning_rate": 2.2012812783795598e-06, "loss": 0.011236689984798431, "step": 87750 }, { "epoch": 0.8259294117647059, "grad_norm": 0.5651462376184673, "learning_rate": 2.2012185657785636e-06, "loss": 0.01982834339141846, "step": 87755 }, { "epoch": 0.8259764705882353, "grad_norm": 0.6015256311421342, "learning_rate": 2.2011558585371455e-06, "loss": 0.015523779392242431, "step": 87760 }, { "epoch": 0.8260235294117647, "grad_norm": 0.42055496526811986, "learning_rate": 2.201093156654542e-06, "loss": 0.012891216576099396, "step": 87765 }, { "epoch": 0.8260705882352941, "grad_norm": 0.4139626379080195, "learning_rate": 2.20103046012999e-06, "loss": 0.015827611088752747, "step": 87770 }, { "epoch": 0.8261176470588235, "grad_norm": 0.5079366454056973, "learning_rate": 2.2009677689627264e-06, "loss": 0.010079031437635421, "step": 87775 }, { "epoch": 0.8261647058823529, "grad_norm": 0.560158747955139, "learning_rate": 2.200905083151988e-06, "loss": 0.015393853187561035, "step": 87780 }, { "epoch": 0.8262117647058823, "grad_norm": 0.4968301883773177, "learning_rate": 2.200842402697013e-06, "loss": 0.015757268667221068, "step": 87785 }, { "epoch": 0.8262588235294117, "grad_norm": 0.31888337969621794, "learning_rate": 2.2007797275970375e-06, "loss": 0.015288041532039642, "step": 87790 }, { "epoch": 0.8263058823529412, "grad_norm": 0.5371402080755602, "learning_rate": 2.2007170578512998e-06, "loss": 0.015589727461338044, "step": 87795 }, { "epoch": 0.8263529411764706, "grad_norm": 0.41780475731911454, "learning_rate": 2.2006543934590372e-06, "loss": 0.014243999123573303, "step": 87800 }, { "epoch": 0.8264, "grad_norm": 0.5061857099574004, "learning_rate": 2.2005917344194884e-06, "loss": 0.013935932517051696, "step": 87805 }, { "epoch": 0.8264470588235294, "grad_norm": 0.604402022550445, "learning_rate": 2.2005290807318906e-06, "loss": 0.015385851263999939, "step": 87810 }, { "epoch": 0.8264941176470588, "grad_norm": 0.466527747929827, "learning_rate": 2.2004664323954824e-06, "loss": 0.015829962491989136, "step": 87815 }, { "epoch": 0.8265411764705882, "grad_norm": 0.2927278086952824, "learning_rate": 2.2004037894095017e-06, "loss": 0.011923037469387054, "step": 87820 }, { "epoch": 0.8265882352941176, "grad_norm": 0.39973003158753767, "learning_rate": 2.200341151773187e-06, "loss": 0.013529469072818757, "step": 87825 }, { "epoch": 0.826635294117647, "grad_norm": 0.4206092998918242, "learning_rate": 2.2002785194857772e-06, "loss": 0.013105377554893494, "step": 87830 }, { "epoch": 0.8266823529411764, "grad_norm": 0.4086796205018429, "learning_rate": 2.2002158925465113e-06, "loss": 0.01182377189397812, "step": 87835 }, { "epoch": 0.8267294117647059, "grad_norm": 0.6682594394452566, "learning_rate": 2.200153270954628e-06, "loss": 0.01589842140674591, "step": 87840 }, { "epoch": 0.8267764705882353, "grad_norm": 0.3996760366616294, "learning_rate": 2.200090654709365e-06, "loss": 0.01435348391532898, "step": 87845 }, { "epoch": 0.8268235294117647, "grad_norm": 0.37713432387393264, "learning_rate": 2.200028043809964e-06, "loss": 0.01792082190513611, "step": 87850 }, { "epoch": 0.8268705882352941, "grad_norm": 0.735704186820637, "learning_rate": 2.199965438255663e-06, "loss": 0.017959409952163698, "step": 87855 }, { "epoch": 0.8269176470588235, "grad_norm": 0.36069901205626337, "learning_rate": 2.1999028380457015e-06, "loss": 0.016156136989593506, "step": 87860 }, { "epoch": 0.8269647058823529, "grad_norm": 0.5436795619146636, "learning_rate": 2.199840243179319e-06, "loss": 0.01229168325662613, "step": 87865 }, { "epoch": 0.8270117647058823, "grad_norm": 0.6778398995429282, "learning_rate": 2.1997776536557558e-06, "loss": 0.013826538622379304, "step": 87870 }, { "epoch": 0.8270588235294117, "grad_norm": 0.5028133618721584, "learning_rate": 2.199715069474252e-06, "loss": 0.016816726326942442, "step": 87875 }, { "epoch": 0.8271058823529411, "grad_norm": 0.4101751559436669, "learning_rate": 2.199652490634047e-06, "loss": 0.015221455693244934, "step": 87880 }, { "epoch": 0.8271529411764705, "grad_norm": 0.3627311873125421, "learning_rate": 2.1995899171343816e-06, "loss": 0.016572314500808715, "step": 87885 }, { "epoch": 0.8272, "grad_norm": 0.3495091604648553, "learning_rate": 2.199527348974497e-06, "loss": 0.012472291290760041, "step": 87890 }, { "epoch": 0.8272470588235294, "grad_norm": 0.3085743224463332, "learning_rate": 2.199464786153632e-06, "loss": 0.014573906362056733, "step": 87895 }, { "epoch": 0.8272941176470588, "grad_norm": 0.45838931646372166, "learning_rate": 2.199402228671029e-06, "loss": 0.011836409568786621, "step": 87900 }, { "epoch": 0.8273411764705882, "grad_norm": 0.4207134472793841, "learning_rate": 2.199339676525928e-06, "loss": 0.018269890546798707, "step": 87905 }, { "epoch": 0.8273882352941176, "grad_norm": 0.49871754084455644, "learning_rate": 2.19927712971757e-06, "loss": 0.015455347299575806, "step": 87910 }, { "epoch": 0.827435294117647, "grad_norm": 0.5038958668158119, "learning_rate": 2.199214588245196e-06, "loss": 0.012805373966693878, "step": 87915 }, { "epoch": 0.8274823529411764, "grad_norm": 0.649384785601223, "learning_rate": 2.1991520521080486e-06, "loss": 0.016324362158775328, "step": 87920 }, { "epoch": 0.8275294117647058, "grad_norm": 0.6251393454042047, "learning_rate": 2.199089521305368e-06, "loss": 0.018780510127544402, "step": 87925 }, { "epoch": 0.8275764705882352, "grad_norm": 0.6249891796954967, "learning_rate": 2.199026995836396e-06, "loss": 0.013442914187908172, "step": 87930 }, { "epoch": 0.8276235294117648, "grad_norm": 0.5056467300792784, "learning_rate": 2.1989644757003756e-06, "loss": 0.015564367175102234, "step": 87935 }, { "epoch": 0.8276705882352942, "grad_norm": 0.3732641773074416, "learning_rate": 2.198901960896547e-06, "loss": 0.012644487619400024, "step": 87940 }, { "epoch": 0.8277176470588236, "grad_norm": 0.4627971777158937, "learning_rate": 2.198839451424153e-06, "loss": 0.012459472566843034, "step": 87945 }, { "epoch": 0.827764705882353, "grad_norm": 0.5714362819587775, "learning_rate": 2.1987769472824363e-06, "loss": 0.015064537525177002, "step": 87950 }, { "epoch": 0.8278117647058824, "grad_norm": 0.6182400502015881, "learning_rate": 2.198714448470639e-06, "loss": 0.010743553936481475, "step": 87955 }, { "epoch": 0.8278588235294118, "grad_norm": 0.5856369013837437, "learning_rate": 2.1986519549880036e-06, "loss": 0.01465909630060196, "step": 87960 }, { "epoch": 0.8279058823529412, "grad_norm": 0.3973999174840986, "learning_rate": 2.1985894668337728e-06, "loss": 0.013509419560432435, "step": 87965 }, { "epoch": 0.8279529411764706, "grad_norm": 0.45780275370188744, "learning_rate": 2.198526984007189e-06, "loss": 0.01164340078830719, "step": 87970 }, { "epoch": 0.828, "grad_norm": 0.4668446970803881, "learning_rate": 2.198464506507496e-06, "loss": 0.014933761954307557, "step": 87975 }, { "epoch": 0.8280470588235294, "grad_norm": 0.78713364109715, "learning_rate": 2.1984020343339363e-06, "loss": 0.013771672546863557, "step": 87980 }, { "epoch": 0.8280941176470589, "grad_norm": 0.4679142598859188, "learning_rate": 2.1983395674857537e-06, "loss": 0.012729920446872711, "step": 87985 }, { "epoch": 0.8281411764705883, "grad_norm": 0.7382203667420234, "learning_rate": 2.1982771059621914e-06, "loss": 0.01663389801979065, "step": 87990 }, { "epoch": 0.8281882352941177, "grad_norm": 0.4953500005426687, "learning_rate": 2.1982146497624928e-06, "loss": 0.01314660906791687, "step": 87995 }, { "epoch": 0.8282352941176471, "grad_norm": 0.6907242971051667, "learning_rate": 2.1981521988859024e-06, "loss": 0.019762787222862243, "step": 88000 }, { "epoch": 0.8282823529411765, "grad_norm": 0.5397875923776198, "learning_rate": 2.198089753331663e-06, "loss": 0.018912377953529357, "step": 88005 }, { "epoch": 0.8283294117647059, "grad_norm": 0.39120738053428294, "learning_rate": 2.1980273130990193e-06, "loss": 0.015465620160102844, "step": 88010 }, { "epoch": 0.8283764705882353, "grad_norm": 0.550548005386416, "learning_rate": 2.1979648781872157e-06, "loss": 0.014184099435806275, "step": 88015 }, { "epoch": 0.8284235294117647, "grad_norm": 0.5801649382962988, "learning_rate": 2.197902448595496e-06, "loss": 0.014839117228984833, "step": 88020 }, { "epoch": 0.8284705882352941, "grad_norm": 0.3320038903203505, "learning_rate": 2.197840024323105e-06, "loss": 0.01407465934753418, "step": 88025 }, { "epoch": 0.8285176470588236, "grad_norm": 0.569379692285389, "learning_rate": 2.1977776053692876e-06, "loss": 0.011822821944952011, "step": 88030 }, { "epoch": 0.828564705882353, "grad_norm": 0.8013659176837107, "learning_rate": 2.197715191733288e-06, "loss": 0.019885879755020142, "step": 88035 }, { "epoch": 0.8286117647058824, "grad_norm": 0.4230210523332615, "learning_rate": 2.1976527834143517e-06, "loss": 0.012828800082206725, "step": 88040 }, { "epoch": 0.8286588235294118, "grad_norm": 0.6696021524598385, "learning_rate": 2.1975903804117234e-06, "loss": 0.01618482917547226, "step": 88045 }, { "epoch": 0.8287058823529412, "grad_norm": 0.6292970505157532, "learning_rate": 2.197527982724649e-06, "loss": 0.012699373066425323, "step": 88050 }, { "epoch": 0.8287529411764706, "grad_norm": 0.407184543286833, "learning_rate": 2.1974655903523725e-06, "loss": 0.013364957273006439, "step": 88055 }, { "epoch": 0.8288, "grad_norm": 0.6003739634792183, "learning_rate": 2.1974032032941413e-06, "loss": 0.012686645984649659, "step": 88060 }, { "epoch": 0.8288470588235294, "grad_norm": 0.5473377479986794, "learning_rate": 2.1973408215492e-06, "loss": 0.01970299631357193, "step": 88065 }, { "epoch": 0.8288941176470588, "grad_norm": 0.6013703633540521, "learning_rate": 2.1972784451167946e-06, "loss": 0.016422417759895325, "step": 88070 }, { "epoch": 0.8289411764705882, "grad_norm": 0.4697584274595567, "learning_rate": 2.1972160739961705e-06, "loss": 0.011995795369148254, "step": 88075 }, { "epoch": 0.8289882352941177, "grad_norm": 0.44231588984230197, "learning_rate": 2.1971537081865752e-06, "loss": 0.015980789065361024, "step": 88080 }, { "epoch": 0.8290352941176471, "grad_norm": 0.5862952661783573, "learning_rate": 2.1970913476872543e-06, "loss": 0.012935687601566315, "step": 88085 }, { "epoch": 0.8290823529411765, "grad_norm": 0.45778464525806417, "learning_rate": 2.197028992497454e-06, "loss": 0.01883220374584198, "step": 88090 }, { "epoch": 0.8291294117647059, "grad_norm": 0.5639388532702139, "learning_rate": 2.1969666426164212e-06, "loss": 0.012467403709888459, "step": 88095 }, { "epoch": 0.8291764705882353, "grad_norm": 0.45392618493931763, "learning_rate": 2.1969042980434026e-06, "loss": 0.016595745086669923, "step": 88100 }, { "epoch": 0.8292235294117647, "grad_norm": 0.45747537563475105, "learning_rate": 2.1968419587776455e-06, "loss": 0.013633203506469727, "step": 88105 }, { "epoch": 0.8292705882352941, "grad_norm": 0.46223149751383497, "learning_rate": 2.1967796248183957e-06, "loss": 0.015835648775100707, "step": 88110 }, { "epoch": 0.8293176470588235, "grad_norm": 0.28906289016222564, "learning_rate": 2.1967172961649018e-06, "loss": 0.011894651502370835, "step": 88115 }, { "epoch": 0.8293647058823529, "grad_norm": 0.2849588525376278, "learning_rate": 2.196654972816411e-06, "loss": 0.010436759889125824, "step": 88120 }, { "epoch": 0.8294117647058824, "grad_norm": 0.42994061494911423, "learning_rate": 2.1965926547721693e-06, "loss": 0.017664624750614165, "step": 88125 }, { "epoch": 0.8294588235294118, "grad_norm": 0.6730919242741779, "learning_rate": 2.196530342031426e-06, "loss": 0.013007572293281556, "step": 88130 }, { "epoch": 0.8295058823529412, "grad_norm": 0.5157853965351109, "learning_rate": 2.1964680345934287e-06, "loss": 0.012700921297073365, "step": 88135 }, { "epoch": 0.8295529411764706, "grad_norm": 0.5225960313471097, "learning_rate": 2.1964057324574248e-06, "loss": 0.015697166323661804, "step": 88140 }, { "epoch": 0.8296, "grad_norm": 0.3975397953669538, "learning_rate": 2.196343435622662e-06, "loss": 0.01606135070323944, "step": 88145 }, { "epoch": 0.8296470588235294, "grad_norm": 0.43851718936882195, "learning_rate": 2.19628114408839e-06, "loss": 0.013998481631278991, "step": 88150 }, { "epoch": 0.8296941176470588, "grad_norm": 0.6791218876335637, "learning_rate": 2.1962188578538557e-06, "loss": 0.0149770125746727, "step": 88155 }, { "epoch": 0.8297411764705882, "grad_norm": 0.485477714616494, "learning_rate": 2.196156576918309e-06, "loss": 0.013296887278556824, "step": 88160 }, { "epoch": 0.8297882352941176, "grad_norm": 0.49508105531660135, "learning_rate": 2.196094301280997e-06, "loss": 0.013752645254135132, "step": 88165 }, { "epoch": 0.829835294117647, "grad_norm": 0.2814143012423628, "learning_rate": 2.1960320309411694e-06, "loss": 0.01365169882774353, "step": 88170 }, { "epoch": 0.8298823529411765, "grad_norm": 0.5184234395293328, "learning_rate": 2.1959697658980757e-06, "loss": 0.015554052591323853, "step": 88175 }, { "epoch": 0.8299294117647059, "grad_norm": 0.24554154640822792, "learning_rate": 2.1959075061509645e-06, "loss": 0.010408009588718414, "step": 88180 }, { "epoch": 0.8299764705882353, "grad_norm": 0.4927498796366517, "learning_rate": 2.195845251699085e-06, "loss": 0.01229168176651001, "step": 88185 }, { "epoch": 0.8300235294117647, "grad_norm": 0.2910740222045584, "learning_rate": 2.1957830025416866e-06, "loss": 0.009235914796590805, "step": 88190 }, { "epoch": 0.8300705882352941, "grad_norm": 0.4687456337951132, "learning_rate": 2.195720758678019e-06, "loss": 0.013721910119056702, "step": 88195 }, { "epoch": 0.8301176470588235, "grad_norm": 0.6021722413263678, "learning_rate": 2.1956585201073325e-06, "loss": 0.017059949040412904, "step": 88200 }, { "epoch": 0.8301647058823529, "grad_norm": 0.6727412260775505, "learning_rate": 2.1955962868288756e-06, "loss": 0.013905361294746399, "step": 88205 }, { "epoch": 0.8302117647058823, "grad_norm": 0.38363004698679404, "learning_rate": 2.1955340588418996e-06, "loss": 0.014703929424285889, "step": 88210 }, { "epoch": 0.8302588235294117, "grad_norm": 0.49261604370083917, "learning_rate": 2.195471836145655e-06, "loss": 0.01412605047225952, "step": 88215 }, { "epoch": 0.8303058823529412, "grad_norm": 0.6884237453531266, "learning_rate": 2.1954096187393904e-06, "loss": 0.013425935804843903, "step": 88220 }, { "epoch": 0.8303529411764706, "grad_norm": 0.5962563161616202, "learning_rate": 2.195347406622358e-06, "loss": 0.01500326544046402, "step": 88225 }, { "epoch": 0.8304, "grad_norm": 0.3633523923673881, "learning_rate": 2.1952851997938073e-06, "loss": 0.011735764145851136, "step": 88230 }, { "epoch": 0.8304470588235294, "grad_norm": 0.4034933023405388, "learning_rate": 2.1952229982529892e-06, "loss": 0.01664859354496002, "step": 88235 }, { "epoch": 0.8304941176470588, "grad_norm": 0.5822637815214314, "learning_rate": 2.1951608019991556e-06, "loss": 0.018680912256240845, "step": 88240 }, { "epoch": 0.8305411764705882, "grad_norm": 0.5358130027989633, "learning_rate": 2.1950986110315563e-06, "loss": 0.014951807260513306, "step": 88245 }, { "epoch": 0.8305882352941176, "grad_norm": 0.6531372619637025, "learning_rate": 2.195036425349444e-06, "loss": 0.0164490669965744, "step": 88250 }, { "epoch": 0.830635294117647, "grad_norm": 0.27049369619608304, "learning_rate": 2.1949742449520683e-06, "loss": 0.009500342607498168, "step": 88255 }, { "epoch": 0.8306823529411764, "grad_norm": 0.3941155953459702, "learning_rate": 2.1949120698386816e-06, "loss": 0.012157627195119858, "step": 88260 }, { "epoch": 0.8307294117647058, "grad_norm": 0.43421165129130296, "learning_rate": 2.194849900008536e-06, "loss": 0.012359996140003205, "step": 88265 }, { "epoch": 0.8307764705882353, "grad_norm": 0.36790948410643587, "learning_rate": 2.1947877354608827e-06, "loss": 0.015892627835273742, "step": 88270 }, { "epoch": 0.8308235294117647, "grad_norm": 0.35679232787868526, "learning_rate": 2.194725576194974e-06, "loss": 0.01077043116092682, "step": 88275 }, { "epoch": 0.8308705882352941, "grad_norm": 0.6613230870458723, "learning_rate": 2.1946634222100616e-06, "loss": 0.0172633558511734, "step": 88280 }, { "epoch": 0.8309176470588235, "grad_norm": 0.704968726552781, "learning_rate": 2.1946012735053983e-06, "loss": 0.01752474009990692, "step": 88285 }, { "epoch": 0.8309647058823529, "grad_norm": 0.8892604643727036, "learning_rate": 2.194539130080236e-06, "loss": 0.01539604514837265, "step": 88290 }, { "epoch": 0.8310117647058823, "grad_norm": 0.6033809598137375, "learning_rate": 2.1944769919338276e-06, "loss": 0.016404010355472565, "step": 88295 }, { "epoch": 0.8310588235294117, "grad_norm": 0.4435985351316915, "learning_rate": 2.1944148590654256e-06, "loss": 0.01622467190027237, "step": 88300 }, { "epoch": 0.8311058823529411, "grad_norm": 0.6390505447320373, "learning_rate": 2.1943527314742832e-06, "loss": 0.013916525244712829, "step": 88305 }, { "epoch": 0.8311529411764705, "grad_norm": 0.6404756957265038, "learning_rate": 2.194290609159653e-06, "loss": 0.015111155807971954, "step": 88310 }, { "epoch": 0.8312, "grad_norm": 0.7258105477764488, "learning_rate": 2.1942284921207883e-06, "loss": 0.012888036668300629, "step": 88315 }, { "epoch": 0.8312470588235294, "grad_norm": 0.5436141888341832, "learning_rate": 2.194166380356942e-06, "loss": 0.015541580319404603, "step": 88320 }, { "epoch": 0.8312941176470589, "grad_norm": 0.44036397409955436, "learning_rate": 2.1941042738673685e-06, "loss": 0.015107482671737671, "step": 88325 }, { "epoch": 0.8313411764705883, "grad_norm": 0.5546940026949481, "learning_rate": 2.1940421726513207e-06, "loss": 0.01543400138616562, "step": 88330 }, { "epoch": 0.8313882352941177, "grad_norm": 0.3608087353005553, "learning_rate": 2.193980076708053e-06, "loss": 0.014868031442165374, "step": 88335 }, { "epoch": 0.831435294117647, "grad_norm": 0.5553178165158484, "learning_rate": 2.193917986036818e-06, "loss": 0.014933383464813233, "step": 88340 }, { "epoch": 0.8314823529411765, "grad_norm": 0.5667782865943065, "learning_rate": 2.1938559006368705e-06, "loss": 0.012407466024160384, "step": 88345 }, { "epoch": 0.8315294117647059, "grad_norm": 0.28332909489359354, "learning_rate": 2.193793820507465e-06, "loss": 0.014536678791046143, "step": 88350 }, { "epoch": 0.8315764705882353, "grad_norm": 0.5606859912222958, "learning_rate": 2.1937317456478554e-06, "loss": 0.01386638879776001, "step": 88355 }, { "epoch": 0.8316235294117647, "grad_norm": 0.40085321529961854, "learning_rate": 2.1936696760572967e-06, "loss": 0.0122124083340168, "step": 88360 }, { "epoch": 0.8316705882352942, "grad_norm": 0.404716885226735, "learning_rate": 2.1936076117350427e-06, "loss": 0.011600950360298156, "step": 88365 }, { "epoch": 0.8317176470588236, "grad_norm": 0.5783912507321786, "learning_rate": 2.1935455526803486e-06, "loss": 0.014647063612937928, "step": 88370 }, { "epoch": 0.831764705882353, "grad_norm": 0.3642295414375382, "learning_rate": 2.193483498892469e-06, "loss": 0.01848873347043991, "step": 88375 }, { "epoch": 0.8318117647058824, "grad_norm": 0.5328892278612799, "learning_rate": 2.19342145037066e-06, "loss": 0.01571687459945679, "step": 88380 }, { "epoch": 0.8318588235294118, "grad_norm": 0.6166742643976151, "learning_rate": 2.193359407114176e-06, "loss": 0.011450005322694778, "step": 88385 }, { "epoch": 0.8319058823529412, "grad_norm": 0.47360773423135843, "learning_rate": 2.1932973691222725e-06, "loss": 0.012440329790115357, "step": 88390 }, { "epoch": 0.8319529411764706, "grad_norm": 0.3713015344674276, "learning_rate": 2.1932353363942048e-06, "loss": 0.016000892221927642, "step": 88395 }, { "epoch": 0.832, "grad_norm": 0.3697940528632142, "learning_rate": 2.1931733089292288e-06, "loss": 0.012371072173118591, "step": 88400 }, { "epoch": 0.8320470588235294, "grad_norm": 0.46952929312367736, "learning_rate": 2.1931112867266004e-06, "loss": 0.013641902804374694, "step": 88405 }, { "epoch": 0.8320941176470589, "grad_norm": 0.5978004718808361, "learning_rate": 2.193049269785575e-06, "loss": 0.01929665058851242, "step": 88410 }, { "epoch": 0.8321411764705883, "grad_norm": 0.43023475241782416, "learning_rate": 2.192987258105409e-06, "loss": 0.011190960556268692, "step": 88415 }, { "epoch": 0.8321882352941177, "grad_norm": 0.3469100349897088, "learning_rate": 2.1929252516853594e-06, "loss": 0.01298629641532898, "step": 88420 }, { "epoch": 0.8322352941176471, "grad_norm": 0.38572269663282094, "learning_rate": 2.192863250524682e-06, "loss": 0.01036311537027359, "step": 88425 }, { "epoch": 0.8322823529411765, "grad_norm": 0.3418685743580026, "learning_rate": 2.192801254622633e-06, "loss": 0.01355847269296646, "step": 88430 }, { "epoch": 0.8323294117647059, "grad_norm": 0.5191707814934794, "learning_rate": 2.1927392639784696e-06, "loss": 0.016840019822120668, "step": 88435 }, { "epoch": 0.8323764705882353, "grad_norm": 0.47627497633928595, "learning_rate": 2.1926772785914485e-06, "loss": 0.018320631980895997, "step": 88440 }, { "epoch": 0.8324235294117647, "grad_norm": 0.35206303096192537, "learning_rate": 2.1926152984608264e-06, "loss": 0.012012669444084167, "step": 88445 }, { "epoch": 0.8324705882352941, "grad_norm": 0.42742558139155706, "learning_rate": 2.1925533235858603e-06, "loss": 0.014784350991249084, "step": 88450 }, { "epoch": 0.8325176470588236, "grad_norm": 0.3773258608456155, "learning_rate": 2.1924913539658086e-06, "loss": 0.01595536172389984, "step": 88455 }, { "epoch": 0.832564705882353, "grad_norm": 0.5887873685887589, "learning_rate": 2.1924293895999278e-06, "loss": 0.012278951704502106, "step": 88460 }, { "epoch": 0.8326117647058824, "grad_norm": 0.3988701484317018, "learning_rate": 2.1923674304874753e-06, "loss": 0.012907946109771728, "step": 88465 }, { "epoch": 0.8326588235294118, "grad_norm": 0.4172599414071548, "learning_rate": 2.1923054766277087e-06, "loss": 0.019353292882442474, "step": 88470 }, { "epoch": 0.8327058823529412, "grad_norm": 0.49918882967192063, "learning_rate": 2.1922435280198874e-06, "loss": 0.013199335336685181, "step": 88475 }, { "epoch": 0.8327529411764706, "grad_norm": 0.5004559547699277, "learning_rate": 2.192181584663268e-06, "loss": 0.013405232131481171, "step": 88480 }, { "epoch": 0.8328, "grad_norm": 0.3560903978952732, "learning_rate": 2.192119646557109e-06, "loss": 0.013452206552028657, "step": 88485 }, { "epoch": 0.8328470588235294, "grad_norm": 0.30628661996743345, "learning_rate": 2.192057713700668e-06, "loss": 0.013495039939880372, "step": 88490 }, { "epoch": 0.8328941176470588, "grad_norm": 0.3288459962673973, "learning_rate": 2.191995786093205e-06, "loss": 0.01390230804681778, "step": 88495 }, { "epoch": 0.8329411764705882, "grad_norm": 0.4889094048112521, "learning_rate": 2.191933863733977e-06, "loss": 0.01721339821815491, "step": 88500 }, { "epoch": 0.8329882352941177, "grad_norm": 0.3715975354064463, "learning_rate": 2.1918719466222446e-06, "loss": 0.01227157786488533, "step": 88505 }, { "epoch": 0.8330352941176471, "grad_norm": 0.48504434405235014, "learning_rate": 2.191810034757265e-06, "loss": 0.014023953676223755, "step": 88510 }, { "epoch": 0.8330823529411765, "grad_norm": 0.7759222742401019, "learning_rate": 2.191748128138298e-06, "loss": 0.013727603852748871, "step": 88515 }, { "epoch": 0.8331294117647059, "grad_norm": 0.5763055874568519, "learning_rate": 2.191686226764602e-06, "loss": 0.03682369887828827, "step": 88520 }, { "epoch": 0.8331764705882353, "grad_norm": 0.6184828880639984, "learning_rate": 2.1916243306354376e-06, "loss": 0.012695801258087159, "step": 88525 }, { "epoch": 0.8332235294117647, "grad_norm": 0.592303217996669, "learning_rate": 2.1915624397500635e-06, "loss": 0.016490566730499267, "step": 88530 }, { "epoch": 0.8332705882352941, "grad_norm": 0.7408188559802008, "learning_rate": 2.1915005541077394e-06, "loss": 0.014078649878501891, "step": 88535 }, { "epoch": 0.8333176470588235, "grad_norm": 0.3168083310466982, "learning_rate": 2.191438673707725e-06, "loss": 0.017645978927612306, "step": 88540 }, { "epoch": 0.8333647058823529, "grad_norm": 0.43459816318012845, "learning_rate": 2.1913767985492808e-06, "loss": 0.009826773405075073, "step": 88545 }, { "epoch": 0.8334117647058824, "grad_norm": 0.6550810860078924, "learning_rate": 2.191314928631666e-06, "loss": 0.015534274280071259, "step": 88550 }, { "epoch": 0.8334588235294118, "grad_norm": 0.36069768462017743, "learning_rate": 2.1912530639541413e-06, "loss": 0.013739649951457978, "step": 88555 }, { "epoch": 0.8335058823529412, "grad_norm": 0.39649784060997756, "learning_rate": 2.191191204515967e-06, "loss": 0.015654616057872772, "step": 88560 }, { "epoch": 0.8335529411764706, "grad_norm": 0.32905855913138987, "learning_rate": 2.1911293503164037e-06, "loss": 0.015486350655555725, "step": 88565 }, { "epoch": 0.8336, "grad_norm": 0.26147885858129616, "learning_rate": 2.191067501354712e-06, "loss": 0.012399132549762725, "step": 88570 }, { "epoch": 0.8336470588235294, "grad_norm": 0.6670034475437598, "learning_rate": 2.1910056576301526e-06, "loss": 0.011975958198308944, "step": 88575 }, { "epoch": 0.8336941176470588, "grad_norm": 0.6441922375247972, "learning_rate": 2.190943819141986e-06, "loss": 0.015417635440826416, "step": 88580 }, { "epoch": 0.8337411764705882, "grad_norm": 0.3534288673009623, "learning_rate": 2.190881985889474e-06, "loss": 0.01750909835100174, "step": 88585 }, { "epoch": 0.8337882352941176, "grad_norm": 0.6777429216780276, "learning_rate": 2.190820157871878e-06, "loss": 0.017326617240905763, "step": 88590 }, { "epoch": 0.833835294117647, "grad_norm": 0.2416375356811414, "learning_rate": 2.190758335088459e-06, "loss": 0.01439661979675293, "step": 88595 }, { "epoch": 0.8338823529411765, "grad_norm": 0.6892835472053105, "learning_rate": 2.1906965175384775e-06, "loss": 0.01894657015800476, "step": 88600 }, { "epoch": 0.8339294117647059, "grad_norm": 0.36314533080095385, "learning_rate": 2.190634705221197e-06, "loss": 0.01324675977230072, "step": 88605 }, { "epoch": 0.8339764705882353, "grad_norm": 0.33052277242244527, "learning_rate": 2.190572898135879e-06, "loss": 0.011482094973325729, "step": 88610 }, { "epoch": 0.8340235294117647, "grad_norm": 0.7854754802910514, "learning_rate": 2.1905110962817838e-06, "loss": 0.016766199469566347, "step": 88615 }, { "epoch": 0.8340705882352941, "grad_norm": 0.6050798048214201, "learning_rate": 2.190449299658175e-06, "loss": 0.01443183422088623, "step": 88620 }, { "epoch": 0.8341176470588235, "grad_norm": 0.4544621767954769, "learning_rate": 2.190387508264315e-06, "loss": 0.015853163599967957, "step": 88625 }, { "epoch": 0.8341647058823529, "grad_norm": 0.2943776406037097, "learning_rate": 2.1903257220994657e-06, "loss": 0.011721041053533554, "step": 88630 }, { "epoch": 0.8342117647058823, "grad_norm": 0.2546750193223964, "learning_rate": 2.190263941162889e-06, "loss": 0.014699557423591613, "step": 88635 }, { "epoch": 0.8342588235294117, "grad_norm": 0.4428692366511764, "learning_rate": 2.1902021654538486e-06, "loss": 0.017032116651535034, "step": 88640 }, { "epoch": 0.8343058823529412, "grad_norm": 0.45945428068177196, "learning_rate": 2.190140394971607e-06, "loss": 0.018010845780372618, "step": 88645 }, { "epoch": 0.8343529411764706, "grad_norm": 0.5735684259350894, "learning_rate": 2.190078629715427e-06, "loss": 0.015002653002738953, "step": 88650 }, { "epoch": 0.8344, "grad_norm": 0.4556534070642952, "learning_rate": 2.1900168696845723e-06, "loss": 0.011994987726211548, "step": 88655 }, { "epoch": 0.8344470588235294, "grad_norm": 0.5532923619444625, "learning_rate": 2.189955114878305e-06, "loss": 0.01384533941745758, "step": 88660 }, { "epoch": 0.8344941176470588, "grad_norm": 0.5558556409835689, "learning_rate": 2.18989336529589e-06, "loss": 0.012492047250270843, "step": 88665 }, { "epoch": 0.8345411764705882, "grad_norm": 0.7814878727457928, "learning_rate": 2.1898316209365903e-06, "loss": 0.017457945644855498, "step": 88670 }, { "epoch": 0.8345882352941176, "grad_norm": 0.2846437771843665, "learning_rate": 2.1897698817996687e-06, "loss": 0.010475555062294006, "step": 88675 }, { "epoch": 0.834635294117647, "grad_norm": 0.3510183280817378, "learning_rate": 2.1897081478843905e-06, "loss": 0.01248514950275421, "step": 88680 }, { "epoch": 0.8346823529411764, "grad_norm": 0.619230306106392, "learning_rate": 2.1896464191900184e-06, "loss": 0.014168591797351837, "step": 88685 }, { "epoch": 0.8347294117647058, "grad_norm": 0.45095595840880653, "learning_rate": 2.189584695715818e-06, "loss": 0.013947685062885285, "step": 88690 }, { "epoch": 0.8347764705882353, "grad_norm": 0.4798487736574478, "learning_rate": 2.1895229774610517e-06, "loss": 0.014846703410148621, "step": 88695 }, { "epoch": 0.8348235294117647, "grad_norm": 0.4473258447582786, "learning_rate": 2.1894612644249857e-06, "loss": 0.010846404731273651, "step": 88700 }, { "epoch": 0.8348705882352941, "grad_norm": 0.39430688930612484, "learning_rate": 2.189399556606884e-06, "loss": 0.010177402198314667, "step": 88705 }, { "epoch": 0.8349176470588235, "grad_norm": 0.44729085636476007, "learning_rate": 2.189337854006011e-06, "loss": 0.012537358701229096, "step": 88710 }, { "epoch": 0.8349647058823529, "grad_norm": 0.41215790650585943, "learning_rate": 2.189276156621631e-06, "loss": 0.014887705445289612, "step": 88715 }, { "epoch": 0.8350117647058823, "grad_norm": 0.5154430279076119, "learning_rate": 2.1892144644530107e-06, "loss": 0.011754277348518371, "step": 88720 }, { "epoch": 0.8350588235294117, "grad_norm": 0.6122307128695708, "learning_rate": 2.189152777499414e-06, "loss": 0.014814735949039459, "step": 88725 }, { "epoch": 0.8351058823529411, "grad_norm": 0.42104653932263647, "learning_rate": 2.1890910957601065e-06, "loss": 0.015073731541633606, "step": 88730 }, { "epoch": 0.8351529411764705, "grad_norm": 0.359938891549114, "learning_rate": 2.189029419234354e-06, "loss": 0.0110364630818367, "step": 88735 }, { "epoch": 0.8352, "grad_norm": 0.5375733984488943, "learning_rate": 2.1889677479214216e-06, "loss": 0.017675071954727173, "step": 88740 }, { "epoch": 0.8352470588235295, "grad_norm": 0.5874075504956923, "learning_rate": 2.188906081820575e-06, "loss": 0.011100499331951142, "step": 88745 }, { "epoch": 0.8352941176470589, "grad_norm": 0.4672851828709026, "learning_rate": 2.18884442093108e-06, "loss": 0.016488555073738097, "step": 88750 }, { "epoch": 0.8353411764705883, "grad_norm": 0.6025319840146603, "learning_rate": 2.188782765252204e-06, "loss": 0.017952854931354522, "step": 88755 }, { "epoch": 0.8353882352941177, "grad_norm": 0.518718403822935, "learning_rate": 2.1887211147832116e-06, "loss": 0.019984132051467894, "step": 88760 }, { "epoch": 0.835435294117647, "grad_norm": 0.6100032962822871, "learning_rate": 2.1886594695233695e-06, "loss": 0.01595284640789032, "step": 88765 }, { "epoch": 0.8354823529411765, "grad_norm": 0.48419097164513736, "learning_rate": 2.188597829471944e-06, "loss": 0.014387191832065582, "step": 88770 }, { "epoch": 0.8355294117647059, "grad_norm": 0.5471198976225216, "learning_rate": 2.1885361946282025e-06, "loss": 0.013696667551994324, "step": 88775 }, { "epoch": 0.8355764705882353, "grad_norm": 0.4940305583724101, "learning_rate": 2.1884745649914113e-06, "loss": 0.0153617262840271, "step": 88780 }, { "epoch": 0.8356235294117647, "grad_norm": 0.4798514206384666, "learning_rate": 2.188412940560837e-06, "loss": 0.013905997574329376, "step": 88785 }, { "epoch": 0.8356705882352942, "grad_norm": 0.5769442368704019, "learning_rate": 2.188351321335747e-06, "loss": 0.011508341878652573, "step": 88790 }, { "epoch": 0.8357176470588236, "grad_norm": 0.6608374442174747, "learning_rate": 2.1882897073154083e-06, "loss": 0.016587591171264647, "step": 88795 }, { "epoch": 0.835764705882353, "grad_norm": 0.4526107704078958, "learning_rate": 2.1882280984990885e-06, "loss": 0.01421177089214325, "step": 88800 }, { "epoch": 0.8358117647058824, "grad_norm": 0.32105122360798277, "learning_rate": 2.1881664948860544e-06, "loss": 0.01242273449897766, "step": 88805 }, { "epoch": 0.8358588235294118, "grad_norm": 0.34288851934483056, "learning_rate": 2.1881048964755745e-06, "loss": 0.017553770542144777, "step": 88810 }, { "epoch": 0.8359058823529412, "grad_norm": 0.3254610003681106, "learning_rate": 2.1880433032669163e-06, "loss": 0.01535942554473877, "step": 88815 }, { "epoch": 0.8359529411764706, "grad_norm": 0.3870008468872252, "learning_rate": 2.187981715259348e-06, "loss": 0.013371358811855315, "step": 88820 }, { "epoch": 0.836, "grad_norm": 0.4010051994792907, "learning_rate": 2.187920132452136e-06, "loss": 0.014019212126731873, "step": 88825 }, { "epoch": 0.8360470588235294, "grad_norm": 0.36846527527809003, "learning_rate": 2.1878585548445503e-06, "loss": 0.01364576667547226, "step": 88830 }, { "epoch": 0.8360941176470589, "grad_norm": 0.6473472658311249, "learning_rate": 2.1877969824358593e-06, "loss": 0.01126391664147377, "step": 88835 }, { "epoch": 0.8361411764705883, "grad_norm": 0.6849452740899035, "learning_rate": 2.18773541522533e-06, "loss": 0.014343050122261048, "step": 88840 }, { "epoch": 0.8361882352941177, "grad_norm": 0.4862908496982875, "learning_rate": 2.1876738532122323e-06, "loss": 0.019845005869865418, "step": 88845 }, { "epoch": 0.8362352941176471, "grad_norm": 0.48950470871711804, "learning_rate": 2.1876122963958343e-06, "loss": 0.011807774007320405, "step": 88850 }, { "epoch": 0.8362823529411765, "grad_norm": 0.39998437277846594, "learning_rate": 2.1875507447754054e-06, "loss": 0.01163235604763031, "step": 88855 }, { "epoch": 0.8363294117647059, "grad_norm": 0.48594544431164116, "learning_rate": 2.1874891983502145e-06, "loss": 0.012176550924777985, "step": 88860 }, { "epoch": 0.8363764705882353, "grad_norm": 0.349333512588898, "learning_rate": 2.1874276571195306e-06, "loss": 0.012728707492351532, "step": 88865 }, { "epoch": 0.8364235294117647, "grad_norm": 0.40957929926397696, "learning_rate": 2.1873661210826232e-06, "loss": 0.012041985988616943, "step": 88870 }, { "epoch": 0.8364705882352941, "grad_norm": 0.3702972618089568, "learning_rate": 2.1873045902387614e-06, "loss": 0.013374993205070495, "step": 88875 }, { "epoch": 0.8365176470588235, "grad_norm": 0.4362104157374704, "learning_rate": 2.1872430645872157e-06, "loss": 0.012864705920219422, "step": 88880 }, { "epoch": 0.836564705882353, "grad_norm": 0.45517837555581003, "learning_rate": 2.187181544127255e-06, "loss": 0.013558897376060485, "step": 88885 }, { "epoch": 0.8366117647058824, "grad_norm": 0.5116616726194037, "learning_rate": 2.18712002885815e-06, "loss": 0.01204034760594368, "step": 88890 }, { "epoch": 0.8366588235294118, "grad_norm": 0.3000460062526304, "learning_rate": 2.1870585187791702e-06, "loss": 0.013679291307926177, "step": 88895 }, { "epoch": 0.8367058823529412, "grad_norm": 0.6129293349489057, "learning_rate": 2.1869970138895862e-06, "loss": 0.012062439322471618, "step": 88900 }, { "epoch": 0.8367529411764706, "grad_norm": 0.5956926971437159, "learning_rate": 2.186935514188668e-06, "loss": 0.01716883182525635, "step": 88905 }, { "epoch": 0.8368, "grad_norm": 0.3857768906727442, "learning_rate": 2.1868740196756863e-06, "loss": 0.014181679487228394, "step": 88910 }, { "epoch": 0.8368470588235294, "grad_norm": 0.48000315613374434, "learning_rate": 2.1868125303499117e-06, "loss": 0.0123434379696846, "step": 88915 }, { "epoch": 0.8368941176470588, "grad_norm": 0.45322671965338635, "learning_rate": 2.186751046210615e-06, "loss": 0.018648314476013183, "step": 88920 }, { "epoch": 0.8369411764705882, "grad_norm": 0.6990509868687864, "learning_rate": 2.186689567257067e-06, "loss": 0.017505644261837004, "step": 88925 }, { "epoch": 0.8369882352941177, "grad_norm": 0.3608398304867028, "learning_rate": 2.186628093488539e-06, "loss": 0.019611507654190063, "step": 88930 }, { "epoch": 0.8370352941176471, "grad_norm": 0.9125539410445594, "learning_rate": 2.1865666249043017e-06, "loss": 0.016324976086616518, "step": 88935 }, { "epoch": 0.8370823529411765, "grad_norm": 0.4562940769509992, "learning_rate": 2.1865051615036273e-06, "loss": 0.012180139124393464, "step": 88940 }, { "epoch": 0.8371294117647059, "grad_norm": 0.375518714221326, "learning_rate": 2.1864437032857866e-06, "loss": 0.015811435878276825, "step": 88945 }, { "epoch": 0.8371764705882353, "grad_norm": 0.4133489720012992, "learning_rate": 2.1863822502500515e-06, "loss": 0.014316311478614807, "step": 88950 }, { "epoch": 0.8372235294117647, "grad_norm": 0.44593996835443017, "learning_rate": 2.186320802395694e-06, "loss": 0.012942159175872802, "step": 88955 }, { "epoch": 0.8372705882352941, "grad_norm": 0.70422550250506, "learning_rate": 2.1862593597219857e-06, "loss": 0.014242753386497498, "step": 88960 }, { "epoch": 0.8373176470588235, "grad_norm": 1.0472912597829465, "learning_rate": 2.186197922228199e-06, "loss": 0.018721991777420045, "step": 88965 }, { "epoch": 0.8373647058823529, "grad_norm": 0.8003672791398908, "learning_rate": 2.1861364899136057e-06, "loss": 0.02241295129060745, "step": 88970 }, { "epoch": 0.8374117647058823, "grad_norm": 0.48574780772741766, "learning_rate": 2.1860750627774785e-06, "loss": 0.01813262403011322, "step": 88975 }, { "epoch": 0.8374588235294118, "grad_norm": 0.7153256834507368, "learning_rate": 2.18601364081909e-06, "loss": 0.015818606317043304, "step": 88980 }, { "epoch": 0.8375058823529412, "grad_norm": 0.38225417829758035, "learning_rate": 2.1859522240377123e-06, "loss": 0.012620493769645691, "step": 88985 }, { "epoch": 0.8375529411764706, "grad_norm": 0.2832721406587616, "learning_rate": 2.1858908124326185e-06, "loss": 0.011907020211219787, "step": 88990 }, { "epoch": 0.8376, "grad_norm": 0.34778751677429814, "learning_rate": 2.185829406003082e-06, "loss": 0.012633615732192993, "step": 88995 }, { "epoch": 0.8376470588235294, "grad_norm": 0.4584496302033714, "learning_rate": 2.185768004748375e-06, "loss": 0.011349594593048096, "step": 89000 }, { "epoch": 0.8376941176470588, "grad_norm": 0.4078575927623239, "learning_rate": 2.1857066086677715e-06, "loss": 0.013987942039966584, "step": 89005 }, { "epoch": 0.8377411764705882, "grad_norm": 0.6071506711711039, "learning_rate": 2.1856452177605444e-06, "loss": 0.020662064850330352, "step": 89010 }, { "epoch": 0.8377882352941176, "grad_norm": 0.6114673787347022, "learning_rate": 2.185583832025967e-06, "loss": 0.029705148935317994, "step": 89015 }, { "epoch": 0.837835294117647, "grad_norm": 0.42133032325175035, "learning_rate": 2.1855224514633138e-06, "loss": 0.013328555226325988, "step": 89020 }, { "epoch": 0.8378823529411765, "grad_norm": 0.653573397157202, "learning_rate": 2.185461076071858e-06, "loss": 0.013826312124729156, "step": 89025 }, { "epoch": 0.8379294117647059, "grad_norm": 0.26519182518240486, "learning_rate": 2.185399705850873e-06, "loss": 0.014727430045604705, "step": 89030 }, { "epoch": 0.8379764705882353, "grad_norm": 0.37089580228659474, "learning_rate": 2.185338340799634e-06, "loss": 0.017176544666290282, "step": 89035 }, { "epoch": 0.8380235294117647, "grad_norm": 0.3638141616883185, "learning_rate": 2.185276980917415e-06, "loss": 0.013829545676708221, "step": 89040 }, { "epoch": 0.8380705882352941, "grad_norm": 0.4799246358171145, "learning_rate": 2.1852156262034895e-06, "loss": 0.01242126151919365, "step": 89045 }, { "epoch": 0.8381176470588235, "grad_norm": 0.43279095070229223, "learning_rate": 2.185154276657133e-06, "loss": 0.014536775648593903, "step": 89050 }, { "epoch": 0.8381647058823529, "grad_norm": 0.5344162545052028, "learning_rate": 2.185092932277619e-06, "loss": 0.012763534486293793, "step": 89055 }, { "epoch": 0.8382117647058823, "grad_norm": 0.5241561789380498, "learning_rate": 2.185031593064224e-06, "loss": 0.014940810203552247, "step": 89060 }, { "epoch": 0.8382588235294117, "grad_norm": 0.4734140955803572, "learning_rate": 2.1849702590162216e-06, "loss": 0.012645229697227478, "step": 89065 }, { "epoch": 0.8383058823529411, "grad_norm": 0.40220212206475514, "learning_rate": 2.184908930132887e-06, "loss": 0.012843531370162965, "step": 89070 }, { "epoch": 0.8383529411764706, "grad_norm": 0.7696947135137859, "learning_rate": 2.1848476064134956e-06, "loss": 0.016061528027057646, "step": 89075 }, { "epoch": 0.8384, "grad_norm": 0.45080798159151536, "learning_rate": 2.1847862878573233e-06, "loss": 0.012666159868240356, "step": 89080 }, { "epoch": 0.8384470588235294, "grad_norm": 0.49847441477134186, "learning_rate": 2.1847249744636444e-06, "loss": 0.011017745733261109, "step": 89085 }, { "epoch": 0.8384941176470588, "grad_norm": 0.5700603248107798, "learning_rate": 2.1846636662317357e-06, "loss": 0.015997278690338134, "step": 89090 }, { "epoch": 0.8385411764705882, "grad_norm": 0.5504925596251343, "learning_rate": 2.1846023631608723e-06, "loss": 0.013909836113452912, "step": 89095 }, { "epoch": 0.8385882352941176, "grad_norm": 0.32004747634475433, "learning_rate": 2.1845410652503306e-06, "loss": 0.012236012518405915, "step": 89100 }, { "epoch": 0.838635294117647, "grad_norm": 0.5792133268124733, "learning_rate": 2.1844797724993864e-06, "loss": 0.014297473430633544, "step": 89105 }, { "epoch": 0.8386823529411764, "grad_norm": 0.40931750573701214, "learning_rate": 2.1844184849073158e-06, "loss": 0.01392526924610138, "step": 89110 }, { "epoch": 0.8387294117647058, "grad_norm": 0.37110686263464665, "learning_rate": 2.184357202473395e-06, "loss": 0.012340474128723144, "step": 89115 }, { "epoch": 0.8387764705882353, "grad_norm": 0.37226008930062093, "learning_rate": 2.1842959251969016e-06, "loss": 0.014852496981620788, "step": 89120 }, { "epoch": 0.8388235294117647, "grad_norm": 0.6362329795861594, "learning_rate": 2.184234653077111e-06, "loss": 0.012187144160270691, "step": 89125 }, { "epoch": 0.8388705882352941, "grad_norm": 0.22294224531422044, "learning_rate": 2.1841733861133e-06, "loss": 0.010852142423391341, "step": 89130 }, { "epoch": 0.8389176470588235, "grad_norm": 0.5092458134291975, "learning_rate": 2.1841121243047463e-06, "loss": 0.01380956768989563, "step": 89135 }, { "epoch": 0.838964705882353, "grad_norm": 0.4444794989333761, "learning_rate": 2.1840508676507273e-06, "loss": 0.016803869605064393, "step": 89140 }, { "epoch": 0.8390117647058823, "grad_norm": 0.33187681783305395, "learning_rate": 2.1839896161505184e-06, "loss": 0.012114086747169494, "step": 89145 }, { "epoch": 0.8390588235294117, "grad_norm": 0.40054660158807054, "learning_rate": 2.1839283698033987e-06, "loss": 0.013930633664131165, "step": 89150 }, { "epoch": 0.8391058823529411, "grad_norm": 0.3178574138208427, "learning_rate": 2.183867128608645e-06, "loss": 0.013932651281356812, "step": 89155 }, { "epoch": 0.8391529411764705, "grad_norm": 0.5278295148624255, "learning_rate": 2.183805892565535e-06, "loss": 0.014531464874744415, "step": 89160 }, { "epoch": 0.8392, "grad_norm": 0.4732636108143013, "learning_rate": 2.1837446616733464e-06, "loss": 0.013488128781318665, "step": 89165 }, { "epoch": 0.8392470588235295, "grad_norm": 0.5052867281733185, "learning_rate": 2.1836834359313573e-06, "loss": 0.013631518185138702, "step": 89170 }, { "epoch": 0.8392941176470589, "grad_norm": 0.5836180219797823, "learning_rate": 2.1836222153388457e-06, "loss": 0.015048046410083771, "step": 89175 }, { "epoch": 0.8393411764705883, "grad_norm": 0.6427472783867619, "learning_rate": 2.183560999895089e-06, "loss": 0.016332611441612244, "step": 89180 }, { "epoch": 0.8393882352941177, "grad_norm": 0.8392663210259764, "learning_rate": 2.183499789599367e-06, "loss": 0.011538478732109069, "step": 89185 }, { "epoch": 0.8394352941176471, "grad_norm": 0.3345106445491924, "learning_rate": 2.1834385844509577e-06, "loss": 0.013128086924552917, "step": 89190 }, { "epoch": 0.8394823529411765, "grad_norm": 0.7165348397479743, "learning_rate": 2.1833773844491395e-06, "loss": 0.019705620408058167, "step": 89195 }, { "epoch": 0.8395294117647059, "grad_norm": 0.5868785393548644, "learning_rate": 2.183316189593191e-06, "loss": 0.014709007740020753, "step": 89200 }, { "epoch": 0.8395764705882353, "grad_norm": 0.4425321714947006, "learning_rate": 2.1832549998823905e-06, "loss": 0.016347959637641907, "step": 89205 }, { "epoch": 0.8396235294117647, "grad_norm": 0.7095786667201313, "learning_rate": 2.183193815316019e-06, "loss": 0.016150671243667602, "step": 89210 }, { "epoch": 0.8396705882352942, "grad_norm": 0.41372649013869706, "learning_rate": 2.1831326358933536e-06, "loss": 0.012672695517539977, "step": 89215 }, { "epoch": 0.8397176470588236, "grad_norm": 0.6403910010451865, "learning_rate": 2.1830714616136746e-06, "loss": 0.01564515382051468, "step": 89220 }, { "epoch": 0.839764705882353, "grad_norm": 0.29639885277169736, "learning_rate": 2.183010292476262e-06, "loss": 0.011195605993270874, "step": 89225 }, { "epoch": 0.8398117647058824, "grad_norm": 0.34392756465347213, "learning_rate": 2.1829491284803945e-06, "loss": 0.014301127195358277, "step": 89230 }, { "epoch": 0.8398588235294118, "grad_norm": 0.7769645719878072, "learning_rate": 2.1828879696253523e-06, "loss": 0.02070872187614441, "step": 89235 }, { "epoch": 0.8399058823529412, "grad_norm": 0.41134547312701936, "learning_rate": 2.1828268159104148e-06, "loss": 0.011283719539642334, "step": 89240 }, { "epoch": 0.8399529411764706, "grad_norm": 0.384954020949743, "learning_rate": 2.182765667334863e-06, "loss": 0.014682483673095704, "step": 89245 }, { "epoch": 0.84, "grad_norm": 0.6564954674334998, "learning_rate": 2.1827045238979763e-06, "loss": 0.015311606228351593, "step": 89250 }, { "epoch": 0.8400470588235294, "grad_norm": 0.4898820735566411, "learning_rate": 2.1826433855990355e-06, "loss": 0.015259452164173126, "step": 89255 }, { "epoch": 0.8400941176470588, "grad_norm": 0.36125215313291037, "learning_rate": 2.18258225243732e-06, "loss": 0.01465650051832199, "step": 89260 }, { "epoch": 0.8401411764705883, "grad_norm": 0.39192078047437223, "learning_rate": 2.182521124412112e-06, "loss": 0.012217356264591217, "step": 89265 }, { "epoch": 0.8401882352941177, "grad_norm": 0.5086799072637763, "learning_rate": 2.1824600015226912e-06, "loss": 0.01533106118440628, "step": 89270 }, { "epoch": 0.8402352941176471, "grad_norm": 0.23327116521107813, "learning_rate": 2.1823988837683384e-06, "loss": 0.013866537809371948, "step": 89275 }, { "epoch": 0.8402823529411765, "grad_norm": 0.38044296801649424, "learning_rate": 2.1823377711483355e-06, "loss": 0.019877922534942628, "step": 89280 }, { "epoch": 0.8403294117647059, "grad_norm": 0.45059026228687377, "learning_rate": 2.1822766636619626e-06, "loss": 0.011819669604301452, "step": 89285 }, { "epoch": 0.8403764705882353, "grad_norm": 0.7102905846867357, "learning_rate": 2.1822155613085017e-06, "loss": 0.01648886948823929, "step": 89290 }, { "epoch": 0.8404235294117647, "grad_norm": 0.3828843610434089, "learning_rate": 2.1821544640872343e-06, "loss": 0.013830670714378357, "step": 89295 }, { "epoch": 0.8404705882352941, "grad_norm": 0.46708542542428255, "learning_rate": 2.1820933719974415e-06, "loss": 0.01445734202861786, "step": 89300 }, { "epoch": 0.8405176470588235, "grad_norm": 0.3257885958747615, "learning_rate": 2.1820322850384055e-06, "loss": 0.01210334450006485, "step": 89305 }, { "epoch": 0.840564705882353, "grad_norm": 0.47188849243162884, "learning_rate": 2.181971203209408e-06, "loss": 0.015525923669338226, "step": 89310 }, { "epoch": 0.8406117647058824, "grad_norm": 0.5747059888280154, "learning_rate": 2.1819101265097304e-06, "loss": 0.016081607341766356, "step": 89315 }, { "epoch": 0.8406588235294118, "grad_norm": 0.779027847667905, "learning_rate": 2.1818490549386564e-06, "loss": 0.0136851504445076, "step": 89320 }, { "epoch": 0.8407058823529412, "grad_norm": 0.5278631920357781, "learning_rate": 2.1817879884954665e-06, "loss": 0.011349987983703614, "step": 89325 }, { "epoch": 0.8407529411764706, "grad_norm": 0.42247387909760453, "learning_rate": 2.1817269271794445e-06, "loss": 0.012710145115852356, "step": 89330 }, { "epoch": 0.8408, "grad_norm": 0.540282789724294, "learning_rate": 2.181665870989872e-06, "loss": 0.014077375829219817, "step": 89335 }, { "epoch": 0.8408470588235294, "grad_norm": 0.43685126641672, "learning_rate": 2.181604819926032e-06, "loss": 0.013697603344917297, "step": 89340 }, { "epoch": 0.8408941176470588, "grad_norm": 0.41299175130697463, "learning_rate": 2.1815437739872076e-06, "loss": 0.014246034622192382, "step": 89345 }, { "epoch": 0.8409411764705882, "grad_norm": 0.5718138100947321, "learning_rate": 2.181482733172682e-06, "loss": 0.01691020429134369, "step": 89350 }, { "epoch": 0.8409882352941176, "grad_norm": 0.5132962549954452, "learning_rate": 2.1814216974817384e-06, "loss": 0.013822488486766815, "step": 89355 }, { "epoch": 0.8410352941176471, "grad_norm": 0.4620167435901631, "learning_rate": 2.181360666913659e-06, "loss": 0.014789530634880066, "step": 89360 }, { "epoch": 0.8410823529411765, "grad_norm": 0.4761447707881659, "learning_rate": 2.1812996414677283e-06, "loss": 0.013663586974143983, "step": 89365 }, { "epoch": 0.8411294117647059, "grad_norm": 0.6415071275772564, "learning_rate": 2.1812386211432292e-06, "loss": 0.013195706903934479, "step": 89370 }, { "epoch": 0.8411764705882353, "grad_norm": 0.484735324598921, "learning_rate": 2.1811776059394463e-06, "loss": 0.01796662211418152, "step": 89375 }, { "epoch": 0.8412235294117647, "grad_norm": 0.4569406020214835, "learning_rate": 2.181116595855662e-06, "loss": 0.012120753526687622, "step": 89380 }, { "epoch": 0.8412705882352941, "grad_norm": 0.4081895601997118, "learning_rate": 2.181055590891162e-06, "loss": 0.01137995794415474, "step": 89385 }, { "epoch": 0.8413176470588235, "grad_norm": 0.40666402978027066, "learning_rate": 2.1809945910452285e-06, "loss": 0.0132944256067276, "step": 89390 }, { "epoch": 0.8413647058823529, "grad_norm": 0.42325883588895524, "learning_rate": 2.180933596317148e-06, "loss": 0.014749783277511596, "step": 89395 }, { "epoch": 0.8414117647058823, "grad_norm": 0.476938885198331, "learning_rate": 2.180872606706203e-06, "loss": 0.014946138858795166, "step": 89400 }, { "epoch": 0.8414588235294118, "grad_norm": 0.32882409422614184, "learning_rate": 2.1808116222116786e-06, "loss": 0.0152835875749588, "step": 89405 }, { "epoch": 0.8415058823529412, "grad_norm": 0.4297967766441806, "learning_rate": 2.1807506428328594e-06, "loss": 0.012081056833267212, "step": 89410 }, { "epoch": 0.8415529411764706, "grad_norm": 0.6136841821130753, "learning_rate": 2.1806896685690307e-06, "loss": 0.014065222442150116, "step": 89415 }, { "epoch": 0.8416, "grad_norm": 0.35739904093946556, "learning_rate": 2.1806286994194773e-06, "loss": 0.014421728253364564, "step": 89420 }, { "epoch": 0.8416470588235294, "grad_norm": 0.4518989255008529, "learning_rate": 2.1805677353834843e-06, "loss": 0.011410284042358398, "step": 89425 }, { "epoch": 0.8416941176470588, "grad_norm": 0.6069688340660871, "learning_rate": 2.1805067764603363e-06, "loss": 0.013008269667625427, "step": 89430 }, { "epoch": 0.8417411764705882, "grad_norm": 0.4485582531556523, "learning_rate": 2.1804458226493197e-06, "loss": 0.015629231929779053, "step": 89435 }, { "epoch": 0.8417882352941176, "grad_norm": 0.47217188838362284, "learning_rate": 2.1803848739497187e-06, "loss": 0.013719281554222107, "step": 89440 }, { "epoch": 0.841835294117647, "grad_norm": 0.38727789133533835, "learning_rate": 2.180323930360821e-06, "loss": 0.016669341921806337, "step": 89445 }, { "epoch": 0.8418823529411765, "grad_norm": 0.5181830238130207, "learning_rate": 2.18026299188191e-06, "loss": 0.017574477195739745, "step": 89450 }, { "epoch": 0.8419294117647059, "grad_norm": 0.7588054363410262, "learning_rate": 2.1802020585122736e-06, "loss": 0.01849870979785919, "step": 89455 }, { "epoch": 0.8419764705882353, "grad_norm": 0.392116358128181, "learning_rate": 2.180141130251197e-06, "loss": 0.016601699590682983, "step": 89460 }, { "epoch": 0.8420235294117647, "grad_norm": 0.3390037730212711, "learning_rate": 2.1800802070979657e-06, "loss": 0.013400077819824219, "step": 89465 }, { "epoch": 0.8420705882352941, "grad_norm": 0.41085134894687936, "learning_rate": 2.180019289051868e-06, "loss": 0.016651108860969543, "step": 89470 }, { "epoch": 0.8421176470588235, "grad_norm": 0.4154985370760641, "learning_rate": 2.1799583761121883e-06, "loss": 0.016766224801540375, "step": 89475 }, { "epoch": 0.8421647058823529, "grad_norm": 0.7155731073439506, "learning_rate": 2.1798974682782145e-06, "loss": 0.026276379823684692, "step": 89480 }, { "epoch": 0.8422117647058823, "grad_norm": 0.2634391986600808, "learning_rate": 2.1798365655492334e-06, "loss": 0.025577306747436523, "step": 89485 }, { "epoch": 0.8422588235294117, "grad_norm": 0.5684357621176993, "learning_rate": 2.179775667924531e-06, "loss": 0.013279755413532258, "step": 89490 }, { "epoch": 0.8423058823529411, "grad_norm": 0.6509339973359043, "learning_rate": 2.179714775403395e-06, "loss": 0.01655801832675934, "step": 89495 }, { "epoch": 0.8423529411764706, "grad_norm": 0.45131812946826716, "learning_rate": 2.179653887985113e-06, "loss": 0.013974449038505555, "step": 89500 }, { "epoch": 0.8424, "grad_norm": 0.41387027575189755, "learning_rate": 2.1795930056689716e-06, "loss": 0.01590239703655243, "step": 89505 }, { "epoch": 0.8424470588235294, "grad_norm": 0.6067096573090026, "learning_rate": 2.1795321284542584e-06, "loss": 0.013304384052753448, "step": 89510 }, { "epoch": 0.8424941176470588, "grad_norm": 0.392571444884024, "learning_rate": 2.1794712563402616e-06, "loss": 0.013923844695091248, "step": 89515 }, { "epoch": 0.8425411764705882, "grad_norm": 0.4584953703091109, "learning_rate": 2.179410389326268e-06, "loss": 0.014825867116451263, "step": 89520 }, { "epoch": 0.8425882352941176, "grad_norm": 0.2758824641624546, "learning_rate": 2.1793495274115663e-06, "loss": 0.01339646577835083, "step": 89525 }, { "epoch": 0.842635294117647, "grad_norm": 0.8253666719413582, "learning_rate": 2.179288670595444e-06, "loss": 0.017275628447532655, "step": 89530 }, { "epoch": 0.8426823529411764, "grad_norm": 0.435454596741386, "learning_rate": 2.179227818877189e-06, "loss": 0.011871156096458436, "step": 89535 }, { "epoch": 0.8427294117647058, "grad_norm": 0.40529092825741314, "learning_rate": 2.1791669722560905e-06, "loss": 0.013941323757171631, "step": 89540 }, { "epoch": 0.8427764705882353, "grad_norm": 0.6496071367701364, "learning_rate": 2.179106130731437e-06, "loss": 0.01559838205575943, "step": 89545 }, { "epoch": 0.8428235294117647, "grad_norm": 0.4825539201395491, "learning_rate": 2.1790452943025165e-06, "loss": 0.013847994804382324, "step": 89550 }, { "epoch": 0.8428705882352941, "grad_norm": 0.5254193566829883, "learning_rate": 2.1789844629686173e-06, "loss": 0.015390655398368836, "step": 89555 }, { "epoch": 0.8429176470588235, "grad_norm": 0.6648937383934894, "learning_rate": 2.178923636729029e-06, "loss": 0.017942368984222412, "step": 89560 }, { "epoch": 0.842964705882353, "grad_norm": 0.3812406170276991, "learning_rate": 2.1788628155830406e-06, "loss": 0.011602652817964553, "step": 89565 }, { "epoch": 0.8430117647058823, "grad_norm": 0.3265069833750623, "learning_rate": 2.178801999529941e-06, "loss": 0.013356216251850128, "step": 89570 }, { "epoch": 0.8430588235294117, "grad_norm": 0.3225562304136514, "learning_rate": 2.1787411885690193e-06, "loss": 0.013749168813228607, "step": 89575 }, { "epoch": 0.8431058823529411, "grad_norm": 0.5458803964831925, "learning_rate": 2.178680382699565e-06, "loss": 0.015601733326911926, "step": 89580 }, { "epoch": 0.8431529411764705, "grad_norm": 0.5536317005640918, "learning_rate": 2.1786195819208684e-06, "loss": 0.013926476240158081, "step": 89585 }, { "epoch": 0.8432, "grad_norm": 0.6046944695027617, "learning_rate": 2.1785587862322183e-06, "loss": 0.01495593786239624, "step": 89590 }, { "epoch": 0.8432470588235295, "grad_norm": 0.32270408884230734, "learning_rate": 2.178497995632905e-06, "loss": 0.013962756097316741, "step": 89595 }, { "epoch": 0.8432941176470589, "grad_norm": 0.574825119386042, "learning_rate": 2.178437210122218e-06, "loss": 0.014586946368217469, "step": 89600 }, { "epoch": 0.8433411764705883, "grad_norm": 0.5367467757806772, "learning_rate": 2.1783764296994477e-06, "loss": 0.014905798435211181, "step": 89605 }, { "epoch": 0.8433882352941177, "grad_norm": 0.5412278204334932, "learning_rate": 2.1783156543638847e-06, "loss": 0.01232118234038353, "step": 89610 }, { "epoch": 0.8434352941176471, "grad_norm": 0.4218921676496755, "learning_rate": 2.178254884114819e-06, "loss": 0.011034801602363586, "step": 89615 }, { "epoch": 0.8434823529411765, "grad_norm": 0.33000214062552435, "learning_rate": 2.178194118951541e-06, "loss": 0.015912553668022154, "step": 89620 }, { "epoch": 0.8435294117647059, "grad_norm": 0.5254157707704248, "learning_rate": 2.178133358873342e-06, "loss": 0.014058148860931397, "step": 89625 }, { "epoch": 0.8435764705882353, "grad_norm": 0.45183483622878523, "learning_rate": 2.1780726038795118e-06, "loss": 0.014949005842208863, "step": 89630 }, { "epoch": 0.8436235294117647, "grad_norm": 0.5580434461875811, "learning_rate": 2.1780118539693422e-06, "loss": 0.013664363324642182, "step": 89635 }, { "epoch": 0.8436705882352942, "grad_norm": 0.4772379047401782, "learning_rate": 2.1779511091421242e-06, "loss": 0.015922819077968598, "step": 89640 }, { "epoch": 0.8437176470588236, "grad_norm": 0.47861036062385043, "learning_rate": 2.1778903693971484e-06, "loss": 0.014103896915912628, "step": 89645 }, { "epoch": 0.843764705882353, "grad_norm": 0.369908045822072, "learning_rate": 2.1778296347337066e-06, "loss": 0.015013322234153748, "step": 89650 }, { "epoch": 0.8438117647058824, "grad_norm": 0.623881241375273, "learning_rate": 2.1777689051510902e-06, "loss": 0.020860576629638673, "step": 89655 }, { "epoch": 0.8438588235294118, "grad_norm": 0.5246382712048977, "learning_rate": 2.1777081806485913e-06, "loss": 0.018342310190200807, "step": 89660 }, { "epoch": 0.8439058823529412, "grad_norm": 0.5187414786326847, "learning_rate": 2.177647461225501e-06, "loss": 0.014252546429634094, "step": 89665 }, { "epoch": 0.8439529411764706, "grad_norm": 0.6952174410226576, "learning_rate": 2.1775867468811117e-06, "loss": 0.01636756956577301, "step": 89670 }, { "epoch": 0.844, "grad_norm": 0.5047671036313343, "learning_rate": 2.1775260376147153e-06, "loss": 0.008327746391296386, "step": 89675 }, { "epoch": 0.8440470588235294, "grad_norm": 0.4548229625373246, "learning_rate": 2.177465333425604e-06, "loss": 0.03348094820976257, "step": 89680 }, { "epoch": 0.8440941176470588, "grad_norm": 0.48347636720064346, "learning_rate": 2.17740463431307e-06, "loss": 0.019115751981735228, "step": 89685 }, { "epoch": 0.8441411764705883, "grad_norm": 0.7237335797246991, "learning_rate": 2.177343940276405e-06, "loss": 0.01721934974193573, "step": 89690 }, { "epoch": 0.8441882352941177, "grad_norm": 0.2642654476682222, "learning_rate": 2.1772832513149034e-06, "loss": 0.01217721551656723, "step": 89695 }, { "epoch": 0.8442352941176471, "grad_norm": 0.3943279262754827, "learning_rate": 2.177222567427857e-06, "loss": 0.013974638283252716, "step": 89700 }, { "epoch": 0.8442823529411765, "grad_norm": 0.467725563968593, "learning_rate": 2.1771618886145583e-06, "loss": 0.012837451696395875, "step": 89705 }, { "epoch": 0.8443294117647059, "grad_norm": 0.6295619279869179, "learning_rate": 2.1771012148743005e-06, "loss": 0.016002219915390015, "step": 89710 }, { "epoch": 0.8443764705882353, "grad_norm": 0.5730737300517149, "learning_rate": 2.177040546206377e-06, "loss": 0.011816645413637162, "step": 89715 }, { "epoch": 0.8444235294117647, "grad_norm": 0.69075876935484, "learning_rate": 2.176979882610081e-06, "loss": 0.02141445130109787, "step": 89720 }, { "epoch": 0.8444705882352941, "grad_norm": 0.5197214834089312, "learning_rate": 2.1769192240847064e-06, "loss": 0.01485075056552887, "step": 89725 }, { "epoch": 0.8445176470588235, "grad_norm": 0.7266970708696651, "learning_rate": 2.176858570629546e-06, "loss": 0.014221478998661042, "step": 89730 }, { "epoch": 0.844564705882353, "grad_norm": 0.5016271916710284, "learning_rate": 2.1767979222438938e-06, "loss": 0.019041535258293153, "step": 89735 }, { "epoch": 0.8446117647058824, "grad_norm": 0.38187788080558677, "learning_rate": 2.1767372789270434e-06, "loss": 0.01369083821773529, "step": 89740 }, { "epoch": 0.8446588235294118, "grad_norm": 0.46244241965712357, "learning_rate": 2.17667664067829e-06, "loss": 0.01470411717891693, "step": 89745 }, { "epoch": 0.8447058823529412, "grad_norm": 0.3970547089508598, "learning_rate": 2.176616007496926e-06, "loss": 0.009021943807601929, "step": 89750 }, { "epoch": 0.8447529411764706, "grad_norm": 0.5573244119289275, "learning_rate": 2.176555379382246e-06, "loss": 0.012132382392883301, "step": 89755 }, { "epoch": 0.8448, "grad_norm": 0.3979856164822541, "learning_rate": 2.1764947563335458e-06, "loss": 0.012856578826904297, "step": 89760 }, { "epoch": 0.8448470588235294, "grad_norm": 0.5832400298850144, "learning_rate": 2.1764341383501185e-06, "loss": 0.01276843249797821, "step": 89765 }, { "epoch": 0.8448941176470588, "grad_norm": 0.6680596736335278, "learning_rate": 2.176373525431259e-06, "loss": 0.013788262009620666, "step": 89770 }, { "epoch": 0.8449411764705882, "grad_norm": 0.4350054475130499, "learning_rate": 2.1763129175762623e-06, "loss": 0.013119307160377503, "step": 89775 }, { "epoch": 0.8449882352941176, "grad_norm": 0.5818820383226243, "learning_rate": 2.176252314784424e-06, "loss": 0.017722247540950774, "step": 89780 }, { "epoch": 0.8450352941176471, "grad_norm": 0.7038168801646852, "learning_rate": 2.176191717055038e-06, "loss": 0.012236759066581726, "step": 89785 }, { "epoch": 0.8450823529411765, "grad_norm": 0.5931386528104989, "learning_rate": 2.1761311243874e-06, "loss": 0.018775574862957, "step": 89790 }, { "epoch": 0.8451294117647059, "grad_norm": 0.47560245365370235, "learning_rate": 2.176070536780806e-06, "loss": 0.014473360776901246, "step": 89795 }, { "epoch": 0.8451764705882353, "grad_norm": 0.39526286975698127, "learning_rate": 2.1760099542345505e-06, "loss": 0.011606371402740479, "step": 89800 }, { "epoch": 0.8452235294117647, "grad_norm": 0.2739269425207436, "learning_rate": 2.1759493767479294e-06, "loss": 0.009829726815223695, "step": 89805 }, { "epoch": 0.8452705882352941, "grad_norm": 0.5776339677949444, "learning_rate": 2.1758888043202393e-06, "loss": 0.01602422147989273, "step": 89810 }, { "epoch": 0.8453176470588235, "grad_norm": 0.8093144750032084, "learning_rate": 2.1758282369507745e-06, "loss": 0.01675901412963867, "step": 89815 }, { "epoch": 0.8453647058823529, "grad_norm": 0.32595648051989856, "learning_rate": 2.175767674638833e-06, "loss": 0.01237175315618515, "step": 89820 }, { "epoch": 0.8454117647058823, "grad_norm": 0.6980691139603428, "learning_rate": 2.1757071173837094e-06, "loss": 0.01592334508895874, "step": 89825 }, { "epoch": 0.8454588235294118, "grad_norm": 0.43921090914166094, "learning_rate": 2.1756465651847004e-06, "loss": 0.017823295295238496, "step": 89830 }, { "epoch": 0.8455058823529412, "grad_norm": 0.7115053855718538, "learning_rate": 2.175586018041103e-06, "loss": 0.01114809587597847, "step": 89835 }, { "epoch": 0.8455529411764706, "grad_norm": 0.39588180879746926, "learning_rate": 2.1755254759522136e-06, "loss": 0.015706849098205567, "step": 89840 }, { "epoch": 0.8456, "grad_norm": 0.4528531829398039, "learning_rate": 2.175464938917328e-06, "loss": 0.01545701026916504, "step": 89845 }, { "epoch": 0.8456470588235294, "grad_norm": 0.5948904624973839, "learning_rate": 2.1754044069357445e-06, "loss": 0.015856118500232698, "step": 89850 }, { "epoch": 0.8456941176470588, "grad_norm": 0.5504037529636614, "learning_rate": 2.175343880006759e-06, "loss": 0.01309230923652649, "step": 89855 }, { "epoch": 0.8457411764705882, "grad_norm": 0.7716383233334976, "learning_rate": 2.175283358129669e-06, "loss": 0.015982359647750854, "step": 89860 }, { "epoch": 0.8457882352941176, "grad_norm": 0.35584036962903326, "learning_rate": 2.175222841303772e-06, "loss": 0.015314097702503204, "step": 89865 }, { "epoch": 0.845835294117647, "grad_norm": 0.5319212731006767, "learning_rate": 2.175162329528365e-06, "loss": 0.014072304964065552, "step": 89870 }, { "epoch": 0.8458823529411764, "grad_norm": 0.5266796196301161, "learning_rate": 2.175101822802746e-06, "loss": 0.016245558857917786, "step": 89875 }, { "epoch": 0.8459294117647059, "grad_norm": 0.547853813012178, "learning_rate": 2.1750413211262124e-06, "loss": 0.010980284214019776, "step": 89880 }, { "epoch": 0.8459764705882353, "grad_norm": 0.49173641473349755, "learning_rate": 2.1749808244980624e-06, "loss": 0.014726004004478455, "step": 89885 }, { "epoch": 0.8460235294117647, "grad_norm": 0.378435855721995, "learning_rate": 2.1749203329175935e-06, "loss": 0.012061052024364471, "step": 89890 }, { "epoch": 0.8460705882352941, "grad_norm": 0.4505092579909316, "learning_rate": 2.174859846384104e-06, "loss": 0.02136545777320862, "step": 89895 }, { "epoch": 0.8461176470588235, "grad_norm": 0.4748982399649688, "learning_rate": 2.174799364896892e-06, "loss": 0.014889737963676453, "step": 89900 }, { "epoch": 0.8461647058823529, "grad_norm": 0.6072771958652711, "learning_rate": 2.174738888455256e-06, "loss": 0.013244742155075073, "step": 89905 }, { "epoch": 0.8462117647058823, "grad_norm": 0.281279868568917, "learning_rate": 2.1746784170584945e-06, "loss": 0.01345350444316864, "step": 89910 }, { "epoch": 0.8462588235294117, "grad_norm": 0.4518864763778904, "learning_rate": 2.174617950705906e-06, "loss": 0.016010768711566925, "step": 89915 }, { "epoch": 0.8463058823529411, "grad_norm": 0.42673226584087615, "learning_rate": 2.1745574893967894e-06, "loss": 0.010479942709207535, "step": 89920 }, { "epoch": 0.8463529411764706, "grad_norm": 0.5756380515261865, "learning_rate": 2.1744970331304433e-06, "loss": 0.01751105934381485, "step": 89925 }, { "epoch": 0.8464, "grad_norm": 0.3004189782267109, "learning_rate": 2.1744365819061676e-06, "loss": 0.015886251628398896, "step": 89930 }, { "epoch": 0.8464470588235294, "grad_norm": 0.5787282079665781, "learning_rate": 2.174376135723261e-06, "loss": 0.013243347406387329, "step": 89935 }, { "epoch": 0.8464941176470588, "grad_norm": 0.5396946707981094, "learning_rate": 2.1743156945810223e-06, "loss": 0.01816061735153198, "step": 89940 }, { "epoch": 0.8465411764705882, "grad_norm": 0.5341576451749472, "learning_rate": 2.1742552584787517e-06, "loss": 0.014532391726970673, "step": 89945 }, { "epoch": 0.8465882352941176, "grad_norm": 0.46992619376422945, "learning_rate": 2.1741948274157486e-06, "loss": 0.01626833975315094, "step": 89950 }, { "epoch": 0.846635294117647, "grad_norm": 0.3215899267077258, "learning_rate": 2.1741344013913125e-06, "loss": 0.01326698362827301, "step": 89955 }, { "epoch": 0.8466823529411764, "grad_norm": 0.8010156410020347, "learning_rate": 2.1740739804047434e-06, "loss": 0.01516624391078949, "step": 89960 }, { "epoch": 0.8467294117647058, "grad_norm": 0.37965912096558146, "learning_rate": 2.1740135644553415e-06, "loss": 0.014336124062538147, "step": 89965 }, { "epoch": 0.8467764705882352, "grad_norm": 0.6073754693488479, "learning_rate": 2.1739531535424065e-06, "loss": 0.012039671838283538, "step": 89970 }, { "epoch": 0.8468235294117648, "grad_norm": 0.3598638091346102, "learning_rate": 2.1738927476652395e-06, "loss": 0.013667266070842742, "step": 89975 }, { "epoch": 0.8468705882352942, "grad_norm": 0.6356051266694159, "learning_rate": 2.17383234682314e-06, "loss": 0.016804076731204987, "step": 89980 }, { "epoch": 0.8469176470588236, "grad_norm": 0.7482631658791354, "learning_rate": 2.173771951015409e-06, "loss": 0.02072768211364746, "step": 89985 }, { "epoch": 0.846964705882353, "grad_norm": 0.6393992776238393, "learning_rate": 2.1737115602413473e-06, "loss": 0.016512608528137206, "step": 89990 }, { "epoch": 0.8470117647058824, "grad_norm": 0.49896809023126204, "learning_rate": 2.173651174500255e-06, "loss": 0.01578277051448822, "step": 89995 }, { "epoch": 0.8470588235294118, "grad_norm": 0.5818632719878049, "learning_rate": 2.1735907937914343e-06, "loss": 0.017099499702453613, "step": 90000 }, { "epoch": 0.8470588235294118, "eval_loss": 0.014685890637338161, "eval_runtime": 613.7188, "eval_samples_per_second": 110.8, "eval_steps_per_second": 6.925, "step": 90000 }, { "epoch": 0.8471058823529412, "grad_norm": 0.394075672855289, "learning_rate": 2.173530418114185e-06, "loss": 0.014929227530956268, "step": 90005 }, { "epoch": 0.8471529411764706, "grad_norm": 0.5904423874794086, "learning_rate": 2.173470047467809e-06, "loss": 0.014492958784103394, "step": 90010 }, { "epoch": 0.8472, "grad_norm": 0.42248878545220186, "learning_rate": 2.173409681851608e-06, "loss": 0.013044779002666474, "step": 90015 }, { "epoch": 0.8472470588235295, "grad_norm": 0.7136362508989297, "learning_rate": 2.173349321264883e-06, "loss": 0.018808797001838684, "step": 90020 }, { "epoch": 0.8472941176470589, "grad_norm": 0.3722714022383942, "learning_rate": 2.1732889657069355e-06, "loss": 0.013678120076656341, "step": 90025 }, { "epoch": 0.8473411764705883, "grad_norm": 0.5415075462054173, "learning_rate": 2.1732286151770675e-06, "loss": 0.019023676216602326, "step": 90030 }, { "epoch": 0.8473882352941177, "grad_norm": 0.4978304502516897, "learning_rate": 2.173168269674581e-06, "loss": 0.013561305403709412, "step": 90035 }, { "epoch": 0.8474352941176471, "grad_norm": 0.6163813000620323, "learning_rate": 2.173107929198778e-06, "loss": 0.017442503571510316, "step": 90040 }, { "epoch": 0.8474823529411765, "grad_norm": 0.4701025343816984, "learning_rate": 2.173047593748961e-06, "loss": 0.01835644245147705, "step": 90045 }, { "epoch": 0.8475294117647059, "grad_norm": 0.4558766202328629, "learning_rate": 2.1729872633244313e-06, "loss": 0.015857800841331482, "step": 90050 }, { "epoch": 0.8475764705882353, "grad_norm": 0.6802305762266744, "learning_rate": 2.172926937924492e-06, "loss": 0.025260168313980102, "step": 90055 }, { "epoch": 0.8476235294117647, "grad_norm": 0.2945512307979386, "learning_rate": 2.172866617548446e-06, "loss": 0.011420957744121552, "step": 90060 }, { "epoch": 0.8476705882352941, "grad_norm": 0.640628071783045, "learning_rate": 2.172806302195596e-06, "loss": 0.011911039054393769, "step": 90065 }, { "epoch": 0.8477176470588236, "grad_norm": 0.565874480391278, "learning_rate": 2.172745991865244e-06, "loss": 0.01823100447654724, "step": 90070 }, { "epoch": 0.847764705882353, "grad_norm": 0.5234990046319568, "learning_rate": 2.172685686556694e-06, "loss": 0.017191982269287108, "step": 90075 }, { "epoch": 0.8478117647058824, "grad_norm": 0.5562073917089384, "learning_rate": 2.1726253862692486e-06, "loss": 0.013915048539638519, "step": 90080 }, { "epoch": 0.8478588235294118, "grad_norm": 0.5682704821198256, "learning_rate": 2.172565091002211e-06, "loss": 0.013779173791408538, "step": 90085 }, { "epoch": 0.8479058823529412, "grad_norm": 0.419856893986956, "learning_rate": 2.172504800754885e-06, "loss": 0.013551490008831024, "step": 90090 }, { "epoch": 0.8479529411764706, "grad_norm": 0.5028324927930448, "learning_rate": 2.172444515526574e-06, "loss": 0.013299128413200379, "step": 90095 }, { "epoch": 0.848, "grad_norm": 0.2933272290825401, "learning_rate": 2.1723842353165818e-06, "loss": 0.013073025643825531, "step": 90100 }, { "epoch": 0.8480470588235294, "grad_norm": 0.4422894094534973, "learning_rate": 2.172323960124211e-06, "loss": 0.01507905125617981, "step": 90105 }, { "epoch": 0.8480941176470588, "grad_norm": 0.5173054518361773, "learning_rate": 2.172263689948767e-06, "loss": 0.01131889820098877, "step": 90110 }, { "epoch": 0.8481411764705883, "grad_norm": 0.46870777669717567, "learning_rate": 2.1722034247895536e-06, "loss": 0.011623091250658035, "step": 90115 }, { "epoch": 0.8481882352941177, "grad_norm": 0.5155400571795274, "learning_rate": 2.1721431646458747e-06, "loss": 0.01575583815574646, "step": 90120 }, { "epoch": 0.8482352941176471, "grad_norm": 0.6769739651950714, "learning_rate": 2.172082909517035e-06, "loss": 0.016292095184326172, "step": 90125 }, { "epoch": 0.8482823529411765, "grad_norm": 0.5985066229594902, "learning_rate": 2.1720226594023384e-06, "loss": 0.012233789265155792, "step": 90130 }, { "epoch": 0.8483294117647059, "grad_norm": 0.2972292348843179, "learning_rate": 2.1719624143010897e-06, "loss": 0.014467966556549073, "step": 90135 }, { "epoch": 0.8483764705882353, "grad_norm": 0.5290149634227375, "learning_rate": 2.1719021742125942e-06, "loss": 0.01438896507024765, "step": 90140 }, { "epoch": 0.8484235294117647, "grad_norm": 0.37763351819461555, "learning_rate": 2.171841939136156e-06, "loss": 0.013948667049407958, "step": 90145 }, { "epoch": 0.8484705882352941, "grad_norm": 0.7955011141878141, "learning_rate": 2.1717817090710815e-06, "loss": 0.01666000634431839, "step": 90150 }, { "epoch": 0.8485176470588235, "grad_norm": 0.5862649564206778, "learning_rate": 2.1717214840166737e-06, "loss": 0.01627638041973114, "step": 90155 }, { "epoch": 0.8485647058823529, "grad_norm": 0.652537664701721, "learning_rate": 2.1716612639722393e-06, "loss": 0.017986470460891725, "step": 90160 }, { "epoch": 0.8486117647058824, "grad_norm": 0.589934790022672, "learning_rate": 2.171601048937084e-06, "loss": 0.015650828182697297, "step": 90165 }, { "epoch": 0.8486588235294118, "grad_norm": 0.509376694006526, "learning_rate": 2.1715408389105126e-06, "loss": 0.01335901916027069, "step": 90170 }, { "epoch": 0.8487058823529412, "grad_norm": 0.5926981745152254, "learning_rate": 2.171480633891831e-06, "loss": 0.014223986864089965, "step": 90175 }, { "epoch": 0.8487529411764706, "grad_norm": 0.4268084004174147, "learning_rate": 2.1714204338803455e-06, "loss": 0.012741486728191375, "step": 90180 }, { "epoch": 0.8488, "grad_norm": 0.3047326458983681, "learning_rate": 2.171360238875361e-06, "loss": 0.01229981631040573, "step": 90185 }, { "epoch": 0.8488470588235294, "grad_norm": 0.3800752532352043, "learning_rate": 2.1713000488761845e-06, "loss": 0.014258185029029846, "step": 90190 }, { "epoch": 0.8488941176470588, "grad_norm": 0.5577749567367637, "learning_rate": 2.171239863882122e-06, "loss": 0.014245885610580444, "step": 90195 }, { "epoch": 0.8489411764705882, "grad_norm": 0.524697591505077, "learning_rate": 2.17117968389248e-06, "loss": 0.013674755394458771, "step": 90200 }, { "epoch": 0.8489882352941176, "grad_norm": 0.5557676675615595, "learning_rate": 2.1711195089065644e-06, "loss": 0.012824535369873047, "step": 90205 }, { "epoch": 0.8490352941176471, "grad_norm": 0.4096704412393573, "learning_rate": 2.1710593389236826e-06, "loss": 0.01470595896244049, "step": 90210 }, { "epoch": 0.8490823529411765, "grad_norm": 0.5809496008198103, "learning_rate": 2.170999173943141e-06, "loss": 0.021944764256477355, "step": 90215 }, { "epoch": 0.8491294117647059, "grad_norm": 0.382011173562766, "learning_rate": 2.1709390139642464e-06, "loss": 0.014673785865306854, "step": 90220 }, { "epoch": 0.8491764705882353, "grad_norm": 0.593198347101876, "learning_rate": 2.170878858986306e-06, "loss": 0.015290659666061402, "step": 90225 }, { "epoch": 0.8492235294117647, "grad_norm": 0.5082049931563595, "learning_rate": 2.170818709008628e-06, "loss": 0.01700958013534546, "step": 90230 }, { "epoch": 0.8492705882352941, "grad_norm": 0.32081440422680463, "learning_rate": 2.1707585640305174e-06, "loss": 0.012808150053024292, "step": 90235 }, { "epoch": 0.8493176470588235, "grad_norm": 0.545300586191324, "learning_rate": 2.1706984240512836e-06, "loss": 0.013874609768390656, "step": 90240 }, { "epoch": 0.8493647058823529, "grad_norm": 0.43951488135652217, "learning_rate": 2.1706382890702333e-06, "loss": 0.01792043149471283, "step": 90245 }, { "epoch": 0.8494117647058823, "grad_norm": 0.4316464991937228, "learning_rate": 2.1705781590866744e-06, "loss": 0.015123891830444335, "step": 90250 }, { "epoch": 0.8494588235294117, "grad_norm": 0.5192112910144471, "learning_rate": 2.1705180340999154e-06, "loss": 0.01477285921573639, "step": 90255 }, { "epoch": 0.8495058823529412, "grad_norm": 0.514114579322453, "learning_rate": 2.170457914109263e-06, "loss": 0.015382292866706847, "step": 90260 }, { "epoch": 0.8495529411764706, "grad_norm": 0.58600369891042, "learning_rate": 2.170397799114026e-06, "loss": 0.014272192120552063, "step": 90265 }, { "epoch": 0.8496, "grad_norm": 0.6895290270074594, "learning_rate": 2.170337689113513e-06, "loss": 0.014861999452114106, "step": 90270 }, { "epoch": 0.8496470588235294, "grad_norm": 0.7183701414334777, "learning_rate": 2.1702775841070317e-06, "loss": 0.015678241848945618, "step": 90275 }, { "epoch": 0.8496941176470588, "grad_norm": 0.6130399249922535, "learning_rate": 2.170217484093891e-06, "loss": 0.01618456244468689, "step": 90280 }, { "epoch": 0.8497411764705882, "grad_norm": 0.4740636983356694, "learning_rate": 2.1701573890733995e-06, "loss": 0.012754444777965546, "step": 90285 }, { "epoch": 0.8497882352941176, "grad_norm": 0.4549496393348143, "learning_rate": 2.1700972990448663e-06, "loss": 0.010501667112112045, "step": 90290 }, { "epoch": 0.849835294117647, "grad_norm": 0.5740117777193464, "learning_rate": 2.1700372140075997e-06, "loss": 0.010592276602983475, "step": 90295 }, { "epoch": 0.8498823529411764, "grad_norm": 0.38148218573021936, "learning_rate": 2.1699771339609093e-06, "loss": 0.012792909145355224, "step": 90300 }, { "epoch": 0.8499294117647059, "grad_norm": 0.4401452340985254, "learning_rate": 2.1699170589041035e-06, "loss": 0.013073289394378662, "step": 90305 }, { "epoch": 0.8499764705882353, "grad_norm": 0.6561800944106554, "learning_rate": 2.1698569888364925e-06, "loss": 0.01653503179550171, "step": 90310 }, { "epoch": 0.8500235294117647, "grad_norm": 0.5627452865873298, "learning_rate": 2.1697969237573854e-06, "loss": 0.016861249506473542, "step": 90315 }, { "epoch": 0.8500705882352941, "grad_norm": 0.5596343483595082, "learning_rate": 2.1697368636660916e-06, "loss": 0.013800787925720214, "step": 90320 }, { "epoch": 0.8501176470588235, "grad_norm": 0.5621624159627273, "learning_rate": 2.1696768085619215e-06, "loss": 0.012142933905124664, "step": 90325 }, { "epoch": 0.8501647058823529, "grad_norm": 0.5484484319627813, "learning_rate": 2.169616758444184e-06, "loss": 0.012169703841209412, "step": 90330 }, { "epoch": 0.8502117647058823, "grad_norm": 0.5465596458223074, "learning_rate": 2.1695567133121896e-06, "loss": 0.01605796217918396, "step": 90335 }, { "epoch": 0.8502588235294117, "grad_norm": 0.49016969713826797, "learning_rate": 2.1694966731652488e-06, "loss": 0.01405423879623413, "step": 90340 }, { "epoch": 0.8503058823529411, "grad_norm": 0.6611513352673294, "learning_rate": 2.169436638002671e-06, "loss": 0.01953129470348358, "step": 90345 }, { "epoch": 0.8503529411764705, "grad_norm": 0.2947559352086135, "learning_rate": 2.169376607823768e-06, "loss": 0.011276812851428985, "step": 90350 }, { "epoch": 0.8504, "grad_norm": 0.44088484621186147, "learning_rate": 2.1693165826278483e-06, "loss": 0.013757526874542236, "step": 90355 }, { "epoch": 0.8504470588235294, "grad_norm": 0.46090861872426453, "learning_rate": 2.169256562414224e-06, "loss": 0.010723946243524551, "step": 90360 }, { "epoch": 0.8504941176470588, "grad_norm": 0.35107480097618643, "learning_rate": 2.1691965471822055e-06, "loss": 0.014985996484756469, "step": 90365 }, { "epoch": 0.8505411764705882, "grad_norm": 0.4999909686934536, "learning_rate": 2.169136536931104e-06, "loss": 0.01263192892074585, "step": 90370 }, { "epoch": 0.8505882352941176, "grad_norm": 0.3815580877403799, "learning_rate": 2.16907653166023e-06, "loss": 0.014699923992156982, "step": 90375 }, { "epoch": 0.850635294117647, "grad_norm": 0.5088220947499861, "learning_rate": 2.169016531368895e-06, "loss": 0.01780357360839844, "step": 90380 }, { "epoch": 0.8506823529411764, "grad_norm": 0.4509926465213476, "learning_rate": 2.168956536056411e-06, "loss": 0.014828692376613616, "step": 90385 }, { "epoch": 0.8507294117647058, "grad_norm": 0.5201266590370343, "learning_rate": 2.1688965457220877e-06, "loss": 0.013102515041828156, "step": 90390 }, { "epoch": 0.8507764705882352, "grad_norm": 0.3298914143660762, "learning_rate": 2.168836560365239e-06, "loss": 0.01196078211069107, "step": 90395 }, { "epoch": 0.8508235294117648, "grad_norm": 0.704152059020998, "learning_rate": 2.1687765799851744e-06, "loss": 0.01765979826450348, "step": 90400 }, { "epoch": 0.8508705882352942, "grad_norm": 0.4473624674126157, "learning_rate": 2.168716604581207e-06, "loss": 0.013507901132106781, "step": 90405 }, { "epoch": 0.8509176470588236, "grad_norm": 0.3452839761267165, "learning_rate": 2.168656634152649e-06, "loss": 0.01549399048089981, "step": 90410 }, { "epoch": 0.850964705882353, "grad_norm": 0.4177214635325763, "learning_rate": 2.1685966686988116e-06, "loss": 0.01072915643453598, "step": 90415 }, { "epoch": 0.8510117647058824, "grad_norm": 0.4309759809550569, "learning_rate": 2.168536708219008e-06, "loss": 0.016160190105438232, "step": 90420 }, { "epoch": 0.8510588235294118, "grad_norm": 0.4271867599915766, "learning_rate": 2.16847675271255e-06, "loss": 0.013145743310451508, "step": 90425 }, { "epoch": 0.8511058823529412, "grad_norm": 0.4185307974144525, "learning_rate": 2.16841680217875e-06, "loss": 0.012477602809667587, "step": 90430 }, { "epoch": 0.8511529411764706, "grad_norm": 0.41620280503346035, "learning_rate": 2.1683568566169213e-06, "loss": 0.011113275587558747, "step": 90435 }, { "epoch": 0.8512, "grad_norm": 0.346526160688318, "learning_rate": 2.168296916026376e-06, "loss": 0.013228411972522735, "step": 90440 }, { "epoch": 0.8512470588235294, "grad_norm": 0.4976889651673009, "learning_rate": 2.1682369804064273e-06, "loss": 0.014810526371002197, "step": 90445 }, { "epoch": 0.8512941176470589, "grad_norm": 0.5158353702189733, "learning_rate": 2.1681770497563885e-06, "loss": 0.0118740975856781, "step": 90450 }, { "epoch": 0.8513411764705883, "grad_norm": 0.5519347362192099, "learning_rate": 2.1681171240755726e-06, "loss": 0.016081851720809937, "step": 90455 }, { "epoch": 0.8513882352941177, "grad_norm": 0.8853880293560324, "learning_rate": 2.1680572033632928e-06, "loss": 0.018603911995887755, "step": 90460 }, { "epoch": 0.8514352941176471, "grad_norm": 0.49208689068157796, "learning_rate": 2.167997287618863e-06, "loss": 0.01456783264875412, "step": 90465 }, { "epoch": 0.8514823529411765, "grad_norm": 0.27963216405463964, "learning_rate": 2.1679373768415955e-06, "loss": 0.01138264536857605, "step": 90470 }, { "epoch": 0.8515294117647059, "grad_norm": 0.4249298311373939, "learning_rate": 2.167877471030806e-06, "loss": 0.016304539144039155, "step": 90475 }, { "epoch": 0.8515764705882353, "grad_norm": 0.4027601755887387, "learning_rate": 2.167817570185807e-06, "loss": 0.012412992864847183, "step": 90480 }, { "epoch": 0.8516235294117647, "grad_norm": 0.34308043173728314, "learning_rate": 2.1677576743059125e-06, "loss": 0.013045486807823182, "step": 90485 }, { "epoch": 0.8516705882352941, "grad_norm": 0.6005736326198675, "learning_rate": 2.1676977833904374e-06, "loss": 0.015770921111106874, "step": 90490 }, { "epoch": 0.8517176470588236, "grad_norm": 0.3788724187542083, "learning_rate": 2.1676378974386952e-06, "loss": 0.017693710327148438, "step": 90495 }, { "epoch": 0.851764705882353, "grad_norm": 0.475002350022224, "learning_rate": 2.1675780164500007e-06, "loss": 0.015081524848937988, "step": 90500 }, { "epoch": 0.8518117647058824, "grad_norm": 0.9155802424146465, "learning_rate": 2.1675181404236683e-06, "loss": 0.01370624303817749, "step": 90505 }, { "epoch": 0.8518588235294118, "grad_norm": 0.4208911096789397, "learning_rate": 2.167458269359013e-06, "loss": 0.016828888654708864, "step": 90510 }, { "epoch": 0.8519058823529412, "grad_norm": 0.5431346164454641, "learning_rate": 2.1673984032553487e-06, "loss": 0.012673091888427735, "step": 90515 }, { "epoch": 0.8519529411764706, "grad_norm": 0.5642124750561326, "learning_rate": 2.1673385421119907e-06, "loss": 0.016165703535079956, "step": 90520 }, { "epoch": 0.852, "grad_norm": 0.8988721989688573, "learning_rate": 2.167278685928254e-06, "loss": 0.019942089915275574, "step": 90525 }, { "epoch": 0.8520470588235294, "grad_norm": 0.31624911005167444, "learning_rate": 2.167218834703455e-06, "loss": 0.013651300966739655, "step": 90530 }, { "epoch": 0.8520941176470588, "grad_norm": 0.7742425418794768, "learning_rate": 2.1671589884369073e-06, "loss": 0.018572741746902467, "step": 90535 }, { "epoch": 0.8521411764705883, "grad_norm": 0.4807827166217558, "learning_rate": 2.167099147127927e-06, "loss": 0.01568032503128052, "step": 90540 }, { "epoch": 0.8521882352941177, "grad_norm": 0.4263052788565541, "learning_rate": 2.1670393107758295e-06, "loss": 0.013055673241615296, "step": 90545 }, { "epoch": 0.8522352941176471, "grad_norm": 0.4991162311581295, "learning_rate": 2.166979479379931e-06, "loss": 0.019662921130657197, "step": 90550 }, { "epoch": 0.8522823529411765, "grad_norm": 0.4428035677527777, "learning_rate": 2.1669196529395477e-06, "loss": 0.014919251203536987, "step": 90555 }, { "epoch": 0.8523294117647059, "grad_norm": 0.499475903896379, "learning_rate": 2.1668598314539936e-06, "loss": 0.015875129401683806, "step": 90560 }, { "epoch": 0.8523764705882353, "grad_norm": 0.608846157889714, "learning_rate": 2.1668000149225866e-06, "loss": 0.01523275077342987, "step": 90565 }, { "epoch": 0.8524235294117647, "grad_norm": 0.6337567888987841, "learning_rate": 2.1667402033446427e-06, "loss": 0.021054489910602568, "step": 90570 }, { "epoch": 0.8524705882352941, "grad_norm": 0.4592792379652936, "learning_rate": 2.1666803967194782e-06, "loss": 0.013543701171875, "step": 90575 }, { "epoch": 0.8525176470588235, "grad_norm": 0.39447593336903575, "learning_rate": 2.166620595046409e-06, "loss": 0.021960878372192384, "step": 90580 }, { "epoch": 0.8525647058823529, "grad_norm": 0.5997868355273666, "learning_rate": 2.166560798324752e-06, "loss": 0.015236112475395202, "step": 90585 }, { "epoch": 0.8526117647058824, "grad_norm": 0.30109586959663004, "learning_rate": 2.166501006553825e-06, "loss": 0.011653207242488861, "step": 90590 }, { "epoch": 0.8526588235294118, "grad_norm": 0.37878013636907093, "learning_rate": 2.1664412197329434e-06, "loss": 0.010559958219528199, "step": 90595 }, { "epoch": 0.8527058823529412, "grad_norm": 0.49829570425656344, "learning_rate": 2.166381437861425e-06, "loss": 0.012279371917247772, "step": 90600 }, { "epoch": 0.8527529411764706, "grad_norm": 0.6048546461748809, "learning_rate": 2.1663216609385868e-06, "loss": 0.012839506566524505, "step": 90605 }, { "epoch": 0.8528, "grad_norm": 0.3864798407814959, "learning_rate": 2.166261888963746e-06, "loss": 0.013979965448379516, "step": 90610 }, { "epoch": 0.8528470588235294, "grad_norm": 0.4634653118169303, "learning_rate": 2.1662021219362197e-06, "loss": 0.011122733354568481, "step": 90615 }, { "epoch": 0.8528941176470588, "grad_norm": 0.6356643827104668, "learning_rate": 2.1661423598553266e-06, "loss": 0.011975903064012527, "step": 90620 }, { "epoch": 0.8529411764705882, "grad_norm": 0.2737752421346331, "learning_rate": 2.1660826027203834e-06, "loss": 0.015000806748867035, "step": 90625 }, { "epoch": 0.8529882352941176, "grad_norm": 0.4488073424841757, "learning_rate": 2.1660228505307083e-06, "loss": 0.012061377614736557, "step": 90630 }, { "epoch": 0.8530352941176471, "grad_norm": 0.5880752613869215, "learning_rate": 2.165963103285619e-06, "loss": 0.013808494806289673, "step": 90635 }, { "epoch": 0.8530823529411765, "grad_norm": 0.5236529063487015, "learning_rate": 2.1659033609844338e-06, "loss": 0.013826164603233337, "step": 90640 }, { "epoch": 0.8531294117647059, "grad_norm": 0.5571470487251141, "learning_rate": 2.165843623626471e-06, "loss": 0.014662685990333556, "step": 90645 }, { "epoch": 0.8531764705882353, "grad_norm": 0.39039272685279985, "learning_rate": 2.1657838912110486e-06, "loss": 0.011379557847976684, "step": 90650 }, { "epoch": 0.8532235294117647, "grad_norm": 0.42516459737554063, "learning_rate": 2.1657241637374853e-06, "loss": 0.02035091519355774, "step": 90655 }, { "epoch": 0.8532705882352941, "grad_norm": 0.242182023200177, "learning_rate": 2.1656644412050997e-06, "loss": 0.0149183988571167, "step": 90660 }, { "epoch": 0.8533176470588235, "grad_norm": 1.0542581407021334, "learning_rate": 2.1656047236132107e-06, "loss": 0.021434029936790465, "step": 90665 }, { "epoch": 0.8533647058823529, "grad_norm": 0.4530089289159187, "learning_rate": 2.1655450109611375e-06, "loss": 0.0171076238155365, "step": 90670 }, { "epoch": 0.8534117647058823, "grad_norm": 0.5567294017117443, "learning_rate": 2.1654853032481976e-06, "loss": 0.014253705739974976, "step": 90675 }, { "epoch": 0.8534588235294117, "grad_norm": 0.4524090530987728, "learning_rate": 2.165425600473712e-06, "loss": 0.015563035011291504, "step": 90680 }, { "epoch": 0.8535058823529412, "grad_norm": 0.31312108675602657, "learning_rate": 2.1653659026369982e-06, "loss": 0.013036191463470459, "step": 90685 }, { "epoch": 0.8535529411764706, "grad_norm": 0.39014511523683687, "learning_rate": 2.165306209737377e-06, "loss": 0.01280215084552765, "step": 90690 }, { "epoch": 0.8536, "grad_norm": 0.5683944096577402, "learning_rate": 2.165246521774168e-06, "loss": 0.014234623312950135, "step": 90695 }, { "epoch": 0.8536470588235294, "grad_norm": 0.2779923188157507, "learning_rate": 2.165186838746689e-06, "loss": 0.013668200373649598, "step": 90700 }, { "epoch": 0.8536941176470588, "grad_norm": 0.6291044405520599, "learning_rate": 2.1651271606542627e-06, "loss": 0.023129843175411224, "step": 90705 }, { "epoch": 0.8537411764705882, "grad_norm": 0.3620258936573557, "learning_rate": 2.165067487496206e-06, "loss": 0.014148706197738647, "step": 90710 }, { "epoch": 0.8537882352941176, "grad_norm": 0.7798330798639662, "learning_rate": 2.165007819271841e-06, "loss": 0.013296067714691162, "step": 90715 }, { "epoch": 0.853835294117647, "grad_norm": 1.1795942799833798, "learning_rate": 2.1649481559804878e-06, "loss": 0.01167563870549202, "step": 90720 }, { "epoch": 0.8538823529411764, "grad_norm": 0.4702142679627864, "learning_rate": 2.164888497621465e-06, "loss": 0.016709743440151213, "step": 90725 }, { "epoch": 0.8539294117647059, "grad_norm": 0.5000574352694698, "learning_rate": 2.164828844194095e-06, "loss": 0.01245495080947876, "step": 90730 }, { "epoch": 0.8539764705882353, "grad_norm": 0.5423923972196312, "learning_rate": 2.164769195697698e-06, "loss": 0.01586439907550812, "step": 90735 }, { "epoch": 0.8540235294117647, "grad_norm": 0.4228075922480774, "learning_rate": 2.1647095521315935e-06, "loss": 0.015426036715507508, "step": 90740 }, { "epoch": 0.8540705882352941, "grad_norm": 0.3979061523030075, "learning_rate": 2.164649913495103e-06, "loss": 0.015016260743141174, "step": 90745 }, { "epoch": 0.8541176470588235, "grad_norm": 0.3721271123428756, "learning_rate": 2.1645902797875488e-06, "loss": 0.014924734830856323, "step": 90750 }, { "epoch": 0.8541647058823529, "grad_norm": 0.6396326225278187, "learning_rate": 2.1645306510082498e-06, "loss": 0.013231049478054046, "step": 90755 }, { "epoch": 0.8542117647058823, "grad_norm": 0.40422176750569067, "learning_rate": 2.1644710271565285e-06, "loss": 0.013214296102523804, "step": 90760 }, { "epoch": 0.8542588235294117, "grad_norm": 0.43938088430868466, "learning_rate": 2.164411408231706e-06, "loss": 0.010353031754493713, "step": 90765 }, { "epoch": 0.8543058823529411, "grad_norm": 0.36851491130602954, "learning_rate": 2.1643517942331045e-06, "loss": 0.00964466631412506, "step": 90770 }, { "epoch": 0.8543529411764705, "grad_norm": 0.4976403435532033, "learning_rate": 2.1642921851600444e-06, "loss": 0.015279726684093475, "step": 90775 }, { "epoch": 0.8544, "grad_norm": 0.3192959643687824, "learning_rate": 2.164232581011848e-06, "loss": 0.01197393387556076, "step": 90780 }, { "epoch": 0.8544470588235294, "grad_norm": 0.4782625656021092, "learning_rate": 2.1641729817878373e-06, "loss": 0.013503152132034301, "step": 90785 }, { "epoch": 0.8544941176470588, "grad_norm": 0.44681540605609893, "learning_rate": 2.164113387487334e-06, "loss": 0.012734714150428771, "step": 90790 }, { "epoch": 0.8545411764705882, "grad_norm": 0.5143997928192273, "learning_rate": 2.164053798109661e-06, "loss": 0.014351180195808411, "step": 90795 }, { "epoch": 0.8545882352941176, "grad_norm": 0.5034744591105677, "learning_rate": 2.16399421365414e-06, "loss": 0.01408310979604721, "step": 90800 }, { "epoch": 0.854635294117647, "grad_norm": 0.4313486936196551, "learning_rate": 2.163934634120093e-06, "loss": 0.013936266303062439, "step": 90805 }, { "epoch": 0.8546823529411764, "grad_norm": 0.4149352105690122, "learning_rate": 2.1638750595068433e-06, "loss": 0.015049965679645538, "step": 90810 }, { "epoch": 0.8547294117647058, "grad_norm": 0.6096054184668874, "learning_rate": 2.1638154898137133e-06, "loss": 0.016104060411453246, "step": 90815 }, { "epoch": 0.8547764705882352, "grad_norm": 0.3583427453447284, "learning_rate": 2.163755925040026e-06, "loss": 0.014707672595977783, "step": 90820 }, { "epoch": 0.8548235294117648, "grad_norm": 0.5049603224319132, "learning_rate": 2.1636963651851038e-06, "loss": 0.013660073280334473, "step": 90825 }, { "epoch": 0.8548705882352942, "grad_norm": 0.5002816598728227, "learning_rate": 2.1636368102482703e-06, "loss": 0.015846240520477294, "step": 90830 }, { "epoch": 0.8549176470588236, "grad_norm": 0.533351921376144, "learning_rate": 2.1635772602288484e-06, "loss": 0.012073000520467758, "step": 90835 }, { "epoch": 0.854964705882353, "grad_norm": 0.5144972586450047, "learning_rate": 2.1635177151261614e-06, "loss": 0.013132350146770477, "step": 90840 }, { "epoch": 0.8550117647058824, "grad_norm": 0.6146246341250524, "learning_rate": 2.163458174939533e-06, "loss": 0.01523129940032959, "step": 90845 }, { "epoch": 0.8550588235294118, "grad_norm": 0.47374255785647335, "learning_rate": 2.1633986396682867e-06, "loss": 0.016136883199214934, "step": 90850 }, { "epoch": 0.8551058823529412, "grad_norm": 0.6759066601792162, "learning_rate": 2.163339109311746e-06, "loss": 0.014241647720336915, "step": 90855 }, { "epoch": 0.8551529411764706, "grad_norm": 0.25358647548479896, "learning_rate": 2.163279583869235e-06, "loss": 0.012092809379100799, "step": 90860 }, { "epoch": 0.8552, "grad_norm": 0.5380942944390202, "learning_rate": 2.1632200633400777e-06, "loss": 0.01486285924911499, "step": 90865 }, { "epoch": 0.8552470588235294, "grad_norm": 0.5023425872806723, "learning_rate": 2.163160547723598e-06, "loss": 0.013055852055549622, "step": 90870 }, { "epoch": 0.8552941176470589, "grad_norm": 0.5614536016927346, "learning_rate": 2.1631010370191204e-06, "loss": 0.013082221150398254, "step": 90875 }, { "epoch": 0.8553411764705883, "grad_norm": 0.40244295725346735, "learning_rate": 2.1630415312259683e-06, "loss": 0.013055256009101868, "step": 90880 }, { "epoch": 0.8553882352941177, "grad_norm": 0.312016499597989, "learning_rate": 2.162982030343468e-06, "loss": 0.011618709564208985, "step": 90885 }, { "epoch": 0.8554352941176471, "grad_norm": 0.42967510441572326, "learning_rate": 2.1629225343709427e-06, "loss": 0.016266652941703798, "step": 90890 }, { "epoch": 0.8554823529411765, "grad_norm": 0.4712146507235625, "learning_rate": 2.162863043307718e-06, "loss": 0.016223445534706116, "step": 90895 }, { "epoch": 0.8555294117647059, "grad_norm": 0.5083775323216192, "learning_rate": 2.1628035571531177e-06, "loss": 0.01542716920375824, "step": 90900 }, { "epoch": 0.8555764705882353, "grad_norm": 0.4973853680752562, "learning_rate": 2.162744075906468e-06, "loss": 0.00928896963596344, "step": 90905 }, { "epoch": 0.8556235294117647, "grad_norm": 0.5638345157770969, "learning_rate": 2.162684599567093e-06, "loss": 0.014705231785774231, "step": 90910 }, { "epoch": 0.8556705882352941, "grad_norm": 0.3760483838811147, "learning_rate": 2.162625128134319e-06, "loss": 0.014782288670539856, "step": 90915 }, { "epoch": 0.8557176470588236, "grad_norm": 0.9223674686905015, "learning_rate": 2.162565661607471e-06, "loss": 0.01588750183582306, "step": 90920 }, { "epoch": 0.855764705882353, "grad_norm": 0.6947604134183154, "learning_rate": 2.162506199985874e-06, "loss": 0.021199145913124086, "step": 90925 }, { "epoch": 0.8558117647058824, "grad_norm": 0.6028494128228397, "learning_rate": 2.1624467432688547e-06, "loss": 0.013321074843406677, "step": 90930 }, { "epoch": 0.8558588235294118, "grad_norm": 0.3426660221430105, "learning_rate": 2.1623872914557376e-06, "loss": 0.014681863784790038, "step": 90935 }, { "epoch": 0.8559058823529412, "grad_norm": 0.6565382669585618, "learning_rate": 2.1623278445458494e-06, "loss": 0.015412548184394836, "step": 90940 }, { "epoch": 0.8559529411764706, "grad_norm": 0.5429478628141174, "learning_rate": 2.1622684025385164e-06, "loss": 0.014595532417297363, "step": 90945 }, { "epoch": 0.856, "grad_norm": 0.6255784059886452, "learning_rate": 2.1622089654330647e-06, "loss": 0.01624877005815506, "step": 90950 }, { "epoch": 0.8560470588235294, "grad_norm": 0.4034360733391251, "learning_rate": 2.16214953322882e-06, "loss": 0.016508796811103822, "step": 90955 }, { "epoch": 0.8560941176470588, "grad_norm": 0.49729535615005516, "learning_rate": 2.1620901059251095e-06, "loss": 0.014324602484703065, "step": 90960 }, { "epoch": 0.8561411764705882, "grad_norm": 0.3895567421481583, "learning_rate": 2.162030683521259e-06, "loss": 0.01510109007358551, "step": 90965 }, { "epoch": 0.8561882352941177, "grad_norm": 0.7379916267489738, "learning_rate": 2.161971266016596e-06, "loss": 0.014389359951019287, "step": 90970 }, { "epoch": 0.8562352941176471, "grad_norm": 0.42596068958433236, "learning_rate": 2.1619118534104465e-06, "loss": 0.01376742422580719, "step": 90975 }, { "epoch": 0.8562823529411765, "grad_norm": 0.45949311752340893, "learning_rate": 2.1618524457021384e-06, "loss": 0.01615784764289856, "step": 90980 }, { "epoch": 0.8563294117647059, "grad_norm": 0.44264655396212277, "learning_rate": 2.161793042890998e-06, "loss": 0.01603754460811615, "step": 90985 }, { "epoch": 0.8563764705882353, "grad_norm": 0.4256289505218539, "learning_rate": 2.161733644976353e-06, "loss": 0.014294803142547607, "step": 90990 }, { "epoch": 0.8564235294117647, "grad_norm": 0.7046332384520145, "learning_rate": 2.1616742519575304e-06, "loss": 0.015758152306079864, "step": 90995 }, { "epoch": 0.8564705882352941, "grad_norm": 0.41680794299423585, "learning_rate": 2.161614863833858e-06, "loss": 0.01423204392194748, "step": 91000 }, { "epoch": 0.8565176470588235, "grad_norm": 0.6382132030770624, "learning_rate": 2.1615554806046634e-06, "loss": 0.012393847107887268, "step": 91005 }, { "epoch": 0.8565647058823529, "grad_norm": 0.6181927931466715, "learning_rate": 2.161496102269274e-06, "loss": 0.015565599501132964, "step": 91010 }, { "epoch": 0.8566117647058824, "grad_norm": 0.5867052218855854, "learning_rate": 2.1614367288270176e-06, "loss": 0.015025532245635987, "step": 91015 }, { "epoch": 0.8566588235294118, "grad_norm": 0.49462919109156667, "learning_rate": 2.1613773602772227e-06, "loss": 0.016740629076957704, "step": 91020 }, { "epoch": 0.8567058823529412, "grad_norm": 0.6013945418407213, "learning_rate": 2.1613179966192175e-06, "loss": 0.014567725360393524, "step": 91025 }, { "epoch": 0.8567529411764706, "grad_norm": 0.4148719928062414, "learning_rate": 2.1612586378523296e-06, "loss": 0.014554734528064727, "step": 91030 }, { "epoch": 0.8568, "grad_norm": 0.27830200398739413, "learning_rate": 2.161199283975888e-06, "loss": 0.012483517825603484, "step": 91035 }, { "epoch": 0.8568470588235294, "grad_norm": 0.5594057267279554, "learning_rate": 2.1611399349892207e-06, "loss": 0.014487339556217194, "step": 91040 }, { "epoch": 0.8568941176470588, "grad_norm": 0.5647466085238115, "learning_rate": 2.161080590891657e-06, "loss": 0.014217643439769745, "step": 91045 }, { "epoch": 0.8569411764705882, "grad_norm": 0.3774715920120086, "learning_rate": 2.1610212516825246e-06, "loss": 0.012435811758041381, "step": 91050 }, { "epoch": 0.8569882352941176, "grad_norm": 0.3332560155547305, "learning_rate": 2.1609619173611536e-06, "loss": 0.01612735390663147, "step": 91055 }, { "epoch": 0.857035294117647, "grad_norm": 0.5745575964981107, "learning_rate": 2.1609025879268727e-06, "loss": 0.015169423818588258, "step": 91060 }, { "epoch": 0.8570823529411765, "grad_norm": 0.2890457449004791, "learning_rate": 2.1608432633790104e-06, "loss": 0.01208670437335968, "step": 91065 }, { "epoch": 0.8571294117647059, "grad_norm": 0.5647708214065993, "learning_rate": 2.1607839437168965e-06, "loss": 0.01225019320845604, "step": 91070 }, { "epoch": 0.8571764705882353, "grad_norm": 0.4715612152193375, "learning_rate": 2.1607246289398604e-06, "loss": 0.014713999629020692, "step": 91075 }, { "epoch": 0.8572235294117647, "grad_norm": 0.522980132287365, "learning_rate": 2.160665319047232e-06, "loss": 0.015827785432338714, "step": 91080 }, { "epoch": 0.8572705882352941, "grad_norm": 0.6539689254743081, "learning_rate": 2.1606060140383402e-06, "loss": 0.016531491279602052, "step": 91085 }, { "epoch": 0.8573176470588235, "grad_norm": 0.5667134730448169, "learning_rate": 2.160546713912515e-06, "loss": 0.014925754070281983, "step": 91090 }, { "epoch": 0.8573647058823529, "grad_norm": 0.3900104553207638, "learning_rate": 2.1604874186690868e-06, "loss": 0.01053546518087387, "step": 91095 }, { "epoch": 0.8574117647058823, "grad_norm": 0.3396421809111092, "learning_rate": 2.1604281283073854e-06, "loss": 0.013556638360023498, "step": 91100 }, { "epoch": 0.8574588235294117, "grad_norm": 0.5062579252687229, "learning_rate": 2.160368842826741e-06, "loss": 0.014396771788597107, "step": 91105 }, { "epoch": 0.8575058823529412, "grad_norm": 0.26124925645586927, "learning_rate": 2.1603095622264835e-06, "loss": 0.011144837737083435, "step": 91110 }, { "epoch": 0.8575529411764706, "grad_norm": 0.500252788240804, "learning_rate": 2.1602502865059444e-06, "loss": 0.023725830018520355, "step": 91115 }, { "epoch": 0.8576, "grad_norm": 0.4453707476995376, "learning_rate": 2.160191015664453e-06, "loss": 0.013995295763015747, "step": 91120 }, { "epoch": 0.8576470588235294, "grad_norm": 0.4649339293237838, "learning_rate": 2.160131749701341e-06, "loss": 0.014533042907714844, "step": 91125 }, { "epoch": 0.8576941176470588, "grad_norm": 0.3691620591050132, "learning_rate": 2.1600724886159387e-06, "loss": 0.0116790771484375, "step": 91130 }, { "epoch": 0.8577411764705882, "grad_norm": 0.4231216098292195, "learning_rate": 2.1600132324075772e-06, "loss": 0.014976876974105834, "step": 91135 }, { "epoch": 0.8577882352941176, "grad_norm": 0.3996287502330411, "learning_rate": 2.1599539810755874e-06, "loss": 0.014458957314491271, "step": 91140 }, { "epoch": 0.857835294117647, "grad_norm": 0.40655963777838167, "learning_rate": 2.159894734619301e-06, "loss": 0.013126295804977418, "step": 91145 }, { "epoch": 0.8578823529411764, "grad_norm": 0.3806334540809496, "learning_rate": 2.159835493038049e-06, "loss": 0.014446905255317688, "step": 91150 }, { "epoch": 0.8579294117647058, "grad_norm": 0.42068580834475544, "learning_rate": 2.159776256331163e-06, "loss": 0.015925322473049165, "step": 91155 }, { "epoch": 0.8579764705882353, "grad_norm": 0.585699838635972, "learning_rate": 2.1597170244979747e-06, "loss": 0.011743452399969101, "step": 91160 }, { "epoch": 0.8580235294117647, "grad_norm": 0.6817716337509521, "learning_rate": 2.159657797537815e-06, "loss": 0.015513202548027039, "step": 91165 }, { "epoch": 0.8580705882352941, "grad_norm": 0.5996222831718517, "learning_rate": 2.159598575450017e-06, "loss": 0.012934510409832001, "step": 91170 }, { "epoch": 0.8581176470588235, "grad_norm": 0.5900310050213919, "learning_rate": 2.159539358233912e-06, "loss": 0.014272813498973847, "step": 91175 }, { "epoch": 0.8581647058823529, "grad_norm": 0.48623096046090786, "learning_rate": 2.159480145888832e-06, "loss": 0.011806590855121613, "step": 91180 }, { "epoch": 0.8582117647058823, "grad_norm": 0.4138151004153002, "learning_rate": 2.1594209384141097e-06, "loss": 0.013223370909690857, "step": 91185 }, { "epoch": 0.8582588235294117, "grad_norm": 0.37543680363125437, "learning_rate": 2.1593617358090772e-06, "loss": 0.023614324629306793, "step": 91190 }, { "epoch": 0.8583058823529411, "grad_norm": 0.40873869841827315, "learning_rate": 2.159302538073067e-06, "loss": 0.012436716258525849, "step": 91195 }, { "epoch": 0.8583529411764705, "grad_norm": 0.501015583548185, "learning_rate": 2.1592433452054123e-06, "loss": 0.01377216875553131, "step": 91200 }, { "epoch": 0.8584, "grad_norm": 0.4188553186257934, "learning_rate": 2.1591841572054445e-06, "loss": 0.014920686185359956, "step": 91205 }, { "epoch": 0.8584470588235295, "grad_norm": 0.5955890695674875, "learning_rate": 2.159124974072498e-06, "loss": 0.014649879932403565, "step": 91210 }, { "epoch": 0.8584941176470589, "grad_norm": 0.5325717104099884, "learning_rate": 2.159065795805905e-06, "loss": 0.019434566795825958, "step": 91215 }, { "epoch": 0.8585411764705883, "grad_norm": 0.4148900950173139, "learning_rate": 2.1590066224049987e-06, "loss": 0.011728543788194656, "step": 91220 }, { "epoch": 0.8585882352941177, "grad_norm": 0.3401318769504235, "learning_rate": 2.158947453869112e-06, "loss": 0.011907093226909637, "step": 91225 }, { "epoch": 0.858635294117647, "grad_norm": 0.7915074673137948, "learning_rate": 2.158888290197579e-06, "loss": 0.023964324593544008, "step": 91230 }, { "epoch": 0.8586823529411765, "grad_norm": 0.4575981123613839, "learning_rate": 2.158829131389733e-06, "loss": 0.015547078847885133, "step": 91235 }, { "epoch": 0.8587294117647059, "grad_norm": 0.5957658424767504, "learning_rate": 2.1587699774449077e-06, "loss": 0.020564720034599304, "step": 91240 }, { "epoch": 0.8587764705882353, "grad_norm": 0.5691608557693554, "learning_rate": 2.1587108283624366e-06, "loss": 0.01468316912651062, "step": 91245 }, { "epoch": 0.8588235294117647, "grad_norm": 0.4144415747953427, "learning_rate": 2.1586516841416542e-06, "loss": 0.011972658336162567, "step": 91250 }, { "epoch": 0.8588705882352942, "grad_norm": 0.2638217995019318, "learning_rate": 2.158592544781894e-06, "loss": 0.017211349308490755, "step": 91255 }, { "epoch": 0.8589176470588236, "grad_norm": 0.43351495359754494, "learning_rate": 2.15853341028249e-06, "loss": 0.01015132963657379, "step": 91260 }, { "epoch": 0.858964705882353, "grad_norm": 0.6229859374542238, "learning_rate": 2.1584742806427773e-06, "loss": 0.012146078050136566, "step": 91265 }, { "epoch": 0.8590117647058824, "grad_norm": 0.4750487412671241, "learning_rate": 2.1584151558620894e-06, "loss": 0.013291391730308532, "step": 91270 }, { "epoch": 0.8590588235294118, "grad_norm": 0.9438828393101516, "learning_rate": 2.158356035939761e-06, "loss": 0.01807980388402939, "step": 91275 }, { "epoch": 0.8591058823529412, "grad_norm": 0.5459768259644023, "learning_rate": 2.1582969208751278e-06, "loss": 0.010574198514223098, "step": 91280 }, { "epoch": 0.8591529411764706, "grad_norm": 0.5381417899241056, "learning_rate": 2.1582378106675235e-06, "loss": 0.012327811866998672, "step": 91285 }, { "epoch": 0.8592, "grad_norm": 0.3846303326633148, "learning_rate": 2.158178705316283e-06, "loss": 0.013357985019683837, "step": 91290 }, { "epoch": 0.8592470588235294, "grad_norm": 0.30302486603866813, "learning_rate": 2.158119604820742e-06, "loss": 0.01291947066783905, "step": 91295 }, { "epoch": 0.8592941176470589, "grad_norm": 0.4706170793691691, "learning_rate": 2.158060509180236e-06, "loss": 0.014180953800678252, "step": 91300 }, { "epoch": 0.8593411764705883, "grad_norm": 0.4223572776775641, "learning_rate": 2.1580014183940984e-06, "loss": 0.011285632848739624, "step": 91305 }, { "epoch": 0.8593882352941177, "grad_norm": 0.5260017962681864, "learning_rate": 2.1579423324616668e-06, "loss": 0.015141691267490386, "step": 91310 }, { "epoch": 0.8594352941176471, "grad_norm": 0.34405282144228744, "learning_rate": 2.1578832513822757e-06, "loss": 0.011288423836231232, "step": 91315 }, { "epoch": 0.8594823529411765, "grad_norm": 0.5944427345660983, "learning_rate": 2.157824175155261e-06, "loss": 0.019639036059379576, "step": 91320 }, { "epoch": 0.8595294117647059, "grad_norm": 0.4063411990821821, "learning_rate": 2.1577651037799586e-06, "loss": 0.015523436665534972, "step": 91325 }, { "epoch": 0.8595764705882353, "grad_norm": 0.4399698516936817, "learning_rate": 2.1577060372557043e-06, "loss": 0.014593519270420074, "step": 91330 }, { "epoch": 0.8596235294117647, "grad_norm": 0.46214850198990454, "learning_rate": 2.1576469755818343e-06, "loss": 0.016173262894153596, "step": 91335 }, { "epoch": 0.8596705882352941, "grad_norm": 0.8421722736785707, "learning_rate": 2.157587918757685e-06, "loss": 0.021510088443756105, "step": 91340 }, { "epoch": 0.8597176470588235, "grad_norm": 0.34156355843486735, "learning_rate": 2.1575288667825924e-06, "loss": 0.015165409445762635, "step": 91345 }, { "epoch": 0.859764705882353, "grad_norm": 0.4818229077096342, "learning_rate": 2.1574698196558928e-06, "loss": 0.012718243896961213, "step": 91350 }, { "epoch": 0.8598117647058824, "grad_norm": 0.6033675352852171, "learning_rate": 2.1574107773769228e-06, "loss": 0.0125446617603302, "step": 91355 }, { "epoch": 0.8598588235294118, "grad_norm": 0.4344631056328505, "learning_rate": 2.1573517399450197e-06, "loss": 0.013900808990001678, "step": 91360 }, { "epoch": 0.8599058823529412, "grad_norm": 0.602337758672613, "learning_rate": 2.15729270735952e-06, "loss": 0.018566128611564637, "step": 91365 }, { "epoch": 0.8599529411764706, "grad_norm": 0.4717416577711937, "learning_rate": 2.15723367961976e-06, "loss": 0.013615033030509949, "step": 91370 }, { "epoch": 0.86, "grad_norm": 0.7834321758530951, "learning_rate": 2.1571746567250782e-06, "loss": 0.014556685090065002, "step": 91375 }, { "epoch": 0.8600470588235294, "grad_norm": 0.2606840847459397, "learning_rate": 2.157115638674811e-06, "loss": 0.012731364369392395, "step": 91380 }, { "epoch": 0.8600941176470588, "grad_norm": 0.5118087247566241, "learning_rate": 2.1570566254682955e-06, "loss": 0.012190202623605728, "step": 91385 }, { "epoch": 0.8601411764705882, "grad_norm": 0.6983984917437266, "learning_rate": 2.1569976171048694e-06, "loss": 0.01722683310508728, "step": 91390 }, { "epoch": 0.8601882352941177, "grad_norm": 0.5560385784842939, "learning_rate": 2.1569386135838707e-06, "loss": 0.01635095477104187, "step": 91395 }, { "epoch": 0.8602352941176471, "grad_norm": 0.5886652579678602, "learning_rate": 2.1568796149046367e-06, "loss": 0.015495139360427856, "step": 91400 }, { "epoch": 0.8602823529411765, "grad_norm": 0.5529273753043066, "learning_rate": 2.1568206210665056e-06, "loss": 0.012354426085948944, "step": 91405 }, { "epoch": 0.8603294117647059, "grad_norm": 0.37128090521573254, "learning_rate": 2.1567616320688142e-06, "loss": 0.01196001172065735, "step": 91410 }, { "epoch": 0.8603764705882353, "grad_norm": 0.42439429929840244, "learning_rate": 2.156702647910902e-06, "loss": 0.012864288687705994, "step": 91415 }, { "epoch": 0.8604235294117647, "grad_norm": 0.47825150850037285, "learning_rate": 2.156643668592107e-06, "loss": 0.010951948165893555, "step": 91420 }, { "epoch": 0.8604705882352941, "grad_norm": 0.8026479553464192, "learning_rate": 2.1565846941117673e-06, "loss": 0.01369694322347641, "step": 91425 }, { "epoch": 0.8605176470588235, "grad_norm": 0.5033877935205882, "learning_rate": 2.1565257244692213e-06, "loss": 0.012742310762405396, "step": 91430 }, { "epoch": 0.8605647058823529, "grad_norm": 0.38492468702673244, "learning_rate": 2.1564667596638077e-06, "loss": 0.01355268806219101, "step": 91435 }, { "epoch": 0.8606117647058823, "grad_norm": 0.4357373301041988, "learning_rate": 2.1564077996948654e-06, "loss": 0.011618417501449586, "step": 91440 }, { "epoch": 0.8606588235294118, "grad_norm": 0.4750896703952249, "learning_rate": 2.1563488445617325e-06, "loss": 0.015967229008674623, "step": 91445 }, { "epoch": 0.8607058823529412, "grad_norm": 0.3250249338195156, "learning_rate": 2.156289894263749e-06, "loss": 0.014539067447185517, "step": 91450 }, { "epoch": 0.8607529411764706, "grad_norm": 0.609363505967349, "learning_rate": 2.156230948800254e-06, "loss": 0.016054460406303407, "step": 91455 }, { "epoch": 0.8608, "grad_norm": 0.5085373628095924, "learning_rate": 2.1561720081705864e-06, "loss": 0.018295130133628844, "step": 91460 }, { "epoch": 0.8608470588235294, "grad_norm": 0.5382706122694382, "learning_rate": 2.1561130723740853e-06, "loss": 0.017115025222301482, "step": 91465 }, { "epoch": 0.8608941176470588, "grad_norm": 0.4428925749803764, "learning_rate": 2.15605414141009e-06, "loss": 0.01254793256521225, "step": 91470 }, { "epoch": 0.8609411764705882, "grad_norm": 0.47161400459096486, "learning_rate": 2.155995215277941e-06, "loss": 0.01574590802192688, "step": 91475 }, { "epoch": 0.8609882352941176, "grad_norm": 0.5300609883266676, "learning_rate": 2.1559362939769778e-06, "loss": 0.018234658241271972, "step": 91480 }, { "epoch": 0.861035294117647, "grad_norm": 0.5269078517157006, "learning_rate": 2.15587737750654e-06, "loss": 0.020763953030109406, "step": 91485 }, { "epoch": 0.8610823529411765, "grad_norm": 0.22247617157683217, "learning_rate": 2.1558184658659677e-06, "loss": 0.013574942946434021, "step": 91490 }, { "epoch": 0.8611294117647059, "grad_norm": 0.5689999441621179, "learning_rate": 2.155759559054601e-06, "loss": 0.017083825170993806, "step": 91495 }, { "epoch": 0.8611764705882353, "grad_norm": 0.561421024608172, "learning_rate": 2.1557006570717802e-06, "loss": 0.016432419419288635, "step": 91500 }, { "epoch": 0.8612235294117647, "grad_norm": 0.4507035951905139, "learning_rate": 2.1556417599168455e-06, "loss": 0.014059221744537354, "step": 91505 }, { "epoch": 0.8612705882352941, "grad_norm": 0.46952503250322697, "learning_rate": 2.155582867589138e-06, "loss": 0.01300247311592102, "step": 91510 }, { "epoch": 0.8613176470588235, "grad_norm": 0.44269294628191586, "learning_rate": 2.1555239800879975e-06, "loss": 0.015097671747207641, "step": 91515 }, { "epoch": 0.8613647058823529, "grad_norm": 0.566406618654887, "learning_rate": 2.1554650974127654e-06, "loss": 0.012690776586532592, "step": 91520 }, { "epoch": 0.8614117647058823, "grad_norm": 0.40732671782867746, "learning_rate": 2.155406219562782e-06, "loss": 0.012138204276561737, "step": 91525 }, { "epoch": 0.8614588235294117, "grad_norm": 0.6420943058535065, "learning_rate": 2.1553473465373894e-06, "loss": 0.011403899639844894, "step": 91530 }, { "epoch": 0.8615058823529411, "grad_norm": 0.39398146180799626, "learning_rate": 2.155288478335928e-06, "loss": 0.01153499335050583, "step": 91535 }, { "epoch": 0.8615529411764706, "grad_norm": 0.3510792253864621, "learning_rate": 2.1552296149577382e-06, "loss": 0.015543819963932037, "step": 91540 }, { "epoch": 0.8616, "grad_norm": 0.43933821642815396, "learning_rate": 2.1551707564021627e-06, "loss": 0.014129000902175903, "step": 91545 }, { "epoch": 0.8616470588235294, "grad_norm": 0.48340021669213457, "learning_rate": 2.155111902668543e-06, "loss": 0.01654156744480133, "step": 91550 }, { "epoch": 0.8616941176470588, "grad_norm": 0.4350767984907775, "learning_rate": 2.15505305375622e-06, "loss": 0.015988752245903015, "step": 91555 }, { "epoch": 0.8617411764705882, "grad_norm": 0.5570722402974301, "learning_rate": 2.1549942096645356e-06, "loss": 0.013884535431861878, "step": 91560 }, { "epoch": 0.8617882352941176, "grad_norm": 0.4426220303342944, "learning_rate": 2.154935370392833e-06, "loss": 0.015455180406570434, "step": 91565 }, { "epoch": 0.861835294117647, "grad_norm": 0.440373944045526, "learning_rate": 2.154876535940452e-06, "loss": 0.01565970480442047, "step": 91570 }, { "epoch": 0.8618823529411764, "grad_norm": 0.575535442490844, "learning_rate": 2.154817706306736e-06, "loss": 0.010536736994981765, "step": 91575 }, { "epoch": 0.8619294117647058, "grad_norm": 0.34890007583609134, "learning_rate": 2.154758881491027e-06, "loss": 0.014536827802658081, "step": 91580 }, { "epoch": 0.8619764705882353, "grad_norm": 0.1716226298643154, "learning_rate": 2.154700061492668e-06, "loss": 0.012513592839241028, "step": 91585 }, { "epoch": 0.8620235294117647, "grad_norm": 0.4757852626813683, "learning_rate": 2.154641246311001e-06, "loss": 0.01565796136856079, "step": 91590 }, { "epoch": 0.8620705882352941, "grad_norm": 0.4361531997758162, "learning_rate": 2.1545824359453684e-06, "loss": 0.014414194226264953, "step": 91595 }, { "epoch": 0.8621176470588235, "grad_norm": 0.8003453720451676, "learning_rate": 2.154523630395113e-06, "loss": 0.0157419815659523, "step": 91600 }, { "epoch": 0.8621647058823529, "grad_norm": 0.4787464780744199, "learning_rate": 2.1544648296595787e-06, "loss": 0.015511995553970337, "step": 91605 }, { "epoch": 0.8622117647058823, "grad_norm": 0.5321627476306017, "learning_rate": 2.1544060337381077e-06, "loss": 0.016635525226593017, "step": 91610 }, { "epoch": 0.8622588235294117, "grad_norm": 0.47152090872291236, "learning_rate": 2.1543472426300425e-06, "loss": 0.01297255903482437, "step": 91615 }, { "epoch": 0.8623058823529411, "grad_norm": 0.5448320786749178, "learning_rate": 2.154288456334728e-06, "loss": 0.015565115213394164, "step": 91620 }, { "epoch": 0.8623529411764705, "grad_norm": 0.551793562294535, "learning_rate": 2.154229674851507e-06, "loss": 0.013931405544281007, "step": 91625 }, { "epoch": 0.8624, "grad_norm": 0.3969353343882903, "learning_rate": 2.154170898179722e-06, "loss": 0.014841440320014953, "step": 91630 }, { "epoch": 0.8624470588235295, "grad_norm": 0.5945807108917996, "learning_rate": 2.1541121263187173e-06, "loss": 0.01620238721370697, "step": 91635 }, { "epoch": 0.8624941176470589, "grad_norm": 0.772491778055499, "learning_rate": 2.154053359267837e-06, "loss": 0.014161862432956696, "step": 91640 }, { "epoch": 0.8625411764705883, "grad_norm": 0.4580600096075135, "learning_rate": 2.153994597026425e-06, "loss": 0.012114354223012925, "step": 91645 }, { "epoch": 0.8625882352941177, "grad_norm": 0.25406095952631, "learning_rate": 2.1539358395938246e-06, "loss": 0.013996908068656921, "step": 91650 }, { "epoch": 0.8626352941176471, "grad_norm": 0.40486883995576844, "learning_rate": 2.1538770869693813e-06, "loss": 0.013128627836704255, "step": 91655 }, { "epoch": 0.8626823529411765, "grad_norm": 0.3917148508604859, "learning_rate": 2.1538183391524377e-06, "loss": 0.01651044487953186, "step": 91660 }, { "epoch": 0.8627294117647059, "grad_norm": 0.43083533592373163, "learning_rate": 2.1537595961423397e-06, "loss": 0.0170443594455719, "step": 91665 }, { "epoch": 0.8627764705882353, "grad_norm": 0.540490994181027, "learning_rate": 2.153700857938431e-06, "loss": 0.01455557942390442, "step": 91670 }, { "epoch": 0.8628235294117647, "grad_norm": 0.4043598084820605, "learning_rate": 2.1536421245400567e-06, "loss": 0.013378767669200898, "step": 91675 }, { "epoch": 0.8628705882352942, "grad_norm": 0.5774487521429169, "learning_rate": 2.1535833959465606e-06, "loss": 0.012692736089229583, "step": 91680 }, { "epoch": 0.8629176470588236, "grad_norm": 1.1043126099124987, "learning_rate": 2.153524672157289e-06, "loss": 0.014558050036430358, "step": 91685 }, { "epoch": 0.862964705882353, "grad_norm": 0.3922055040825217, "learning_rate": 2.153465953171586e-06, "loss": 0.017519618570804595, "step": 91690 }, { "epoch": 0.8630117647058824, "grad_norm": 0.32695737699049376, "learning_rate": 2.153407238988797e-06, "loss": 0.01277434229850769, "step": 91695 }, { "epoch": 0.8630588235294118, "grad_norm": 0.5937617965186218, "learning_rate": 2.1533485296082666e-06, "loss": 0.013775229454040527, "step": 91700 }, { "epoch": 0.8631058823529412, "grad_norm": 0.5379498537615497, "learning_rate": 2.153289825029342e-06, "loss": 0.017196674644947053, "step": 91705 }, { "epoch": 0.8631529411764706, "grad_norm": 0.3594177425365629, "learning_rate": 2.1532311252513673e-06, "loss": 0.009852826595306396, "step": 91710 }, { "epoch": 0.8632, "grad_norm": 0.5031928166611297, "learning_rate": 2.153172430273688e-06, "loss": 0.013932988047599792, "step": 91715 }, { "epoch": 0.8632470588235294, "grad_norm": 0.5191674675821812, "learning_rate": 2.1531137400956504e-06, "loss": 0.01547825038433075, "step": 91720 }, { "epoch": 0.8632941176470589, "grad_norm": 0.4449636282605318, "learning_rate": 2.153055054716601e-06, "loss": 0.015656621754169465, "step": 91725 }, { "epoch": 0.8633411764705883, "grad_norm": 0.4537591678088135, "learning_rate": 2.1529963741358846e-06, "loss": 0.016306087374687195, "step": 91730 }, { "epoch": 0.8633882352941177, "grad_norm": 0.48896704650508466, "learning_rate": 2.1529376983528484e-06, "loss": 0.014735417068004608, "step": 91735 }, { "epoch": 0.8634352941176471, "grad_norm": 0.3712197179379004, "learning_rate": 2.1528790273668377e-06, "loss": 0.014290055632591248, "step": 91740 }, { "epoch": 0.8634823529411765, "grad_norm": 0.44013333692038553, "learning_rate": 2.1528203611771997e-06, "loss": 0.015440639853477479, "step": 91745 }, { "epoch": 0.8635294117647059, "grad_norm": 0.3225955765632187, "learning_rate": 2.1527616997832806e-06, "loss": 0.011064872890710831, "step": 91750 }, { "epoch": 0.8635764705882353, "grad_norm": 0.3181679739784273, "learning_rate": 2.152703043184427e-06, "loss": 0.012484049797058106, "step": 91755 }, { "epoch": 0.8636235294117647, "grad_norm": 0.4758933313098985, "learning_rate": 2.152644391379986e-06, "loss": 0.013787259161472321, "step": 91760 }, { "epoch": 0.8636705882352941, "grad_norm": 0.4805226650005704, "learning_rate": 2.1525857443693037e-06, "loss": 0.017187124490737914, "step": 91765 }, { "epoch": 0.8637176470588235, "grad_norm": 0.3593145359354769, "learning_rate": 2.1525271021517286e-06, "loss": 0.014563006162643433, "step": 91770 }, { "epoch": 0.863764705882353, "grad_norm": 0.3541236728338028, "learning_rate": 2.1524684647266066e-06, "loss": 0.016811515390872955, "step": 91775 }, { "epoch": 0.8638117647058824, "grad_norm": 0.5081716495343571, "learning_rate": 2.152409832093285e-06, "loss": 0.012424628436565398, "step": 91780 }, { "epoch": 0.8638588235294118, "grad_norm": 0.3012460304249478, "learning_rate": 2.1523512042511118e-06, "loss": 0.01177125722169876, "step": 91785 }, { "epoch": 0.8639058823529412, "grad_norm": 1.0939214876912284, "learning_rate": 2.1522925811994337e-06, "loss": 0.010524944216012955, "step": 91790 }, { "epoch": 0.8639529411764706, "grad_norm": 0.3710646091251899, "learning_rate": 2.1522339629375995e-06, "loss": 0.015916059911251067, "step": 91795 }, { "epoch": 0.864, "grad_norm": 0.5043288401329427, "learning_rate": 2.1521753494649565e-06, "loss": 0.013304798305034638, "step": 91800 }, { "epoch": 0.8640470588235294, "grad_norm": 0.41848105172649314, "learning_rate": 2.152116740780852e-06, "loss": 0.015053886175155639, "step": 91805 }, { "epoch": 0.8640941176470588, "grad_norm": 0.3802483156792387, "learning_rate": 2.1520581368846347e-06, "loss": 0.01963978260755539, "step": 91810 }, { "epoch": 0.8641411764705882, "grad_norm": 0.6070208019562152, "learning_rate": 2.1519995377756524e-06, "loss": 0.015357011556625366, "step": 91815 }, { "epoch": 0.8641882352941177, "grad_norm": 0.7697504209195302, "learning_rate": 2.1519409434532538e-06, "loss": 0.014923399686813355, "step": 91820 }, { "epoch": 0.8642352941176471, "grad_norm": 0.5821590431996876, "learning_rate": 2.1518823539167865e-06, "loss": 0.013138912618160248, "step": 91825 }, { "epoch": 0.8642823529411765, "grad_norm": 0.3612121348447882, "learning_rate": 2.1518237691655996e-06, "loss": 0.010030806064605713, "step": 91830 }, { "epoch": 0.8643294117647059, "grad_norm": 0.44495628756261746, "learning_rate": 2.1517651891990423e-06, "loss": 0.014343015849590302, "step": 91835 }, { "epoch": 0.8643764705882353, "grad_norm": 0.585139885298464, "learning_rate": 2.151706614016462e-06, "loss": 0.013032262027263642, "step": 91840 }, { "epoch": 0.8644235294117647, "grad_norm": 0.3614871350335091, "learning_rate": 2.1516480436172085e-06, "loss": 0.014041584730148316, "step": 91845 }, { "epoch": 0.8644705882352941, "grad_norm": 0.26700603884057983, "learning_rate": 2.1515894780006306e-06, "loss": 0.015623009204864502, "step": 91850 }, { "epoch": 0.8645176470588235, "grad_norm": 0.5906844449170188, "learning_rate": 2.151530917166077e-06, "loss": 0.01664411574602127, "step": 91855 }, { "epoch": 0.8645647058823529, "grad_norm": 0.5355073076221268, "learning_rate": 2.1514723611128986e-06, "loss": 0.010628529638051987, "step": 91860 }, { "epoch": 0.8646117647058823, "grad_norm": 0.5940562264572189, "learning_rate": 2.151413809840443e-06, "loss": 0.015707388520240784, "step": 91865 }, { "epoch": 0.8646588235294118, "grad_norm": 0.5637939179669599, "learning_rate": 2.1513552633480597e-06, "loss": 0.012513989210128784, "step": 91870 }, { "epoch": 0.8647058823529412, "grad_norm": 0.4566108780766135, "learning_rate": 2.1512967216351e-06, "loss": 0.013182322680950164, "step": 91875 }, { "epoch": 0.8647529411764706, "grad_norm": 0.48828177837015735, "learning_rate": 2.1512381847009116e-06, "loss": 0.014297819137573243, "step": 91880 }, { "epoch": 0.8648, "grad_norm": 0.586922314391407, "learning_rate": 2.151179652544846e-06, "loss": 0.017774969339370728, "step": 91885 }, { "epoch": 0.8648470588235294, "grad_norm": 0.32739801653820905, "learning_rate": 2.1511211251662526e-06, "loss": 0.010618314892053605, "step": 91890 }, { "epoch": 0.8648941176470588, "grad_norm": 0.7923282119156267, "learning_rate": 2.151062602564481e-06, "loss": 0.01715931445360184, "step": 91895 }, { "epoch": 0.8649411764705882, "grad_norm": 0.3607827579646796, "learning_rate": 2.1510040847388823e-06, "loss": 0.013864156603813172, "step": 91900 }, { "epoch": 0.8649882352941176, "grad_norm": 0.5353857276594078, "learning_rate": 2.150945571688807e-06, "loss": 0.016524964570999147, "step": 91905 }, { "epoch": 0.865035294117647, "grad_norm": 0.6554230850513688, "learning_rate": 2.150887063413605e-06, "loss": 0.014463840425014496, "step": 91910 }, { "epoch": 0.8650823529411765, "grad_norm": 0.39831339420218614, "learning_rate": 2.1508285599126265e-06, "loss": 0.01370839774608612, "step": 91915 }, { "epoch": 0.8651294117647059, "grad_norm": 0.6487553107125215, "learning_rate": 2.150770061185223e-06, "loss": 0.01505228728055954, "step": 91920 }, { "epoch": 0.8651764705882353, "grad_norm": 0.5115269891195432, "learning_rate": 2.150711567230746e-06, "loss": 0.01466507911682129, "step": 91925 }, { "epoch": 0.8652235294117647, "grad_norm": 0.6534486072918434, "learning_rate": 2.150653078048545e-06, "loss": 0.012691915035247803, "step": 91930 }, { "epoch": 0.8652705882352941, "grad_norm": 0.42660698126214136, "learning_rate": 2.150594593637972e-06, "loss": 0.018210354447364806, "step": 91935 }, { "epoch": 0.8653176470588235, "grad_norm": 0.6008718036418482, "learning_rate": 2.150536113998378e-06, "loss": 0.014221936464309692, "step": 91940 }, { "epoch": 0.8653647058823529, "grad_norm": 0.42886395855596443, "learning_rate": 2.150477639129114e-06, "loss": 0.013865995407104491, "step": 91945 }, { "epoch": 0.8654117647058823, "grad_norm": 0.4604305046400884, "learning_rate": 2.150419169029532e-06, "loss": 0.015033990144729614, "step": 91950 }, { "epoch": 0.8654588235294117, "grad_norm": 0.42697770321808637, "learning_rate": 2.150360703698984e-06, "loss": 0.012389138340950012, "step": 91955 }, { "epoch": 0.8655058823529411, "grad_norm": 0.3535245147954076, "learning_rate": 2.150302243136821e-06, "loss": 0.011623837053775787, "step": 91960 }, { "epoch": 0.8655529411764706, "grad_norm": 0.5131559879375638, "learning_rate": 2.150243787342395e-06, "loss": 0.02333523631095886, "step": 91965 }, { "epoch": 0.8656, "grad_norm": 0.6952155587718987, "learning_rate": 2.150185336315059e-06, "loss": 0.014930763840675354, "step": 91970 }, { "epoch": 0.8656470588235294, "grad_norm": 0.5779206277431153, "learning_rate": 2.1501268900541626e-06, "loss": 0.015317267179489136, "step": 91975 }, { "epoch": 0.8656941176470588, "grad_norm": 0.4617622733547401, "learning_rate": 2.150068448559061e-06, "loss": 0.012550321221351624, "step": 91980 }, { "epoch": 0.8657411764705882, "grad_norm": 0.5384741659713854, "learning_rate": 2.1500100118291044e-06, "loss": 0.0150761678814888, "step": 91985 }, { "epoch": 0.8657882352941176, "grad_norm": 0.7596948317139878, "learning_rate": 2.1499515798636466e-06, "loss": 0.015932586789131165, "step": 91990 }, { "epoch": 0.865835294117647, "grad_norm": 0.39634083498722894, "learning_rate": 2.149893152662039e-06, "loss": 0.01104779839515686, "step": 91995 }, { "epoch": 0.8658823529411764, "grad_norm": 0.5316685231761524, "learning_rate": 2.149834730223635e-06, "loss": 0.01464027464389801, "step": 92000 }, { "epoch": 0.8659294117647058, "grad_norm": 0.4168953664665593, "learning_rate": 2.1497763125477883e-06, "loss": 0.013134428858757019, "step": 92005 }, { "epoch": 0.8659764705882353, "grad_norm": 0.41522077612999175, "learning_rate": 2.1497178996338506e-06, "loss": 0.015154126286506652, "step": 92010 }, { "epoch": 0.8660235294117647, "grad_norm": 0.32033241965608816, "learning_rate": 2.1496594914811753e-06, "loss": 0.016908687353134156, "step": 92015 }, { "epoch": 0.8660705882352941, "grad_norm": 0.39431067578802265, "learning_rate": 2.1496010880891158e-06, "loss": 0.016483475267887116, "step": 92020 }, { "epoch": 0.8661176470588235, "grad_norm": 0.4702165301041982, "learning_rate": 2.149542689457025e-06, "loss": 0.016688989102840425, "step": 92025 }, { "epoch": 0.866164705882353, "grad_norm": 0.45238368635371756, "learning_rate": 2.149484295584257e-06, "loss": 0.014145031571388245, "step": 92030 }, { "epoch": 0.8662117647058823, "grad_norm": 0.5370100649302207, "learning_rate": 2.149425906470165e-06, "loss": 0.015349888801574707, "step": 92035 }, { "epoch": 0.8662588235294117, "grad_norm": 0.4464152295281677, "learning_rate": 2.1493675221141032e-06, "loss": 0.011077947914600372, "step": 92040 }, { "epoch": 0.8663058823529411, "grad_norm": 0.4578462231838635, "learning_rate": 2.149309142515424e-06, "loss": 0.014635172486305238, "step": 92045 }, { "epoch": 0.8663529411764705, "grad_norm": 0.5234456871302792, "learning_rate": 2.1492507676734832e-06, "loss": 0.016378578543663026, "step": 92050 }, { "epoch": 0.8664, "grad_norm": 0.33375243155157336, "learning_rate": 2.1491923975876333e-06, "loss": 0.012180561572313309, "step": 92055 }, { "epoch": 0.8664470588235295, "grad_norm": 0.6426094684986783, "learning_rate": 2.14913403225723e-06, "loss": 0.01369394063949585, "step": 92060 }, { "epoch": 0.8664941176470589, "grad_norm": 0.5471198489263956, "learning_rate": 2.1490756716816267e-06, "loss": 0.01251676380634308, "step": 92065 }, { "epoch": 0.8665411764705883, "grad_norm": 0.6257120034100472, "learning_rate": 2.1490173158601775e-06, "loss": 0.015350954234600067, "step": 92070 }, { "epoch": 0.8665882352941177, "grad_norm": 0.5044349948402034, "learning_rate": 2.1489589647922383e-06, "loss": 0.022539907693862916, "step": 92075 }, { "epoch": 0.8666352941176471, "grad_norm": 0.4413270089535246, "learning_rate": 2.1489006184771622e-06, "loss": 0.015340153872966767, "step": 92080 }, { "epoch": 0.8666823529411765, "grad_norm": 0.5540799290189178, "learning_rate": 2.148842276914305e-06, "loss": 0.012532518804073333, "step": 92085 }, { "epoch": 0.8667294117647059, "grad_norm": 0.6996265689767575, "learning_rate": 2.1487839401030215e-06, "loss": 0.017367592453956603, "step": 92090 }, { "epoch": 0.8667764705882353, "grad_norm": 0.5859215873886563, "learning_rate": 2.1487256080426667e-06, "loss": 0.01165924221277237, "step": 92095 }, { "epoch": 0.8668235294117647, "grad_norm": 0.5214201017944052, "learning_rate": 2.148667280732596e-06, "loss": 0.01252278983592987, "step": 92100 }, { "epoch": 0.8668705882352942, "grad_norm": 0.3689478059214512, "learning_rate": 2.148608958172164e-06, "loss": 0.015737974643707277, "step": 92105 }, { "epoch": 0.8669176470588236, "grad_norm": 0.31318572990246246, "learning_rate": 2.148550640360726e-06, "loss": 0.011975517123937606, "step": 92110 }, { "epoch": 0.866964705882353, "grad_norm": 0.49171134276352957, "learning_rate": 2.1484923272976394e-06, "loss": 0.016274596750736236, "step": 92115 }, { "epoch": 0.8670117647058824, "grad_norm": 0.5183474356066197, "learning_rate": 2.148434018982258e-06, "loss": 0.015151330828666687, "step": 92120 }, { "epoch": 0.8670588235294118, "grad_norm": 0.36672049008634844, "learning_rate": 2.148375715413938e-06, "loss": 0.020623651146888734, "step": 92125 }, { "epoch": 0.8671058823529412, "grad_norm": 0.4398427536813696, "learning_rate": 2.1483174165920353e-06, "loss": 0.013667196035385132, "step": 92130 }, { "epoch": 0.8671529411764706, "grad_norm": 0.4891663749551243, "learning_rate": 2.148259122515906e-06, "loss": 0.015998455882072448, "step": 92135 }, { "epoch": 0.8672, "grad_norm": 0.77739816445937, "learning_rate": 2.1482008331849073e-06, "loss": 0.016815175116062165, "step": 92140 }, { "epoch": 0.8672470588235294, "grad_norm": 0.4564421877072834, "learning_rate": 2.148142548598394e-06, "loss": 0.012358614802360534, "step": 92145 }, { "epoch": 0.8672941176470588, "grad_norm": 0.43402315787956697, "learning_rate": 2.148084268755723e-06, "loss": 0.016287970542907714, "step": 92150 }, { "epoch": 0.8673411764705883, "grad_norm": 0.5992978278967477, "learning_rate": 2.1480259936562504e-06, "loss": 0.01572204828262329, "step": 92155 }, { "epoch": 0.8673882352941177, "grad_norm": 0.6795850995015636, "learning_rate": 2.147967723299334e-06, "loss": 0.021625694632530213, "step": 92160 }, { "epoch": 0.8674352941176471, "grad_norm": 0.4091423172444363, "learning_rate": 2.14790945768433e-06, "loss": 0.013785970211029053, "step": 92165 }, { "epoch": 0.8674823529411765, "grad_norm": 0.5312916767138876, "learning_rate": 2.147851196810595e-06, "loss": 0.019104798138141633, "step": 92170 }, { "epoch": 0.8675294117647059, "grad_norm": 0.5364493614115661, "learning_rate": 2.147792940677486e-06, "loss": 0.013086366653442382, "step": 92175 }, { "epoch": 0.8675764705882353, "grad_norm": 0.6541763633345758, "learning_rate": 2.14773468928436e-06, "loss": 0.013709652423858642, "step": 92180 }, { "epoch": 0.8676235294117647, "grad_norm": 0.41635716613184026, "learning_rate": 2.147676442630575e-06, "loss": 0.011487314850091935, "step": 92185 }, { "epoch": 0.8676705882352941, "grad_norm": 0.8017486564540807, "learning_rate": 2.1476182007154882e-06, "loss": 0.017756035923957823, "step": 92190 }, { "epoch": 0.8677176470588235, "grad_norm": 0.4549702088847261, "learning_rate": 2.147559963538457e-06, "loss": 0.014837989211082458, "step": 92195 }, { "epoch": 0.867764705882353, "grad_norm": 0.40930830953983555, "learning_rate": 2.147501731098838e-06, "loss": 0.01404690146446228, "step": 92200 }, { "epoch": 0.8678117647058824, "grad_norm": 0.3778946131813129, "learning_rate": 2.1474435033959908e-06, "loss": 0.01260892003774643, "step": 92205 }, { "epoch": 0.8678588235294118, "grad_norm": 0.41143483788878127, "learning_rate": 2.1473852804292716e-06, "loss": 0.014413587749004364, "step": 92210 }, { "epoch": 0.8679058823529412, "grad_norm": 0.3433728566807659, "learning_rate": 2.1473270621980396e-06, "loss": 0.012280742824077606, "step": 92215 }, { "epoch": 0.8679529411764706, "grad_norm": 0.2541856038126231, "learning_rate": 2.147268848701652e-06, "loss": 0.011806125938892364, "step": 92220 }, { "epoch": 0.868, "grad_norm": 0.5388646304123019, "learning_rate": 2.1472106399394677e-06, "loss": 0.012612879276275635, "step": 92225 }, { "epoch": 0.8680470588235294, "grad_norm": 0.7342461153551256, "learning_rate": 2.147152435910845e-06, "loss": 0.01579917371273041, "step": 92230 }, { "epoch": 0.8680941176470588, "grad_norm": 0.471945351774365, "learning_rate": 2.1470942366151416e-06, "loss": 0.014110162854194641, "step": 92235 }, { "epoch": 0.8681411764705882, "grad_norm": 0.8466612929160706, "learning_rate": 2.147036042051717e-06, "loss": 0.018958285450935364, "step": 92240 }, { "epoch": 0.8681882352941176, "grad_norm": 0.5414642363482622, "learning_rate": 2.1469778522199296e-06, "loss": 0.015291571617126465, "step": 92245 }, { "epoch": 0.8682352941176471, "grad_norm": 0.4579411899507494, "learning_rate": 2.146919667119138e-06, "loss": 0.016150423884391786, "step": 92250 }, { "epoch": 0.8682823529411765, "grad_norm": 0.5900770959764735, "learning_rate": 2.1468614867487013e-06, "loss": 0.013874471187591553, "step": 92255 }, { "epoch": 0.8683294117647059, "grad_norm": 0.3559165542230584, "learning_rate": 2.146803311107979e-06, "loss": 0.011204567551612855, "step": 92260 }, { "epoch": 0.8683764705882353, "grad_norm": 0.44581745758061564, "learning_rate": 2.1467451401963304e-06, "loss": 0.014889249205589294, "step": 92265 }, { "epoch": 0.8684235294117647, "grad_norm": 0.5419050585415139, "learning_rate": 2.1466869740131134e-06, "loss": 0.014556437730789185, "step": 92270 }, { "epoch": 0.8684705882352941, "grad_norm": 0.47065387057805025, "learning_rate": 2.146628812557689e-06, "loss": 0.01309395134449005, "step": 92275 }, { "epoch": 0.8685176470588235, "grad_norm": 0.29398160389190614, "learning_rate": 2.1465706558294162e-06, "loss": 0.008274216949939728, "step": 92280 }, { "epoch": 0.8685647058823529, "grad_norm": 0.3502504795074365, "learning_rate": 2.1465125038276544e-06, "loss": 0.012804260849952698, "step": 92285 }, { "epoch": 0.8686117647058823, "grad_norm": 0.4941319569059191, "learning_rate": 2.146454356551764e-06, "loss": 0.013906115293502807, "step": 92290 }, { "epoch": 0.8686588235294118, "grad_norm": 0.5015110504945377, "learning_rate": 2.146396214001105e-06, "loss": 0.014981722831726075, "step": 92295 }, { "epoch": 0.8687058823529412, "grad_norm": 0.423451903582596, "learning_rate": 2.1463380761750365e-06, "loss": 0.009882891178131103, "step": 92300 }, { "epoch": 0.8687529411764706, "grad_norm": 0.4997005816219687, "learning_rate": 2.1462799430729197e-06, "loss": 0.012184766680002212, "step": 92305 }, { "epoch": 0.8688, "grad_norm": 0.3027951324630284, "learning_rate": 2.1462218146941147e-06, "loss": 0.011100389063358307, "step": 92310 }, { "epoch": 0.8688470588235294, "grad_norm": 0.5050214678390631, "learning_rate": 2.1461636910379814e-06, "loss": 0.015605399012565612, "step": 92315 }, { "epoch": 0.8688941176470588, "grad_norm": 0.41807496982311926, "learning_rate": 2.1461055721038805e-06, "loss": 0.014285387098789215, "step": 92320 }, { "epoch": 0.8689411764705882, "grad_norm": 0.43278446222972117, "learning_rate": 2.146047457891173e-06, "loss": 0.015336713194847107, "step": 92325 }, { "epoch": 0.8689882352941176, "grad_norm": 0.5743578448910867, "learning_rate": 2.1459893483992195e-06, "loss": 0.016827215254306794, "step": 92330 }, { "epoch": 0.869035294117647, "grad_norm": 0.3964746373154283, "learning_rate": 2.145931243627381e-06, "loss": 0.013764230906963349, "step": 92335 }, { "epoch": 0.8690823529411764, "grad_norm": 0.48807145773846616, "learning_rate": 2.145873143575018e-06, "loss": 0.016011933982372283, "step": 92340 }, { "epoch": 0.8691294117647059, "grad_norm": 0.536302116918529, "learning_rate": 2.1458150482414923e-06, "loss": 0.014395686984062194, "step": 92345 }, { "epoch": 0.8691764705882353, "grad_norm": 0.5542283777075974, "learning_rate": 2.145756957626165e-06, "loss": 0.015516875684261322, "step": 92350 }, { "epoch": 0.8692235294117647, "grad_norm": 0.3968228940506536, "learning_rate": 2.1456988717283974e-06, "loss": 0.014541259407997132, "step": 92355 }, { "epoch": 0.8692705882352941, "grad_norm": 0.29630709202231237, "learning_rate": 2.1456407905475515e-06, "loss": 0.0131049245595932, "step": 92360 }, { "epoch": 0.8693176470588235, "grad_norm": 0.5017937729854494, "learning_rate": 2.145582714082988e-06, "loss": 0.013282456994056701, "step": 92365 }, { "epoch": 0.8693647058823529, "grad_norm": 0.4407019818103832, "learning_rate": 2.1455246423340692e-06, "loss": 0.01893666684627533, "step": 92370 }, { "epoch": 0.8694117647058823, "grad_norm": 0.34039560480462383, "learning_rate": 2.1454665753001567e-06, "loss": 0.02005729079246521, "step": 92375 }, { "epoch": 0.8694588235294117, "grad_norm": 0.3852932895842796, "learning_rate": 2.1454085129806136e-06, "loss": 0.011713089793920517, "step": 92380 }, { "epoch": 0.8695058823529411, "grad_norm": 0.498007421495105, "learning_rate": 2.1453504553748004e-06, "loss": 0.01586487591266632, "step": 92385 }, { "epoch": 0.8695529411764706, "grad_norm": 0.4438088978648882, "learning_rate": 2.1452924024820797e-06, "loss": 0.015027880668640137, "step": 92390 }, { "epoch": 0.8696, "grad_norm": 0.4128519878567922, "learning_rate": 2.145234354301815e-06, "loss": 0.017361393570899962, "step": 92395 }, { "epoch": 0.8696470588235294, "grad_norm": 0.3024105537257983, "learning_rate": 2.1451763108333674e-06, "loss": 0.012634971737861633, "step": 92400 }, { "epoch": 0.8696941176470588, "grad_norm": 0.5020595156146754, "learning_rate": 2.1451182720761006e-06, "loss": 0.015144816040992737, "step": 92405 }, { "epoch": 0.8697411764705882, "grad_norm": 0.41808414081605316, "learning_rate": 2.1450602380293764e-06, "loss": 0.01707119047641754, "step": 92410 }, { "epoch": 0.8697882352941176, "grad_norm": 0.24246487018931526, "learning_rate": 2.145002208692558e-06, "loss": 0.00986834242939949, "step": 92415 }, { "epoch": 0.869835294117647, "grad_norm": 0.26375622360965867, "learning_rate": 2.1449441840650083e-06, "loss": 0.00930827111005783, "step": 92420 }, { "epoch": 0.8698823529411764, "grad_norm": 0.42059454807239777, "learning_rate": 2.1448861641460905e-06, "loss": 0.012415263056755065, "step": 92425 }, { "epoch": 0.8699294117647058, "grad_norm": 0.249807882816404, "learning_rate": 2.144828148935168e-06, "loss": 0.013051417469978333, "step": 92430 }, { "epoch": 0.8699764705882352, "grad_norm": 0.5629877937558488, "learning_rate": 2.144770138431604e-06, "loss": 0.016747905313968657, "step": 92435 }, { "epoch": 0.8700235294117648, "grad_norm": 0.4431975008848467, "learning_rate": 2.1447121326347618e-06, "loss": 0.010194049030542374, "step": 92440 }, { "epoch": 0.8700705882352942, "grad_norm": 0.5033598974218566, "learning_rate": 2.144654131544005e-06, "loss": 0.01189904659986496, "step": 92445 }, { "epoch": 0.8701176470588236, "grad_norm": 0.5447867979444616, "learning_rate": 2.1445961351586974e-06, "loss": 0.012925751507282257, "step": 92450 }, { "epoch": 0.870164705882353, "grad_norm": 0.4523198870423073, "learning_rate": 2.144538143478202e-06, "loss": 0.012322710454463958, "step": 92455 }, { "epoch": 0.8702117647058824, "grad_norm": 0.335224148388593, "learning_rate": 2.144480156501884e-06, "loss": 0.015200692415237426, "step": 92460 }, { "epoch": 0.8702588235294118, "grad_norm": 0.32531959655359954, "learning_rate": 2.1444221742291067e-06, "loss": 0.012898516654968262, "step": 92465 }, { "epoch": 0.8703058823529412, "grad_norm": 0.3416909823806707, "learning_rate": 2.1443641966592346e-06, "loss": 0.014996582269668579, "step": 92470 }, { "epoch": 0.8703529411764706, "grad_norm": 0.5419067866387381, "learning_rate": 2.1443062237916317e-06, "loss": 0.01617233604192734, "step": 92475 }, { "epoch": 0.8704, "grad_norm": 0.6176578123131772, "learning_rate": 2.1442482556256626e-06, "loss": 0.01549534797668457, "step": 92480 }, { "epoch": 0.8704470588235295, "grad_norm": 0.5183275391023687, "learning_rate": 2.1441902921606916e-06, "loss": 0.011965776234865189, "step": 92485 }, { "epoch": 0.8704941176470589, "grad_norm": 0.607205657915152, "learning_rate": 2.144132333396084e-06, "loss": 0.01194751262664795, "step": 92490 }, { "epoch": 0.8705411764705883, "grad_norm": 0.38382514956557484, "learning_rate": 2.144074379331203e-06, "loss": 0.009284029901027679, "step": 92495 }, { "epoch": 0.8705882352941177, "grad_norm": 0.46514119050346847, "learning_rate": 2.1440164299654146e-06, "loss": 0.01958763599395752, "step": 92500 }, { "epoch": 0.8706352941176471, "grad_norm": 0.6784294685247163, "learning_rate": 2.1439584852980847e-06, "loss": 0.01302567720413208, "step": 92505 }, { "epoch": 0.8706823529411765, "grad_norm": 0.533188246804447, "learning_rate": 2.1439005453285773e-06, "loss": 0.014617609977722167, "step": 92510 }, { "epoch": 0.8707294117647059, "grad_norm": 0.45461640207274917, "learning_rate": 2.1438426100562573e-06, "loss": 0.01886850893497467, "step": 92515 }, { "epoch": 0.8707764705882353, "grad_norm": 0.3021361371859617, "learning_rate": 2.1437846794804906e-06, "loss": 0.021529069542884825, "step": 92520 }, { "epoch": 0.8708235294117647, "grad_norm": 0.5768838398337421, "learning_rate": 2.143726753600643e-06, "loss": 0.013763725757598877, "step": 92525 }, { "epoch": 0.8708705882352941, "grad_norm": 0.45918083622432204, "learning_rate": 2.14366883241608e-06, "loss": 0.014625628292560578, "step": 92530 }, { "epoch": 0.8709176470588236, "grad_norm": 0.46849992343425106, "learning_rate": 2.1436109159261666e-06, "loss": 0.014635249972343445, "step": 92535 }, { "epoch": 0.870964705882353, "grad_norm": 0.4464065366179301, "learning_rate": 2.143553004130269e-06, "loss": 0.01466439813375473, "step": 92540 }, { "epoch": 0.8710117647058824, "grad_norm": 0.599113861994943, "learning_rate": 2.143495097027754e-06, "loss": 0.019563248753547667, "step": 92545 }, { "epoch": 0.8710588235294118, "grad_norm": 0.38735139104645905, "learning_rate": 2.1434371946179865e-06, "loss": 0.014651077985763549, "step": 92550 }, { "epoch": 0.8711058823529412, "grad_norm": 0.3265815017078224, "learning_rate": 2.1433792969003333e-06, "loss": 0.00981106013059616, "step": 92555 }, { "epoch": 0.8711529411764706, "grad_norm": 0.30717098143400207, "learning_rate": 2.1433214038741606e-06, "loss": 0.01594657599925995, "step": 92560 }, { "epoch": 0.8712, "grad_norm": 0.43865361676986503, "learning_rate": 2.143263515538835e-06, "loss": 0.011839683353900909, "step": 92565 }, { "epoch": 0.8712470588235294, "grad_norm": 0.36948796774748716, "learning_rate": 2.1432056318937234e-06, "loss": 0.01277136206626892, "step": 92570 }, { "epoch": 0.8712941176470588, "grad_norm": 0.5954190460446555, "learning_rate": 2.1431477529381913e-06, "loss": 0.01438605785369873, "step": 92575 }, { "epoch": 0.8713411764705883, "grad_norm": 1.5071442569323326, "learning_rate": 2.1430898786716063e-06, "loss": 0.013580520451068879, "step": 92580 }, { "epoch": 0.8713882352941177, "grad_norm": 0.5344526229722271, "learning_rate": 2.143032009093336e-06, "loss": 0.01296439915895462, "step": 92585 }, { "epoch": 0.8714352941176471, "grad_norm": 0.5268920010721732, "learning_rate": 2.1429741442027457e-06, "loss": 0.015346205234527588, "step": 92590 }, { "epoch": 0.8714823529411765, "grad_norm": 0.5402004605111774, "learning_rate": 2.142916283999204e-06, "loss": 0.013459818065166473, "step": 92595 }, { "epoch": 0.8715294117647059, "grad_norm": 0.399665487848261, "learning_rate": 2.1428584284820776e-06, "loss": 0.01278878003358841, "step": 92600 }, { "epoch": 0.8715764705882353, "grad_norm": 0.3636636757248301, "learning_rate": 2.1428005776507343e-06, "loss": 0.012712310254573821, "step": 92605 }, { "epoch": 0.8716235294117647, "grad_norm": 0.49147796206923333, "learning_rate": 2.142742731504542e-06, "loss": 0.010336069762706757, "step": 92610 }, { "epoch": 0.8716705882352941, "grad_norm": 0.5304092468192094, "learning_rate": 2.1426848900428665e-06, "loss": 0.01462797224521637, "step": 92615 }, { "epoch": 0.8717176470588235, "grad_norm": 0.40384281307298586, "learning_rate": 2.1426270532650775e-06, "loss": 0.013176469504833222, "step": 92620 }, { "epoch": 0.8717647058823529, "grad_norm": 0.6809387143653822, "learning_rate": 2.1425692211705417e-06, "loss": 0.012849763035774231, "step": 92625 }, { "epoch": 0.8718117647058824, "grad_norm": 0.41905149960409566, "learning_rate": 2.1425113937586277e-06, "loss": 0.017667096853256226, "step": 92630 }, { "epoch": 0.8718588235294118, "grad_norm": 0.341314466316726, "learning_rate": 2.142453571028704e-06, "loss": 0.01498648226261139, "step": 92635 }, { "epoch": 0.8719058823529412, "grad_norm": 0.6664974330560409, "learning_rate": 2.1423957529801377e-06, "loss": 0.020078900456428527, "step": 92640 }, { "epoch": 0.8719529411764706, "grad_norm": 0.43076373749465563, "learning_rate": 2.1423379396122984e-06, "loss": 0.01383819878101349, "step": 92645 }, { "epoch": 0.872, "grad_norm": 0.5897206011702933, "learning_rate": 2.1422801309245533e-06, "loss": 0.011915449053049088, "step": 92650 }, { "epoch": 0.8720470588235294, "grad_norm": 0.4996553394336494, "learning_rate": 2.142222326916272e-06, "loss": 0.014002196490764618, "step": 92655 }, { "epoch": 0.8720941176470588, "grad_norm": 0.6151110187635781, "learning_rate": 2.142164527586823e-06, "loss": 0.0167751282453537, "step": 92660 }, { "epoch": 0.8721411764705882, "grad_norm": 0.3816155945925019, "learning_rate": 2.142106732935575e-06, "loss": 0.014185145497322083, "step": 92665 }, { "epoch": 0.8721882352941176, "grad_norm": 0.45744212343894286, "learning_rate": 2.142048942961897e-06, "loss": 0.01209397315979004, "step": 92670 }, { "epoch": 0.8722352941176471, "grad_norm": 0.5436954344965765, "learning_rate": 2.1419911576651576e-06, "loss": 0.015681920945644377, "step": 92675 }, { "epoch": 0.8722823529411765, "grad_norm": 0.3991268527708985, "learning_rate": 2.141933377044727e-06, "loss": 0.010619297623634338, "step": 92680 }, { "epoch": 0.8723294117647059, "grad_norm": 0.42022618616963914, "learning_rate": 2.1418756010999737e-06, "loss": 0.01382216215133667, "step": 92685 }, { "epoch": 0.8723764705882353, "grad_norm": 0.4871513167508646, "learning_rate": 2.1418178298302674e-06, "loss": 0.017647570371627806, "step": 92690 }, { "epoch": 0.8724235294117647, "grad_norm": 0.60153322697177, "learning_rate": 2.1417600632349776e-06, "loss": 0.016807177662849428, "step": 92695 }, { "epoch": 0.8724705882352941, "grad_norm": 0.5897643466175818, "learning_rate": 2.1417023013134737e-06, "loss": 0.016503025591373444, "step": 92700 }, { "epoch": 0.8725176470588235, "grad_norm": 0.386332045865849, "learning_rate": 2.141644544065126e-06, "loss": 0.01703363060951233, "step": 92705 }, { "epoch": 0.8725647058823529, "grad_norm": 0.604959552575416, "learning_rate": 2.1415867914893043e-06, "loss": 0.012598735094070435, "step": 92710 }, { "epoch": 0.8726117647058823, "grad_norm": 0.35787044566295845, "learning_rate": 2.1415290435853787e-06, "loss": 0.014394013583660126, "step": 92715 }, { "epoch": 0.8726588235294118, "grad_norm": 0.5086853748048437, "learning_rate": 2.1414713003527187e-06, "loss": 0.014569735527038575, "step": 92720 }, { "epoch": 0.8727058823529412, "grad_norm": 0.31659800539421673, "learning_rate": 2.1414135617906954e-06, "loss": 0.015946997702121733, "step": 92725 }, { "epoch": 0.8727529411764706, "grad_norm": 0.46731373833475687, "learning_rate": 2.1413558278986786e-06, "loss": 0.01481437385082245, "step": 92730 }, { "epoch": 0.8728, "grad_norm": 0.5380975580113606, "learning_rate": 2.1412980986760393e-06, "loss": 0.01499904841184616, "step": 92735 }, { "epoch": 0.8728470588235294, "grad_norm": 0.46179244881816195, "learning_rate": 2.1412403741221477e-06, "loss": 0.016111549735069276, "step": 92740 }, { "epoch": 0.8728941176470588, "grad_norm": 0.5386785425176182, "learning_rate": 2.1411826542363746e-06, "loss": 0.013535125553607941, "step": 92745 }, { "epoch": 0.8729411764705882, "grad_norm": 0.4094855174869061, "learning_rate": 2.141124939018091e-06, "loss": 0.012292271852493286, "step": 92750 }, { "epoch": 0.8729882352941176, "grad_norm": 0.4855083223324281, "learning_rate": 2.141067228466668e-06, "loss": 0.010819617658853531, "step": 92755 }, { "epoch": 0.873035294117647, "grad_norm": 0.5053102095012613, "learning_rate": 2.141009522581476e-06, "loss": 0.0160160094499588, "step": 92760 }, { "epoch": 0.8730823529411764, "grad_norm": 0.5515527847807272, "learning_rate": 2.1409518213618876e-06, "loss": 0.018320488929748534, "step": 92765 }, { "epoch": 0.8731294117647059, "grad_norm": 0.38074812515008766, "learning_rate": 2.1408941248072724e-06, "loss": 0.012493284046649933, "step": 92770 }, { "epoch": 0.8731764705882353, "grad_norm": 0.351740100010907, "learning_rate": 2.140836432917003e-06, "loss": 0.014918458461761475, "step": 92775 }, { "epoch": 0.8732235294117647, "grad_norm": 0.5849153761814034, "learning_rate": 2.1407787456904506e-06, "loss": 0.01520141214132309, "step": 92780 }, { "epoch": 0.8732705882352941, "grad_norm": 0.5307466619823112, "learning_rate": 2.140721063126987e-06, "loss": 0.012329884618520737, "step": 92785 }, { "epoch": 0.8733176470588235, "grad_norm": 0.7298206398644201, "learning_rate": 2.1406633852259843e-06, "loss": 0.017821656167507173, "step": 92790 }, { "epoch": 0.8733647058823529, "grad_norm": 0.49029791922767824, "learning_rate": 2.1406057119868137e-06, "loss": 0.014910408854484558, "step": 92795 }, { "epoch": 0.8734117647058823, "grad_norm": 0.32233884183734474, "learning_rate": 2.140548043408848e-06, "loss": 0.016620105504989623, "step": 92800 }, { "epoch": 0.8734588235294117, "grad_norm": 0.27647117752610306, "learning_rate": 2.1404903794914583e-06, "loss": 0.012516659498214722, "step": 92805 }, { "epoch": 0.8735058823529411, "grad_norm": 0.390926643447014, "learning_rate": 2.1404327202340182e-06, "loss": 0.012570327520370484, "step": 92810 }, { "epoch": 0.8735529411764706, "grad_norm": 0.5185054923093799, "learning_rate": 2.1403750656358993e-06, "loss": 0.014457529783248902, "step": 92815 }, { "epoch": 0.8736, "grad_norm": 0.6484082915704157, "learning_rate": 2.1403174156964744e-06, "loss": 0.01555749773979187, "step": 92820 }, { "epoch": 0.8736470588235294, "grad_norm": 0.40311347855719704, "learning_rate": 2.140259770415116e-06, "loss": 0.014480692148208619, "step": 92825 }, { "epoch": 0.8736941176470588, "grad_norm": 0.43236897460036383, "learning_rate": 2.140202129791197e-06, "loss": 0.015802481770515443, "step": 92830 }, { "epoch": 0.8737411764705882, "grad_norm": 0.5606210998330711, "learning_rate": 2.14014449382409e-06, "loss": 0.01357288658618927, "step": 92835 }, { "epoch": 0.8737882352941176, "grad_norm": 0.415988372641056, "learning_rate": 2.1400868625131678e-06, "loss": 0.012231172621250152, "step": 92840 }, { "epoch": 0.873835294117647, "grad_norm": 0.5183882446805252, "learning_rate": 2.1400292358578044e-06, "loss": 0.011197950690984726, "step": 92845 }, { "epoch": 0.8738823529411764, "grad_norm": 0.4432847277352011, "learning_rate": 2.1399716138573728e-06, "loss": 0.017085690796375275, "step": 92850 }, { "epoch": 0.8739294117647058, "grad_norm": 0.5377842921344539, "learning_rate": 2.1399139965112454e-06, "loss": 0.015114448964595795, "step": 92855 }, { "epoch": 0.8739764705882352, "grad_norm": 0.760147850477208, "learning_rate": 2.1398563838187966e-06, "loss": 0.017924150824546813, "step": 92860 }, { "epoch": 0.8740235294117648, "grad_norm": 1.3264099628356558, "learning_rate": 2.1397987757793996e-06, "loss": 0.01508941650390625, "step": 92865 }, { "epoch": 0.8740705882352942, "grad_norm": 0.4781246704154517, "learning_rate": 2.139741172392428e-06, "loss": 0.014692901074886322, "step": 92870 }, { "epoch": 0.8741176470588236, "grad_norm": 0.5051125801207328, "learning_rate": 2.139683573657256e-06, "loss": 0.013170330226421357, "step": 92875 }, { "epoch": 0.874164705882353, "grad_norm": 0.6111495399341139, "learning_rate": 2.139625979573257e-06, "loss": 0.017130163311958314, "step": 92880 }, { "epoch": 0.8742117647058824, "grad_norm": 0.6106230522353158, "learning_rate": 2.1395683901398055e-06, "loss": 0.012378653138875961, "step": 92885 }, { "epoch": 0.8742588235294118, "grad_norm": 0.5365665009671154, "learning_rate": 2.139510805356276e-06, "loss": 0.020071561634540557, "step": 92890 }, { "epoch": 0.8743058823529412, "grad_norm": 0.2730663760927523, "learning_rate": 2.139453225222042e-06, "loss": 0.012484592199325562, "step": 92895 }, { "epoch": 0.8743529411764706, "grad_norm": 0.48853461996598796, "learning_rate": 2.1393956497364786e-06, "loss": 0.016285306215286253, "step": 92900 }, { "epoch": 0.8744, "grad_norm": 0.4856298000570325, "learning_rate": 2.1393380788989597e-06, "loss": 0.016282761096954347, "step": 92905 }, { "epoch": 0.8744470588235295, "grad_norm": 0.5599655820929786, "learning_rate": 2.13928051270886e-06, "loss": 0.0138864666223526, "step": 92910 }, { "epoch": 0.8744941176470589, "grad_norm": 1.767151201597557, "learning_rate": 2.1392229511655544e-06, "loss": 0.012895065546035766, "step": 92915 }, { "epoch": 0.8745411764705883, "grad_norm": 0.4931941628548775, "learning_rate": 2.1391653942684186e-06, "loss": 0.01272231936454773, "step": 92920 }, { "epoch": 0.8745882352941177, "grad_norm": 0.3256978227862936, "learning_rate": 2.139107842016826e-06, "loss": 0.01305983066558838, "step": 92925 }, { "epoch": 0.8746352941176471, "grad_norm": 0.502057722215755, "learning_rate": 2.1390502944101536e-06, "loss": 0.013378778100013733, "step": 92930 }, { "epoch": 0.8746823529411765, "grad_norm": 0.4990334258976062, "learning_rate": 2.1389927514477745e-06, "loss": 0.012957008183002472, "step": 92935 }, { "epoch": 0.8747294117647059, "grad_norm": 0.4065054463150497, "learning_rate": 2.138935213129066e-06, "loss": 0.013295190036296844, "step": 92940 }, { "epoch": 0.8747764705882353, "grad_norm": 0.47786374356165895, "learning_rate": 2.1388776794534023e-06, "loss": 0.014834776520729065, "step": 92945 }, { "epoch": 0.8748235294117647, "grad_norm": 0.5111002810721527, "learning_rate": 2.138820150420159e-06, "loss": 0.01433999538421631, "step": 92950 }, { "epoch": 0.8748705882352941, "grad_norm": 0.6623428194546449, "learning_rate": 2.1387626260287126e-06, "loss": 0.014279118180274964, "step": 92955 }, { "epoch": 0.8749176470588236, "grad_norm": 0.5159652646655717, "learning_rate": 2.138705106278439e-06, "loss": 0.01644830107688904, "step": 92960 }, { "epoch": 0.874964705882353, "grad_norm": 0.3637619701632293, "learning_rate": 2.138647591168713e-06, "loss": 0.014470405876636505, "step": 92965 }, { "epoch": 0.8750117647058824, "grad_norm": 0.4499096618594035, "learning_rate": 2.1385900806989116e-06, "loss": 0.01530911922454834, "step": 92970 }, { "epoch": 0.8750588235294118, "grad_norm": 0.3330102801251904, "learning_rate": 2.1385325748684105e-06, "loss": 0.010952562093734741, "step": 92975 }, { "epoch": 0.8751058823529412, "grad_norm": 0.447022638597584, "learning_rate": 2.1384750736765862e-06, "loss": 0.01348293125629425, "step": 92980 }, { "epoch": 0.8751529411764706, "grad_norm": 0.32706838186696535, "learning_rate": 2.138417577122815e-06, "loss": 0.013365128636360168, "step": 92985 }, { "epoch": 0.8752, "grad_norm": 0.6384179670440637, "learning_rate": 2.1383600852064737e-06, "loss": 0.015194982290267944, "step": 92990 }, { "epoch": 0.8752470588235294, "grad_norm": 0.5042893073362901, "learning_rate": 2.1383025979269386e-06, "loss": 0.015119966864585877, "step": 92995 }, { "epoch": 0.8752941176470588, "grad_norm": 0.3960602387175667, "learning_rate": 2.1382451152835866e-06, "loss": 0.012151627242565155, "step": 93000 }, { "epoch": 0.8753411764705883, "grad_norm": 0.48026924549908123, "learning_rate": 2.1381876372757953e-06, "loss": 0.013455420732498169, "step": 93005 }, { "epoch": 0.8753882352941177, "grad_norm": 0.49672012679820116, "learning_rate": 2.13813016390294e-06, "loss": 0.012023727595806121, "step": 93010 }, { "epoch": 0.8754352941176471, "grad_norm": 0.48778704367839054, "learning_rate": 2.1380726951643986e-06, "loss": 0.013152001798152924, "step": 93015 }, { "epoch": 0.8754823529411765, "grad_norm": 0.4743940583788049, "learning_rate": 2.1380152310595494e-06, "loss": 0.016258245706558226, "step": 93020 }, { "epoch": 0.8755294117647059, "grad_norm": 0.9858180584598778, "learning_rate": 2.1379577715877684e-06, "loss": 0.01850750297307968, "step": 93025 }, { "epoch": 0.8755764705882353, "grad_norm": 0.44831143782646793, "learning_rate": 2.1379003167484334e-06, "loss": 0.01566070318222046, "step": 93030 }, { "epoch": 0.8756235294117647, "grad_norm": 0.5604412406068784, "learning_rate": 2.137842866540922e-06, "loss": 0.009584710001945496, "step": 93035 }, { "epoch": 0.8756705882352941, "grad_norm": 0.514867183583221, "learning_rate": 2.1377854209646127e-06, "loss": 0.014819332957267761, "step": 93040 }, { "epoch": 0.8757176470588235, "grad_norm": 0.3989333105704433, "learning_rate": 2.1377279800188822e-06, "loss": 0.020622114837169647, "step": 93045 }, { "epoch": 0.8757647058823529, "grad_norm": 0.4533550873929022, "learning_rate": 2.137670543703108e-06, "loss": 0.014144620299339295, "step": 93050 }, { "epoch": 0.8758117647058824, "grad_norm": 0.47709072005184333, "learning_rate": 2.1376131120166702e-06, "loss": 0.014694952964782714, "step": 93055 }, { "epoch": 0.8758588235294118, "grad_norm": 0.44500686506185794, "learning_rate": 2.1375556849589453e-06, "loss": 0.017053624987602232, "step": 93060 }, { "epoch": 0.8759058823529412, "grad_norm": 0.31829854700973353, "learning_rate": 2.137498262529312e-06, "loss": 0.01845451593399048, "step": 93065 }, { "epoch": 0.8759529411764706, "grad_norm": 0.4272742259317963, "learning_rate": 2.137440844727148e-06, "loss": 0.01792823672294617, "step": 93070 }, { "epoch": 0.876, "grad_norm": 0.5435543547758721, "learning_rate": 2.1373834315518337e-06, "loss": 0.017818373441696168, "step": 93075 }, { "epoch": 0.8760470588235294, "grad_norm": 0.43630033161077586, "learning_rate": 2.137326023002746e-06, "loss": 0.013833001255989075, "step": 93080 }, { "epoch": 0.8760941176470588, "grad_norm": 0.6312784464978309, "learning_rate": 2.1372686190792645e-06, "loss": 0.015576457977294922, "step": 93085 }, { "epoch": 0.8761411764705882, "grad_norm": 0.39738299727826853, "learning_rate": 2.1372112197807672e-06, "loss": 0.014107169210910797, "step": 93090 }, { "epoch": 0.8761882352941176, "grad_norm": 0.6240575456359508, "learning_rate": 2.137153825106634e-06, "loss": 0.012705400586128235, "step": 93095 }, { "epoch": 0.8762352941176471, "grad_norm": 0.5640119095966811, "learning_rate": 2.137096435056244e-06, "loss": 0.014153391122817993, "step": 93100 }, { "epoch": 0.8762823529411765, "grad_norm": 0.4745759321593516, "learning_rate": 2.1370390496289755e-06, "loss": 0.010161564499139786, "step": 93105 }, { "epoch": 0.8763294117647059, "grad_norm": 0.5143789627945135, "learning_rate": 2.136981668824208e-06, "loss": 0.014482581615447998, "step": 93110 }, { "epoch": 0.8763764705882353, "grad_norm": 0.5233646944024544, "learning_rate": 2.1369242926413224e-06, "loss": 0.010511678457260133, "step": 93115 }, { "epoch": 0.8764235294117647, "grad_norm": 0.31776035640241296, "learning_rate": 2.1368669210796965e-06, "loss": 0.015237358212471009, "step": 93120 }, { "epoch": 0.8764705882352941, "grad_norm": 0.40568106644994134, "learning_rate": 2.136809554138711e-06, "loss": 0.015036101639270782, "step": 93125 }, { "epoch": 0.8765176470588235, "grad_norm": 0.6617018353884436, "learning_rate": 2.1367521918177454e-06, "loss": 0.014334653317928315, "step": 93130 }, { "epoch": 0.8765647058823529, "grad_norm": 0.4619875089079972, "learning_rate": 2.1366948341161794e-06, "loss": 0.01106911152601242, "step": 93135 }, { "epoch": 0.8766117647058823, "grad_norm": 0.3974146751611709, "learning_rate": 2.1366374810333933e-06, "loss": 0.014580221474170684, "step": 93140 }, { "epoch": 0.8766588235294117, "grad_norm": 0.5438151108758756, "learning_rate": 2.1365801325687673e-06, "loss": 0.02001921534538269, "step": 93145 }, { "epoch": 0.8767058823529412, "grad_norm": 0.4535427054253615, "learning_rate": 2.136522788721682e-06, "loss": 0.0177693709731102, "step": 93150 }, { "epoch": 0.8767529411764706, "grad_norm": 0.4257353331099885, "learning_rate": 2.1364654494915164e-06, "loss": 0.016116501390933992, "step": 93155 }, { "epoch": 0.8768, "grad_norm": 0.3908535687320652, "learning_rate": 2.1364081148776523e-06, "loss": 0.012563052773475646, "step": 93160 }, { "epoch": 0.8768470588235294, "grad_norm": 0.4183600122474123, "learning_rate": 2.1363507848794707e-06, "loss": 0.01088051199913025, "step": 93165 }, { "epoch": 0.8768941176470588, "grad_norm": 0.5258976077329519, "learning_rate": 2.136293459496351e-06, "loss": 0.014998584985733032, "step": 93170 }, { "epoch": 0.8769411764705882, "grad_norm": 0.48295313225980674, "learning_rate": 2.136236138727674e-06, "loss": 0.013586746156215667, "step": 93175 }, { "epoch": 0.8769882352941176, "grad_norm": 0.6983485268092967, "learning_rate": 2.136178822572822e-06, "loss": 0.016837871074676512, "step": 93180 }, { "epoch": 0.877035294117647, "grad_norm": 0.344306090921341, "learning_rate": 2.136121511031175e-06, "loss": 0.01463448405265808, "step": 93185 }, { "epoch": 0.8770823529411764, "grad_norm": 0.4562573628768169, "learning_rate": 2.136064204102115e-06, "loss": 0.012756840884685516, "step": 93190 }, { "epoch": 0.8771294117647059, "grad_norm": 0.7278599441751525, "learning_rate": 2.1360069017850224e-06, "loss": 0.015063843131065369, "step": 93195 }, { "epoch": 0.8771764705882353, "grad_norm": 0.6487193263088734, "learning_rate": 2.1359496040792794e-06, "loss": 0.015366214513778686, "step": 93200 }, { "epoch": 0.8772235294117647, "grad_norm": 0.4529295816640949, "learning_rate": 2.135892310984267e-06, "loss": 0.014617308974266052, "step": 93205 }, { "epoch": 0.8772705882352941, "grad_norm": 0.3038802777835182, "learning_rate": 2.135835022499367e-06, "loss": 0.011594760417938232, "step": 93210 }, { "epoch": 0.8773176470588235, "grad_norm": 0.6134351261431766, "learning_rate": 2.1357777386239616e-06, "loss": 0.01915762722492218, "step": 93215 }, { "epoch": 0.8773647058823529, "grad_norm": 0.5582843299754736, "learning_rate": 2.135720459357432e-06, "loss": 0.014008235931396485, "step": 93220 }, { "epoch": 0.8774117647058823, "grad_norm": 0.3381741979858234, "learning_rate": 2.135663184699161e-06, "loss": 0.013905851542949677, "step": 93225 }, { "epoch": 0.8774588235294117, "grad_norm": 0.37770133229994474, "learning_rate": 2.1356059146485304e-06, "loss": 0.0120216965675354, "step": 93230 }, { "epoch": 0.8775058823529411, "grad_norm": 0.5071034724128156, "learning_rate": 2.1355486492049217e-06, "loss": 0.016778665781021117, "step": 93235 }, { "epoch": 0.8775529411764705, "grad_norm": 0.4425442069951853, "learning_rate": 2.135491388367718e-06, "loss": 0.02269386947154999, "step": 93240 }, { "epoch": 0.8776, "grad_norm": 0.46791634006452126, "learning_rate": 2.135434132136302e-06, "loss": 0.013599218428134918, "step": 93245 }, { "epoch": 0.8776470588235294, "grad_norm": 0.44708817734565587, "learning_rate": 2.1353768805100556e-06, "loss": 0.013374677300453186, "step": 93250 }, { "epoch": 0.8776941176470588, "grad_norm": 0.5053648334918653, "learning_rate": 2.135319633488362e-06, "loss": 0.0155612051486969, "step": 93255 }, { "epoch": 0.8777411764705882, "grad_norm": 0.394618719424717, "learning_rate": 2.135262391070604e-06, "loss": 0.013350021839141846, "step": 93260 }, { "epoch": 0.8777882352941176, "grad_norm": 0.5293920748966314, "learning_rate": 2.1352051532561642e-06, "loss": 0.015521593391895294, "step": 93265 }, { "epoch": 0.877835294117647, "grad_norm": 0.6040536254228239, "learning_rate": 2.1351479200444263e-06, "loss": 0.011332191526889801, "step": 93270 }, { "epoch": 0.8778823529411764, "grad_norm": 0.31890606365444674, "learning_rate": 2.1350906914347726e-06, "loss": 0.012137314677238465, "step": 93275 }, { "epoch": 0.8779294117647058, "grad_norm": 0.4869354663544389, "learning_rate": 2.1350334674265864e-06, "loss": 0.017083357274532317, "step": 93280 }, { "epoch": 0.8779764705882352, "grad_norm": 0.4319518920797662, "learning_rate": 2.1349762480192523e-06, "loss": 0.01583395153284073, "step": 93285 }, { "epoch": 0.8780235294117648, "grad_norm": 0.48278687881920723, "learning_rate": 2.134919033212153e-06, "loss": 0.014836969971656799, "step": 93290 }, { "epoch": 0.8780705882352942, "grad_norm": 0.8032545974649485, "learning_rate": 2.1348618230046716e-06, "loss": 0.012885773181915283, "step": 93295 }, { "epoch": 0.8781176470588236, "grad_norm": 0.4436363212924294, "learning_rate": 2.1348046173961925e-06, "loss": 0.01566718816757202, "step": 93300 }, { "epoch": 0.878164705882353, "grad_norm": 0.3888513414351523, "learning_rate": 2.1347474163860998e-06, "loss": 0.011071859300136567, "step": 93305 }, { "epoch": 0.8782117647058824, "grad_norm": 0.39260727265737677, "learning_rate": 2.134690219973777e-06, "loss": 0.01792733818292618, "step": 93310 }, { "epoch": 0.8782588235294118, "grad_norm": 0.5874925374866723, "learning_rate": 2.134633028158608e-06, "loss": 0.013724517822265626, "step": 93315 }, { "epoch": 0.8783058823529412, "grad_norm": 0.6781788877431242, "learning_rate": 2.1345758409399773e-06, "loss": 0.014572912454605102, "step": 93320 }, { "epoch": 0.8783529411764706, "grad_norm": 0.256435088846196, "learning_rate": 2.13451865831727e-06, "loss": 0.01785295456647873, "step": 93325 }, { "epoch": 0.8784, "grad_norm": 0.40137248069379877, "learning_rate": 2.1344614802898687e-06, "loss": 0.012690040469169616, "step": 93330 }, { "epoch": 0.8784470588235294, "grad_norm": 0.4262437926069948, "learning_rate": 2.13440430685716e-06, "loss": 0.017016312479972838, "step": 93335 }, { "epoch": 0.8784941176470589, "grad_norm": 0.36174827532810505, "learning_rate": 2.134347138018527e-06, "loss": 0.014492809772491455, "step": 93340 }, { "epoch": 0.8785411764705883, "grad_norm": 0.4743510660282518, "learning_rate": 2.134289973773355e-06, "loss": 0.012244199216365815, "step": 93345 }, { "epoch": 0.8785882352941177, "grad_norm": 0.5717802410519294, "learning_rate": 2.1342328141210292e-06, "loss": 0.013834477961063385, "step": 93350 }, { "epoch": 0.8786352941176471, "grad_norm": 0.5996799591435336, "learning_rate": 2.1341756590609344e-06, "loss": 0.016336494684219362, "step": 93355 }, { "epoch": 0.8786823529411765, "grad_norm": 0.3993482593029599, "learning_rate": 2.134118508592456e-06, "loss": 0.014188089966773986, "step": 93360 }, { "epoch": 0.8787294117647059, "grad_norm": 0.584317007892848, "learning_rate": 2.134061362714979e-06, "loss": 0.014843681454658508, "step": 93365 }, { "epoch": 0.8787764705882353, "grad_norm": 0.46450677835748144, "learning_rate": 2.1340042214278886e-06, "loss": 0.014712449908256531, "step": 93370 }, { "epoch": 0.8788235294117647, "grad_norm": 0.5836315970260688, "learning_rate": 2.1339470847305703e-06, "loss": 0.013858893513679504, "step": 93375 }, { "epoch": 0.8788705882352941, "grad_norm": 0.5869458172643275, "learning_rate": 2.1338899526224095e-06, "loss": 0.014601890742778779, "step": 93380 }, { "epoch": 0.8789176470588236, "grad_norm": 0.5580504815423807, "learning_rate": 2.1338328251027927e-06, "loss": 0.016818469762802123, "step": 93385 }, { "epoch": 0.878964705882353, "grad_norm": 0.4813093752310858, "learning_rate": 2.133775702171105e-06, "loss": 0.015134821832180022, "step": 93390 }, { "epoch": 0.8790117647058824, "grad_norm": 0.5986685533551988, "learning_rate": 2.1337185838267327e-06, "loss": 0.015021122992038727, "step": 93395 }, { "epoch": 0.8790588235294118, "grad_norm": 0.34999681484141615, "learning_rate": 2.1336614700690618e-06, "loss": 0.010818438231945037, "step": 93400 }, { "epoch": 0.8791058823529412, "grad_norm": 0.6461007355635069, "learning_rate": 2.133604360897478e-06, "loss": 0.012368439137935639, "step": 93405 }, { "epoch": 0.8791529411764706, "grad_norm": 0.5508615573127271, "learning_rate": 2.1335472563113683e-06, "loss": 0.017057426273822784, "step": 93410 }, { "epoch": 0.8792, "grad_norm": 0.7000843087974413, "learning_rate": 2.1334901563101185e-06, "loss": 0.013976727426052094, "step": 93415 }, { "epoch": 0.8792470588235294, "grad_norm": 0.5063121139993075, "learning_rate": 2.1334330608931156e-06, "loss": 0.01576673984527588, "step": 93420 }, { "epoch": 0.8792941176470588, "grad_norm": 0.7801339422818415, "learning_rate": 2.133375970059746e-06, "loss": 0.014109325408935548, "step": 93425 }, { "epoch": 0.8793411764705882, "grad_norm": 2.212832202902346, "learning_rate": 2.133318883809396e-06, "loss": 0.01342681348323822, "step": 93430 }, { "epoch": 0.8793882352941177, "grad_norm": 0.40337305775262344, "learning_rate": 2.1332618021414533e-06, "loss": 0.015204691886901855, "step": 93435 }, { "epoch": 0.8794352941176471, "grad_norm": 0.5922086099399401, "learning_rate": 2.1332047250553043e-06, "loss": 0.013816164433956146, "step": 93440 }, { "epoch": 0.8794823529411765, "grad_norm": 0.5701638124386128, "learning_rate": 2.1331476525503364e-06, "loss": 0.014771668612957001, "step": 93445 }, { "epoch": 0.8795294117647059, "grad_norm": 0.5351490911205397, "learning_rate": 2.133090584625937e-06, "loss": 0.01888842284679413, "step": 93450 }, { "epoch": 0.8795764705882353, "grad_norm": 0.4614162546317115, "learning_rate": 2.1330335212814924e-06, "loss": 0.014333119988441468, "step": 93455 }, { "epoch": 0.8796235294117647, "grad_norm": 0.453866757733467, "learning_rate": 2.1329764625163908e-06, "loss": 0.010448850691318512, "step": 93460 }, { "epoch": 0.8796705882352941, "grad_norm": 0.4665030066060528, "learning_rate": 2.13291940833002e-06, "loss": 0.013299697637557983, "step": 93465 }, { "epoch": 0.8797176470588235, "grad_norm": 0.42238879039971033, "learning_rate": 2.1328623587217673e-06, "loss": 0.014016278088092804, "step": 93470 }, { "epoch": 0.8797647058823529, "grad_norm": 0.5082995117630175, "learning_rate": 2.1328053136910203e-06, "loss": 0.014654436707496643, "step": 93475 }, { "epoch": 0.8798117647058824, "grad_norm": 0.5626470422394368, "learning_rate": 2.1327482732371667e-06, "loss": 0.013372249901294708, "step": 93480 }, { "epoch": 0.8798588235294118, "grad_norm": 0.45913581652804675, "learning_rate": 2.1326912373595954e-06, "loss": 0.013294145464897156, "step": 93485 }, { "epoch": 0.8799058823529412, "grad_norm": 0.46647825583186575, "learning_rate": 2.132634206057694e-06, "loss": 0.015854218602180482, "step": 93490 }, { "epoch": 0.8799529411764706, "grad_norm": 0.8551897273907045, "learning_rate": 2.1325771793308503e-06, "loss": 0.019200029969215392, "step": 93495 }, { "epoch": 0.88, "grad_norm": 0.31635753185602367, "learning_rate": 2.132520157178453e-06, "loss": 0.014460435509681702, "step": 93500 }, { "epoch": 0.8800470588235294, "grad_norm": 0.42970250036022134, "learning_rate": 2.1324631395998913e-06, "loss": 0.015560343861579895, "step": 93505 }, { "epoch": 0.8800941176470588, "grad_norm": 0.3296329719071799, "learning_rate": 2.1324061265945524e-06, "loss": 0.015155941247940063, "step": 93510 }, { "epoch": 0.8801411764705882, "grad_norm": 0.41967922909298505, "learning_rate": 2.1323491181618265e-06, "loss": 0.017789340019226073, "step": 93515 }, { "epoch": 0.8801882352941176, "grad_norm": 0.36597787333088466, "learning_rate": 2.1322921143011006e-06, "loss": 0.014674097299575806, "step": 93520 }, { "epoch": 0.880235294117647, "grad_norm": 0.3983347598900024, "learning_rate": 2.1322351150117654e-06, "loss": 0.013552838563919067, "step": 93525 }, { "epoch": 0.8802823529411765, "grad_norm": 0.39372230081147186, "learning_rate": 2.132178120293209e-06, "loss": 0.013779494166374206, "step": 93530 }, { "epoch": 0.8803294117647059, "grad_norm": 0.37013613848697785, "learning_rate": 2.13212113014482e-06, "loss": 0.011369818449020385, "step": 93535 }, { "epoch": 0.8803764705882353, "grad_norm": 0.4361643566693279, "learning_rate": 2.132064144565989e-06, "loss": 0.01759438365697861, "step": 93540 }, { "epoch": 0.8804235294117647, "grad_norm": 0.5203737916292878, "learning_rate": 2.132007163556104e-06, "loss": 0.015242448449134827, "step": 93545 }, { "epoch": 0.8804705882352941, "grad_norm": 0.40332577847276313, "learning_rate": 2.1319501871145565e-06, "loss": 0.010586915165185928, "step": 93550 }, { "epoch": 0.8805176470588235, "grad_norm": 0.4502201940895263, "learning_rate": 2.131893215240734e-06, "loss": 0.014249801635742188, "step": 93555 }, { "epoch": 0.8805647058823529, "grad_norm": 0.3803076514446701, "learning_rate": 2.131836247934027e-06, "loss": 0.014260229468345643, "step": 93560 }, { "epoch": 0.8806117647058823, "grad_norm": 0.37850200571888565, "learning_rate": 2.1317792851938255e-06, "loss": 0.015509802103042602, "step": 93565 }, { "epoch": 0.8806588235294117, "grad_norm": 0.3516270605092499, "learning_rate": 2.1317223270195195e-06, "loss": 0.014147765934467316, "step": 93570 }, { "epoch": 0.8807058823529412, "grad_norm": 0.5489290459662775, "learning_rate": 2.1316653734104988e-06, "loss": 0.016141438484191896, "step": 93575 }, { "epoch": 0.8807529411764706, "grad_norm": 0.6170318178730878, "learning_rate": 2.131608424366153e-06, "loss": 0.016572307050228118, "step": 93580 }, { "epoch": 0.8808, "grad_norm": 0.601320698779244, "learning_rate": 2.131551479885874e-06, "loss": 0.016991209983825684, "step": 93585 }, { "epoch": 0.8808470588235294, "grad_norm": 0.43616305603656075, "learning_rate": 2.1314945399690506e-06, "loss": 0.014706332981586457, "step": 93590 }, { "epoch": 0.8808941176470588, "grad_norm": 0.5911153085452562, "learning_rate": 2.1314376046150743e-06, "loss": 0.013879549503326417, "step": 93595 }, { "epoch": 0.8809411764705882, "grad_norm": 1.0497586864137751, "learning_rate": 2.1313806738233352e-06, "loss": 0.014495238661766052, "step": 93600 }, { "epoch": 0.8809882352941176, "grad_norm": 0.3263334977295694, "learning_rate": 2.1313237475932247e-06, "loss": 0.014646652340888976, "step": 93605 }, { "epoch": 0.881035294117647, "grad_norm": 0.35607629508496713, "learning_rate": 2.1312668259241325e-06, "loss": 0.02147906571626663, "step": 93610 }, { "epoch": 0.8810823529411764, "grad_norm": 0.36814082506634466, "learning_rate": 2.1312099088154504e-06, "loss": 0.013881815969944, "step": 93615 }, { "epoch": 0.8811294117647058, "grad_norm": 0.3764823420113613, "learning_rate": 2.13115299626657e-06, "loss": 0.016804198920726775, "step": 93620 }, { "epoch": 0.8811764705882353, "grad_norm": 0.8267646891108603, "learning_rate": 2.1310960882768815e-06, "loss": 0.01750132590532303, "step": 93625 }, { "epoch": 0.8812235294117647, "grad_norm": 0.4134515252839519, "learning_rate": 2.1310391848457766e-06, "loss": 0.014916923642158509, "step": 93630 }, { "epoch": 0.8812705882352941, "grad_norm": 0.5573347911922427, "learning_rate": 2.1309822859726463e-06, "loss": 0.015657079219818116, "step": 93635 }, { "epoch": 0.8813176470588235, "grad_norm": 0.7449649428519008, "learning_rate": 2.130925391656883e-06, "loss": 0.01491192877292633, "step": 93640 }, { "epoch": 0.8813647058823529, "grad_norm": 0.7315456801148429, "learning_rate": 2.130868501897878e-06, "loss": 0.020777133107185364, "step": 93645 }, { "epoch": 0.8814117647058823, "grad_norm": 0.4879596967171821, "learning_rate": 2.1308116166950225e-06, "loss": 0.01510055661201477, "step": 93650 }, { "epoch": 0.8814588235294117, "grad_norm": 0.35754856623688763, "learning_rate": 2.1307547360477093e-06, "loss": 0.013396593928337096, "step": 93655 }, { "epoch": 0.8815058823529411, "grad_norm": 0.45170675711681363, "learning_rate": 2.1306978599553292e-06, "loss": 0.013850879669189454, "step": 93660 }, { "epoch": 0.8815529411764705, "grad_norm": 0.4627260410127922, "learning_rate": 2.1306409884172755e-06, "loss": 0.011523856222629547, "step": 93665 }, { "epoch": 0.8816, "grad_norm": 0.3384131642819482, "learning_rate": 2.13058412143294e-06, "loss": 0.01438610553741455, "step": 93670 }, { "epoch": 0.8816470588235295, "grad_norm": 0.47920051867831587, "learning_rate": 2.130527259001715e-06, "loss": 0.015311464667320251, "step": 93675 }, { "epoch": 0.8816941176470589, "grad_norm": 0.4330281766973816, "learning_rate": 2.130470401122993e-06, "loss": 0.015466225147247315, "step": 93680 }, { "epoch": 0.8817411764705883, "grad_norm": 0.5378833162820402, "learning_rate": 2.130413547796167e-06, "loss": 0.014217813313007355, "step": 93685 }, { "epoch": 0.8817882352941177, "grad_norm": 0.6531632681932351, "learning_rate": 2.1303566990206283e-06, "loss": 0.01584112346172333, "step": 93690 }, { "epoch": 0.881835294117647, "grad_norm": 0.5219095490135446, "learning_rate": 2.130299854795771e-06, "loss": 0.013548880815505981, "step": 93695 }, { "epoch": 0.8818823529411765, "grad_norm": 0.6195292253514579, "learning_rate": 2.1302430151209875e-06, "loss": 0.01334206610918045, "step": 93700 }, { "epoch": 0.8819294117647059, "grad_norm": 0.5202017890141534, "learning_rate": 2.1301861799956715e-06, "loss": 0.01582716703414917, "step": 93705 }, { "epoch": 0.8819764705882353, "grad_norm": 0.7376999026542754, "learning_rate": 2.130129349419214e-06, "loss": 0.016039475798606873, "step": 93710 }, { "epoch": 0.8820235294117647, "grad_norm": 0.4554175738502452, "learning_rate": 2.1300725233910117e-06, "loss": 0.012365604937076568, "step": 93715 }, { "epoch": 0.8820705882352942, "grad_norm": 0.3104268267278241, "learning_rate": 2.1300157019104546e-06, "loss": 0.008312998712062836, "step": 93720 }, { "epoch": 0.8821176470588236, "grad_norm": 0.5540230251388831, "learning_rate": 2.1299588849769384e-06, "loss": 0.014936017990112304, "step": 93725 }, { "epoch": 0.882164705882353, "grad_norm": 0.5528602690787422, "learning_rate": 2.1299020725898556e-06, "loss": 0.01995491087436676, "step": 93730 }, { "epoch": 0.8822117647058824, "grad_norm": 0.6907624587362126, "learning_rate": 2.1298452647486007e-06, "loss": 0.016940706968307497, "step": 93735 }, { "epoch": 0.8822588235294118, "grad_norm": 0.6403349567580878, "learning_rate": 2.129788461452566e-06, "loss": 0.012573689222335815, "step": 93740 }, { "epoch": 0.8823058823529412, "grad_norm": 0.3519035525971601, "learning_rate": 2.1297316627011476e-06, "loss": 0.015825466811656953, "step": 93745 }, { "epoch": 0.8823529411764706, "grad_norm": 0.5849482477709628, "learning_rate": 2.1296748684937376e-06, "loss": 0.012359815835952758, "step": 93750 }, { "epoch": 0.8824, "grad_norm": 0.6632007286622417, "learning_rate": 2.1296180788297315e-06, "loss": 0.01721433997154236, "step": 93755 }, { "epoch": 0.8824470588235294, "grad_norm": 0.9584115239565675, "learning_rate": 2.1295612937085223e-06, "loss": 0.013225062191486359, "step": 93760 }, { "epoch": 0.8824941176470589, "grad_norm": 0.6173359659713803, "learning_rate": 2.1295045131295057e-06, "loss": 0.0158269464969635, "step": 93765 }, { "epoch": 0.8825411764705883, "grad_norm": 0.5716605163837722, "learning_rate": 2.129447737092075e-06, "loss": 0.01433597207069397, "step": 93770 }, { "epoch": 0.8825882352941177, "grad_norm": 0.7565535719612322, "learning_rate": 2.1293909655956256e-06, "loss": 0.014269694685935974, "step": 93775 }, { "epoch": 0.8826352941176471, "grad_norm": 0.4684400959813494, "learning_rate": 2.129334198639552e-06, "loss": 0.014179393649101257, "step": 93780 }, { "epoch": 0.8826823529411765, "grad_norm": 0.3725906266289302, "learning_rate": 2.1292774362232486e-06, "loss": 0.010784661769866944, "step": 93785 }, { "epoch": 0.8827294117647059, "grad_norm": 0.3749112559773032, "learning_rate": 2.1292206783461113e-06, "loss": 0.01144939884543419, "step": 93790 }, { "epoch": 0.8827764705882353, "grad_norm": 0.4854740114227337, "learning_rate": 2.1291639250075346e-06, "loss": 0.01614498198032379, "step": 93795 }, { "epoch": 0.8828235294117647, "grad_norm": 0.5462306566784221, "learning_rate": 2.1291071762069135e-06, "loss": 0.016247811913490295, "step": 93800 }, { "epoch": 0.8828705882352941, "grad_norm": 0.5514681421290482, "learning_rate": 2.1290504319436434e-06, "loss": 0.014270561933517455, "step": 93805 }, { "epoch": 0.8829176470588236, "grad_norm": 0.4678027474533525, "learning_rate": 2.1289936922171194e-06, "loss": 0.014542555809020996, "step": 93810 }, { "epoch": 0.882964705882353, "grad_norm": 0.4803734540948227, "learning_rate": 2.128936957026738e-06, "loss": 0.017127367854118346, "step": 93815 }, { "epoch": 0.8830117647058824, "grad_norm": 0.38839494230536054, "learning_rate": 2.1288802263718937e-06, "loss": 0.015379205346107483, "step": 93820 }, { "epoch": 0.8830588235294118, "grad_norm": 0.6344599738126006, "learning_rate": 2.1288235002519825e-06, "loss": 0.01554044783115387, "step": 93825 }, { "epoch": 0.8831058823529412, "grad_norm": 0.4088010853282955, "learning_rate": 2.128766778666401e-06, "loss": 0.011382278800010682, "step": 93830 }, { "epoch": 0.8831529411764706, "grad_norm": 0.5363916235956131, "learning_rate": 2.1287100616145444e-06, "loss": 0.014861822128295898, "step": 93835 }, { "epoch": 0.8832, "grad_norm": 0.39175710202300956, "learning_rate": 2.1286533490958084e-06, "loss": 0.008346631377935409, "step": 93840 }, { "epoch": 0.8832470588235294, "grad_norm": 0.4713272932918936, "learning_rate": 2.1285966411095902e-06, "loss": 0.013591744005680084, "step": 93845 }, { "epoch": 0.8832941176470588, "grad_norm": 0.3196292941076162, "learning_rate": 2.128539937655286e-06, "loss": 0.01672607958316803, "step": 93850 }, { "epoch": 0.8833411764705882, "grad_norm": 0.502856675924638, "learning_rate": 2.128483238732291e-06, "loss": 0.015949562191963196, "step": 93855 }, { "epoch": 0.8833882352941177, "grad_norm": 0.3833309334024412, "learning_rate": 2.128426544340003e-06, "loss": 0.01507154554128647, "step": 93860 }, { "epoch": 0.8834352941176471, "grad_norm": 0.36277034344169107, "learning_rate": 2.128369854477818e-06, "loss": 0.014537326991558075, "step": 93865 }, { "epoch": 0.8834823529411765, "grad_norm": 0.5472817520489958, "learning_rate": 2.128313169145133e-06, "loss": 0.012946709990501404, "step": 93870 }, { "epoch": 0.8835294117647059, "grad_norm": 0.5934234746008327, "learning_rate": 2.1282564883413447e-06, "loss": 0.014273373782634735, "step": 93875 }, { "epoch": 0.8835764705882353, "grad_norm": 0.3689292490729994, "learning_rate": 2.1281998120658504e-06, "loss": 0.012982313334941865, "step": 93880 }, { "epoch": 0.8836235294117647, "grad_norm": 0.5291109435906456, "learning_rate": 2.128143140318047e-06, "loss": 0.014423555135726929, "step": 93885 }, { "epoch": 0.8836705882352941, "grad_norm": 0.5311260950348758, "learning_rate": 2.1280864730973312e-06, "loss": 0.013009396195411683, "step": 93890 }, { "epoch": 0.8837176470588235, "grad_norm": 0.266619663100434, "learning_rate": 2.128029810403101e-06, "loss": 0.010865416377782822, "step": 93895 }, { "epoch": 0.8837647058823529, "grad_norm": 0.5472575034298168, "learning_rate": 2.1279731522347537e-06, "loss": 0.013395853340625763, "step": 93900 }, { "epoch": 0.8838117647058824, "grad_norm": 0.7347656090745421, "learning_rate": 2.1279164985916863e-06, "loss": 0.022365228831768037, "step": 93905 }, { "epoch": 0.8838588235294118, "grad_norm": 0.47261873822093686, "learning_rate": 2.127859849473297e-06, "loss": 0.014826072752475739, "step": 93910 }, { "epoch": 0.8839058823529412, "grad_norm": 0.4905342019188828, "learning_rate": 2.127803204878983e-06, "loss": 0.015028144419193267, "step": 93915 }, { "epoch": 0.8839529411764706, "grad_norm": 0.46787323431839706, "learning_rate": 2.1277465648081434e-06, "loss": 0.012743517756462097, "step": 93920 }, { "epoch": 0.884, "grad_norm": 0.22900154117061475, "learning_rate": 2.127689929260175e-06, "loss": 0.010875510424375534, "step": 93925 }, { "epoch": 0.8840470588235294, "grad_norm": 0.3771605014438567, "learning_rate": 2.127633298234476e-06, "loss": 0.015318912267684937, "step": 93930 }, { "epoch": 0.8840941176470588, "grad_norm": 0.42927146263873467, "learning_rate": 2.1275766717304447e-06, "loss": 0.013042525947093963, "step": 93935 }, { "epoch": 0.8841411764705882, "grad_norm": 0.4487132475245456, "learning_rate": 2.12752004974748e-06, "loss": 0.01444031149148941, "step": 93940 }, { "epoch": 0.8841882352941176, "grad_norm": 0.4325911285022825, "learning_rate": 2.1274634322849794e-06, "loss": 0.01102827787399292, "step": 93945 }, { "epoch": 0.884235294117647, "grad_norm": 0.4656369781452527, "learning_rate": 2.1274068193423424e-06, "loss": 0.014970834553241729, "step": 93950 }, { "epoch": 0.8842823529411765, "grad_norm": 0.5875063400811029, "learning_rate": 2.1273502109189666e-06, "loss": 0.013897427916526794, "step": 93955 }, { "epoch": 0.8843294117647059, "grad_norm": 0.6077968837869397, "learning_rate": 2.1272936070142515e-06, "loss": 0.01182391494512558, "step": 93960 }, { "epoch": 0.8843764705882353, "grad_norm": 0.8366809118688472, "learning_rate": 2.127237007627596e-06, "loss": 0.018765676021575927, "step": 93965 }, { "epoch": 0.8844235294117647, "grad_norm": 0.2955470708489923, "learning_rate": 2.1271804127583984e-06, "loss": 0.012948770821094514, "step": 93970 }, { "epoch": 0.8844705882352941, "grad_norm": 0.2756330494384963, "learning_rate": 2.1271238224060584e-06, "loss": 0.010898968577384949, "step": 93975 }, { "epoch": 0.8845176470588235, "grad_norm": 0.615192036468574, "learning_rate": 2.127067236569976e-06, "loss": 0.019467107951641083, "step": 93980 }, { "epoch": 0.8845647058823529, "grad_norm": 0.3813884452495875, "learning_rate": 2.127010655249549e-06, "loss": 0.016448333859443665, "step": 93985 }, { "epoch": 0.8846117647058823, "grad_norm": 0.4485595546273382, "learning_rate": 2.126954078444177e-06, "loss": 0.011633199453353883, "step": 93990 }, { "epoch": 0.8846588235294117, "grad_norm": 0.6248616400581667, "learning_rate": 2.126897506153261e-06, "loss": 0.014857330918312072, "step": 93995 }, { "epoch": 0.8847058823529412, "grad_norm": 0.3923811817451609, "learning_rate": 2.126840938376199e-06, "loss": 0.012868309020996093, "step": 94000 }, { "epoch": 0.8847529411764706, "grad_norm": 0.44811665255045574, "learning_rate": 2.1267843751123914e-06, "loss": 0.01870707720518112, "step": 94005 }, { "epoch": 0.8848, "grad_norm": 0.8356572867696516, "learning_rate": 2.126727816361238e-06, "loss": 0.01425977349281311, "step": 94010 }, { "epoch": 0.8848470588235294, "grad_norm": 0.5074108878854776, "learning_rate": 2.1266712621221392e-06, "loss": 0.016535159945487977, "step": 94015 }, { "epoch": 0.8848941176470588, "grad_norm": 0.684741027972311, "learning_rate": 2.1266147123944943e-06, "loss": 0.01500716358423233, "step": 94020 }, { "epoch": 0.8849411764705882, "grad_norm": 0.4827927581026417, "learning_rate": 2.126558167177705e-06, "loss": 0.01417338103055954, "step": 94025 }, { "epoch": 0.8849882352941176, "grad_norm": 0.5195936367210514, "learning_rate": 2.1265016264711698e-06, "loss": 0.01213681697845459, "step": 94030 }, { "epoch": 0.885035294117647, "grad_norm": 0.4520684899387966, "learning_rate": 2.1264450902742904e-06, "loss": 0.014851158857345581, "step": 94035 }, { "epoch": 0.8850823529411764, "grad_norm": 0.4105966029832138, "learning_rate": 2.126388558586467e-06, "loss": 0.016320401430130006, "step": 94040 }, { "epoch": 0.8851294117647058, "grad_norm": 0.5458211984070844, "learning_rate": 2.1263320314071003e-06, "loss": 0.017306651175022125, "step": 94045 }, { "epoch": 0.8851764705882353, "grad_norm": 0.4246303701819048, "learning_rate": 2.1262755087355905e-06, "loss": 0.015366718173027039, "step": 94050 }, { "epoch": 0.8852235294117647, "grad_norm": 0.5548668934722095, "learning_rate": 2.126218990571339e-06, "loss": 0.016042426228523254, "step": 94055 }, { "epoch": 0.8852705882352941, "grad_norm": 0.40235175735650164, "learning_rate": 2.1261624769137473e-06, "loss": 0.011979715526103973, "step": 94060 }, { "epoch": 0.8853176470588235, "grad_norm": 0.4426003164480915, "learning_rate": 2.1261059677622157e-06, "loss": 0.01477770060300827, "step": 94065 }, { "epoch": 0.8853647058823529, "grad_norm": 0.3066856799400328, "learning_rate": 2.1260494631161456e-06, "loss": 0.015577864646911622, "step": 94070 }, { "epoch": 0.8854117647058823, "grad_norm": 0.5636943758727294, "learning_rate": 2.125992962974939e-06, "loss": 0.015325437486171722, "step": 94075 }, { "epoch": 0.8854588235294117, "grad_norm": 0.777567976224354, "learning_rate": 2.1259364673379963e-06, "loss": 0.01657240092754364, "step": 94080 }, { "epoch": 0.8855058823529411, "grad_norm": 0.413554543655544, "learning_rate": 2.12587997620472e-06, "loss": 0.01949112266302109, "step": 94085 }, { "epoch": 0.8855529411764705, "grad_norm": 0.5380761820943215, "learning_rate": 2.1258234895745107e-06, "loss": 0.01180577427148819, "step": 94090 }, { "epoch": 0.8856, "grad_norm": 0.4185762789754969, "learning_rate": 2.1257670074467712e-06, "loss": 0.01574268937110901, "step": 94095 }, { "epoch": 0.8856470588235295, "grad_norm": 0.7222016068111156, "learning_rate": 2.1257105298209026e-06, "loss": 0.018322713673114777, "step": 94100 }, { "epoch": 0.8856941176470589, "grad_norm": 0.308961096047447, "learning_rate": 2.1256540566963077e-06, "loss": 0.01338779628276825, "step": 94105 }, { "epoch": 0.8857411764705883, "grad_norm": 0.3889025847953107, "learning_rate": 2.1255975880723876e-06, "loss": 0.013176271319389343, "step": 94110 }, { "epoch": 0.8857882352941177, "grad_norm": 0.8955718853066136, "learning_rate": 2.1255411239485456e-06, "loss": 0.014855310320854187, "step": 94115 }, { "epoch": 0.885835294117647, "grad_norm": 0.5285968522193691, "learning_rate": 2.1254846643241837e-06, "loss": 0.012163729965686798, "step": 94120 }, { "epoch": 0.8858823529411765, "grad_norm": 0.4564455745405486, "learning_rate": 2.125428209198704e-06, "loss": 0.013550683856010437, "step": 94125 }, { "epoch": 0.8859294117647059, "grad_norm": 0.3416823643077629, "learning_rate": 2.125371758571509e-06, "loss": 0.012754791975021362, "step": 94130 }, { "epoch": 0.8859764705882353, "grad_norm": 0.600878400779763, "learning_rate": 2.125315312442002e-06, "loss": 0.012774890661239624, "step": 94135 }, { "epoch": 0.8860235294117647, "grad_norm": 0.4411098536320036, "learning_rate": 2.1252588708095855e-06, "loss": 0.015743887424468993, "step": 94140 }, { "epoch": 0.8860705882352942, "grad_norm": 0.4208132664260089, "learning_rate": 2.1252024336736616e-06, "loss": 0.014762106537818908, "step": 94145 }, { "epoch": 0.8861176470588236, "grad_norm": 0.5590872976341, "learning_rate": 2.125146001033635e-06, "loss": 0.015334966778755187, "step": 94150 }, { "epoch": 0.886164705882353, "grad_norm": 0.33348012594535537, "learning_rate": 2.125089572888907e-06, "loss": 0.010748621821403504, "step": 94155 }, { "epoch": 0.8862117647058824, "grad_norm": 0.422758946345991, "learning_rate": 2.1250331492388818e-06, "loss": 0.016129982471466065, "step": 94160 }, { "epoch": 0.8862588235294118, "grad_norm": 0.3504874784661457, "learning_rate": 2.1249767300829623e-06, "loss": 0.01223199963569641, "step": 94165 }, { "epoch": 0.8863058823529412, "grad_norm": 0.5079517449929586, "learning_rate": 2.1249203154205526e-06, "loss": 0.01492232084274292, "step": 94170 }, { "epoch": 0.8863529411764706, "grad_norm": 0.6083168860248998, "learning_rate": 2.1248639052510555e-06, "loss": 0.017204238474369048, "step": 94175 }, { "epoch": 0.8864, "grad_norm": 0.6500941165414612, "learning_rate": 2.1248074995738756e-06, "loss": 0.01687811017036438, "step": 94180 }, { "epoch": 0.8864470588235294, "grad_norm": 0.6103992133361757, "learning_rate": 2.1247510983884157e-06, "loss": 0.012656277418136597, "step": 94185 }, { "epoch": 0.8864941176470589, "grad_norm": 0.49441505770318417, "learning_rate": 2.12469470169408e-06, "loss": 0.014451445639133453, "step": 94190 }, { "epoch": 0.8865411764705883, "grad_norm": 0.7119577880418149, "learning_rate": 2.1246383094902727e-06, "loss": 0.013382357358932496, "step": 94195 }, { "epoch": 0.8865882352941177, "grad_norm": 0.3727743869155116, "learning_rate": 2.1245819217763975e-06, "loss": 0.010253890603780746, "step": 94200 }, { "epoch": 0.8866352941176471, "grad_norm": 0.39761504012202387, "learning_rate": 2.1245255385518596e-06, "loss": 0.017032040655612944, "step": 94205 }, { "epoch": 0.8866823529411765, "grad_norm": 0.645563225134027, "learning_rate": 2.1244691598160622e-06, "loss": 0.014694267511367798, "step": 94210 }, { "epoch": 0.8867294117647059, "grad_norm": 0.3962481539431214, "learning_rate": 2.12441278556841e-06, "loss": 0.012566915154457093, "step": 94215 }, { "epoch": 0.8867764705882353, "grad_norm": 0.4078960998703621, "learning_rate": 2.1243564158083078e-06, "loss": 0.012489032000303268, "step": 94220 }, { "epoch": 0.8868235294117647, "grad_norm": 0.7286955574808954, "learning_rate": 2.1243000505351606e-06, "loss": 0.0154252290725708, "step": 94225 }, { "epoch": 0.8868705882352941, "grad_norm": 0.8344055546300415, "learning_rate": 2.1242436897483726e-06, "loss": 0.01481078714132309, "step": 94230 }, { "epoch": 0.8869176470588235, "grad_norm": 0.3063168222266149, "learning_rate": 2.1241873334473486e-06, "loss": 0.013651102781295776, "step": 94235 }, { "epoch": 0.886964705882353, "grad_norm": 0.5597098237957406, "learning_rate": 2.1241309816314937e-06, "loss": 0.012842684984207153, "step": 94240 }, { "epoch": 0.8870117647058824, "grad_norm": 0.4340099691894181, "learning_rate": 2.1240746343002137e-06, "loss": 0.013946188986301422, "step": 94245 }, { "epoch": 0.8870588235294118, "grad_norm": 0.4263155418995581, "learning_rate": 2.124018291452913e-06, "loss": 0.013745298981666565, "step": 94250 }, { "epoch": 0.8871058823529412, "grad_norm": 0.5740059721523937, "learning_rate": 2.123961953088997e-06, "loss": 0.013395777344703675, "step": 94255 }, { "epoch": 0.8871529411764706, "grad_norm": 0.3623138484322387, "learning_rate": 2.1239056192078715e-06, "loss": 0.00978381633758545, "step": 94260 }, { "epoch": 0.8872, "grad_norm": 0.5099979919617905, "learning_rate": 2.1238492898089417e-06, "loss": 0.014172431826591492, "step": 94265 }, { "epoch": 0.8872470588235294, "grad_norm": 0.6985632573379634, "learning_rate": 2.1237929648916138e-06, "loss": 0.015228338539600372, "step": 94270 }, { "epoch": 0.8872941176470588, "grad_norm": 0.5798454774677079, "learning_rate": 2.123736644455293e-06, "loss": 0.01441463828086853, "step": 94275 }, { "epoch": 0.8873411764705882, "grad_norm": 0.4739739631457461, "learning_rate": 2.1236803284993855e-06, "loss": 0.012652400135993957, "step": 94280 }, { "epoch": 0.8873882352941177, "grad_norm": 0.43662012347495427, "learning_rate": 2.1236240170232973e-06, "loss": 0.012079130113124847, "step": 94285 }, { "epoch": 0.8874352941176471, "grad_norm": 0.4641892674331229, "learning_rate": 2.1235677100264336e-06, "loss": 0.012488023936748504, "step": 94290 }, { "epoch": 0.8874823529411765, "grad_norm": 0.4701541657853018, "learning_rate": 2.123511407508202e-06, "loss": 0.012222134321928025, "step": 94295 }, { "epoch": 0.8875294117647059, "grad_norm": 0.4297704500111228, "learning_rate": 2.123455109468008e-06, "loss": 0.014529910683631898, "step": 94300 }, { "epoch": 0.8875764705882353, "grad_norm": 0.48844351770018707, "learning_rate": 2.1233988159052583e-06, "loss": 0.014211800694465638, "step": 94305 }, { "epoch": 0.8876235294117647, "grad_norm": 0.5120023372845921, "learning_rate": 2.1233425268193595e-06, "loss": 0.01105148047208786, "step": 94310 }, { "epoch": 0.8876705882352941, "grad_norm": 0.5651251983617976, "learning_rate": 2.1232862422097177e-06, "loss": 0.016095651686191557, "step": 94315 }, { "epoch": 0.8877176470588235, "grad_norm": 0.49363541401847644, "learning_rate": 2.12322996207574e-06, "loss": 0.012310215085744858, "step": 94320 }, { "epoch": 0.8877647058823529, "grad_norm": 0.48553219964186606, "learning_rate": 2.1231736864168333e-06, "loss": 0.012545736134052276, "step": 94325 }, { "epoch": 0.8878117647058823, "grad_norm": 0.6382267572195214, "learning_rate": 2.1231174152324053e-06, "loss": 0.016080868244171143, "step": 94330 }, { "epoch": 0.8878588235294118, "grad_norm": 0.8944013602882552, "learning_rate": 2.123061148521862e-06, "loss": 0.016556902229785918, "step": 94335 }, { "epoch": 0.8879058823529412, "grad_norm": 0.38238776945054714, "learning_rate": 2.1230048862846107e-06, "loss": 0.012829266488552094, "step": 94340 }, { "epoch": 0.8879529411764706, "grad_norm": 0.4248923152968512, "learning_rate": 2.122948628520059e-06, "loss": 0.013686564564704896, "step": 94345 }, { "epoch": 0.888, "grad_norm": 0.3320837572393332, "learning_rate": 2.1228923752276147e-06, "loss": 0.013355778157711029, "step": 94350 }, { "epoch": 0.8880470588235294, "grad_norm": 0.41912621262605765, "learning_rate": 2.1228361264066844e-06, "loss": 0.014161121845245362, "step": 94355 }, { "epoch": 0.8880941176470588, "grad_norm": 0.5525794305704133, "learning_rate": 2.1227798820566766e-06, "loss": 0.01374097466468811, "step": 94360 }, { "epoch": 0.8881411764705882, "grad_norm": 0.541187348024473, "learning_rate": 2.1227236421769986e-06, "loss": 0.014055639505386353, "step": 94365 }, { "epoch": 0.8881882352941176, "grad_norm": 0.5803592565864598, "learning_rate": 2.1226674067670584e-06, "loss": 0.014206355810165406, "step": 94370 }, { "epoch": 0.888235294117647, "grad_norm": 0.28076082886487097, "learning_rate": 2.1226111758262648e-06, "loss": 0.014742441475391388, "step": 94375 }, { "epoch": 0.8882823529411765, "grad_norm": 0.5808806320217162, "learning_rate": 2.1225549493540235e-06, "loss": 0.011470799148082734, "step": 94380 }, { "epoch": 0.8883294117647059, "grad_norm": 0.3954257683916129, "learning_rate": 2.1224987273497456e-06, "loss": 0.013320095837116241, "step": 94385 }, { "epoch": 0.8883764705882353, "grad_norm": 0.4830239819016573, "learning_rate": 2.1224425098128375e-06, "loss": 0.01726020872592926, "step": 94390 }, { "epoch": 0.8884235294117647, "grad_norm": 0.5556375797542794, "learning_rate": 2.1223862967427078e-06, "loss": 0.012052622437477113, "step": 94395 }, { "epoch": 0.8884705882352941, "grad_norm": 0.24455935346192562, "learning_rate": 2.1223300881387655e-06, "loss": 0.01078447550535202, "step": 94400 }, { "epoch": 0.8885176470588235, "grad_norm": 0.49071391652086177, "learning_rate": 2.1222738840004194e-06, "loss": 0.014732307195663452, "step": 94405 }, { "epoch": 0.8885647058823529, "grad_norm": 0.3315445952772277, "learning_rate": 2.1222176843270775e-06, "loss": 0.010470382124185561, "step": 94410 }, { "epoch": 0.8886117647058823, "grad_norm": 0.5012874855198062, "learning_rate": 2.1221614891181494e-06, "loss": 0.013314427435398101, "step": 94415 }, { "epoch": 0.8886588235294117, "grad_norm": 0.532311143204341, "learning_rate": 2.1221052983730434e-06, "loss": 0.012556573748588562, "step": 94420 }, { "epoch": 0.8887058823529411, "grad_norm": 0.5076380544572276, "learning_rate": 2.1220491120911687e-06, "loss": 0.011545489728450774, "step": 94425 }, { "epoch": 0.8887529411764706, "grad_norm": 0.5673086891420919, "learning_rate": 2.1219929302719346e-06, "loss": 0.011896188557147979, "step": 94430 }, { "epoch": 0.8888, "grad_norm": 0.7055382214304108, "learning_rate": 2.121936752914751e-06, "loss": 0.01605373024940491, "step": 94435 }, { "epoch": 0.8888470588235294, "grad_norm": 0.48798611958816046, "learning_rate": 2.1218805800190263e-06, "loss": 0.011120064556598664, "step": 94440 }, { "epoch": 0.8888941176470588, "grad_norm": 0.5414262824953072, "learning_rate": 2.1218244115841706e-06, "loss": 0.01665761023759842, "step": 94445 }, { "epoch": 0.8889411764705882, "grad_norm": 0.5360115430558735, "learning_rate": 2.1217682476095934e-06, "loss": 0.015190730988979339, "step": 94450 }, { "epoch": 0.8889882352941176, "grad_norm": 0.46530273524952365, "learning_rate": 2.1217120880947044e-06, "loss": 0.01243423968553543, "step": 94455 }, { "epoch": 0.889035294117647, "grad_norm": 0.43031599656144764, "learning_rate": 2.1216559330389128e-06, "loss": 0.012983320653438568, "step": 94460 }, { "epoch": 0.8890823529411764, "grad_norm": 0.4261291548912005, "learning_rate": 2.1215997824416294e-06, "loss": 0.015793877840042114, "step": 94465 }, { "epoch": 0.8891294117647058, "grad_norm": 0.9528753550118388, "learning_rate": 2.121543636302264e-06, "loss": 0.015380468964576722, "step": 94470 }, { "epoch": 0.8891764705882353, "grad_norm": 0.589307959905078, "learning_rate": 2.1214874946202273e-06, "loss": 0.014996486902236938, "step": 94475 }, { "epoch": 0.8892235294117647, "grad_norm": 0.48627864063446763, "learning_rate": 2.121431357394928e-06, "loss": 0.014252334833145142, "step": 94480 }, { "epoch": 0.8892705882352941, "grad_norm": 0.3766521680406253, "learning_rate": 2.121375224625778e-06, "loss": 0.015224994719028473, "step": 94485 }, { "epoch": 0.8893176470588235, "grad_norm": 0.5247951747945989, "learning_rate": 2.121319096312187e-06, "loss": 0.013379023969173431, "step": 94490 }, { "epoch": 0.889364705882353, "grad_norm": 0.5475221104811809, "learning_rate": 2.1212629724535665e-06, "loss": 0.011944060027599335, "step": 94495 }, { "epoch": 0.8894117647058823, "grad_norm": 0.5879633629274713, "learning_rate": 2.1212068530493256e-06, "loss": 0.021172285079956055, "step": 94500 }, { "epoch": 0.8894588235294117, "grad_norm": 0.5823992866662683, "learning_rate": 2.1211507380988766e-06, "loss": 0.0175134539604187, "step": 94505 }, { "epoch": 0.8895058823529411, "grad_norm": 0.3408154989069552, "learning_rate": 2.1210946276016297e-06, "loss": 0.014138039946556092, "step": 94510 }, { "epoch": 0.8895529411764705, "grad_norm": 0.4550992670541415, "learning_rate": 2.1210385215569966e-06, "loss": 0.015682299435138703, "step": 94515 }, { "epoch": 0.8896, "grad_norm": 0.4524633568614669, "learning_rate": 2.120982419964387e-06, "loss": 0.014121624827384948, "step": 94520 }, { "epoch": 0.8896470588235295, "grad_norm": 0.3473428984212345, "learning_rate": 2.120926322823214e-06, "loss": 0.01223669871687889, "step": 94525 }, { "epoch": 0.8896941176470589, "grad_norm": 0.7768022197146923, "learning_rate": 2.1208702301328877e-06, "loss": 0.0139188751578331, "step": 94530 }, { "epoch": 0.8897411764705883, "grad_norm": 0.7689064436703208, "learning_rate": 2.1208141418928198e-06, "loss": 0.014102911949157715, "step": 94535 }, { "epoch": 0.8897882352941177, "grad_norm": 0.8443123740807997, "learning_rate": 2.120758058102422e-06, "loss": 0.01281598061323166, "step": 94540 }, { "epoch": 0.8898352941176471, "grad_norm": 0.3447086840764966, "learning_rate": 2.1207019787611066e-06, "loss": 0.013803009688854218, "step": 94545 }, { "epoch": 0.8898823529411765, "grad_norm": 0.5187368091677352, "learning_rate": 2.120645903868285e-06, "loss": 0.014294146001338959, "step": 94550 }, { "epoch": 0.8899294117647059, "grad_norm": 0.37208528433648536, "learning_rate": 2.1205898334233678e-06, "loss": 0.008405427634716033, "step": 94555 }, { "epoch": 0.8899764705882353, "grad_norm": 0.3204977767941579, "learning_rate": 2.120533767425769e-06, "loss": 0.012409961223602295, "step": 94560 }, { "epoch": 0.8900235294117647, "grad_norm": 0.6731658924930913, "learning_rate": 2.120477705874899e-06, "loss": 0.014235544204711913, "step": 94565 }, { "epoch": 0.8900705882352942, "grad_norm": 0.48640294383672805, "learning_rate": 2.1204216487701713e-06, "loss": 0.013212138414382934, "step": 94570 }, { "epoch": 0.8901176470588236, "grad_norm": 0.5540594223510872, "learning_rate": 2.120365596110998e-06, "loss": 0.012826672196388245, "step": 94575 }, { "epoch": 0.890164705882353, "grad_norm": 0.2798590354801967, "learning_rate": 2.120309547896791e-06, "loss": 0.010447150468826294, "step": 94580 }, { "epoch": 0.8902117647058824, "grad_norm": 0.6379639462620115, "learning_rate": 2.120253504126963e-06, "loss": 0.015299282968044281, "step": 94585 }, { "epoch": 0.8902588235294118, "grad_norm": 0.4903628767223458, "learning_rate": 2.1201974648009274e-06, "loss": 0.015902404487133027, "step": 94590 }, { "epoch": 0.8903058823529412, "grad_norm": 0.5872078345158157, "learning_rate": 2.1201414299180966e-06, "loss": 0.01860995590686798, "step": 94595 }, { "epoch": 0.8903529411764706, "grad_norm": 0.6791181920999466, "learning_rate": 2.1200853994778823e-06, "loss": 0.014840133488178253, "step": 94600 }, { "epoch": 0.8904, "grad_norm": 0.3862704026562204, "learning_rate": 2.1200293734797e-06, "loss": 0.012002497166395187, "step": 94605 }, { "epoch": 0.8904470588235294, "grad_norm": 0.3197950542034396, "learning_rate": 2.1199733519229597e-06, "loss": 0.011490912735462188, "step": 94610 }, { "epoch": 0.8904941176470588, "grad_norm": 0.38934888362742254, "learning_rate": 2.1199173348070774e-06, "loss": 0.013258150219917298, "step": 94615 }, { "epoch": 0.8905411764705883, "grad_norm": 0.4995579204175547, "learning_rate": 2.119861322131465e-06, "loss": 0.010095646977424622, "step": 94620 }, { "epoch": 0.8905882352941177, "grad_norm": 0.5326596236336419, "learning_rate": 2.119805313895536e-06, "loss": 0.013903719186782838, "step": 94625 }, { "epoch": 0.8906352941176471, "grad_norm": 0.4828600483534953, "learning_rate": 2.1197493100987036e-06, "loss": 0.015525934100151063, "step": 94630 }, { "epoch": 0.8906823529411765, "grad_norm": 0.440458017472792, "learning_rate": 2.1196933107403825e-06, "loss": 0.01702828109264374, "step": 94635 }, { "epoch": 0.8907294117647059, "grad_norm": 0.4947726886875574, "learning_rate": 2.119637315819986e-06, "loss": 0.015770763158798218, "step": 94640 }, { "epoch": 0.8907764705882353, "grad_norm": 0.41289065802358454, "learning_rate": 2.1195813253369276e-06, "loss": 0.01175404042005539, "step": 94645 }, { "epoch": 0.8908235294117647, "grad_norm": 0.3405116268062256, "learning_rate": 2.1195253392906216e-06, "loss": 0.012241221964359283, "step": 94650 }, { "epoch": 0.8908705882352941, "grad_norm": 0.6306133623088602, "learning_rate": 2.1194693576804816e-06, "loss": 0.01281656175851822, "step": 94655 }, { "epoch": 0.8909176470588235, "grad_norm": 0.3994774445073258, "learning_rate": 2.119413380505923e-06, "loss": 0.01440315842628479, "step": 94660 }, { "epoch": 0.890964705882353, "grad_norm": 0.5886405267674616, "learning_rate": 2.1193574077663583e-06, "loss": 0.013904930651187896, "step": 94665 }, { "epoch": 0.8910117647058824, "grad_norm": 0.7008437565206473, "learning_rate": 2.119301439461203e-06, "loss": 0.013912610709667206, "step": 94670 }, { "epoch": 0.8910588235294118, "grad_norm": 0.306857078572668, "learning_rate": 2.1192454755898715e-06, "loss": 0.011021692305803299, "step": 94675 }, { "epoch": 0.8911058823529412, "grad_norm": 0.4074247748067607, "learning_rate": 2.119189516151779e-06, "loss": 0.013011562824249267, "step": 94680 }, { "epoch": 0.8911529411764706, "grad_norm": 0.45311840696060274, "learning_rate": 2.119133561146339e-06, "loss": 0.013175001740455628, "step": 94685 }, { "epoch": 0.8912, "grad_norm": 0.28617469032658355, "learning_rate": 2.1190776105729674e-06, "loss": 0.011761520802974702, "step": 94690 }, { "epoch": 0.8912470588235294, "grad_norm": 0.5202438507963281, "learning_rate": 2.1190216644310786e-06, "loss": 0.01717207729816437, "step": 94695 }, { "epoch": 0.8912941176470588, "grad_norm": 0.28566031598507124, "learning_rate": 2.1189657227200876e-06, "loss": 0.009032425284385682, "step": 94700 }, { "epoch": 0.8913411764705882, "grad_norm": 0.3651876456230006, "learning_rate": 2.1189097854394093e-06, "loss": 0.015422829985618591, "step": 94705 }, { "epoch": 0.8913882352941176, "grad_norm": 0.4108564217687696, "learning_rate": 2.11885385258846e-06, "loss": 0.015463799238204956, "step": 94710 }, { "epoch": 0.8914352941176471, "grad_norm": 0.4230644408341857, "learning_rate": 2.118797924166654e-06, "loss": 0.012384416908025742, "step": 94715 }, { "epoch": 0.8914823529411765, "grad_norm": 0.43022160965146894, "learning_rate": 2.1187420001734076e-06, "loss": 0.01260460466146469, "step": 94720 }, { "epoch": 0.8915294117647059, "grad_norm": 0.35795440753723107, "learning_rate": 2.1186860806081354e-06, "loss": 0.0120513916015625, "step": 94725 }, { "epoch": 0.8915764705882353, "grad_norm": 0.4142485914491862, "learning_rate": 2.118630165470254e-06, "loss": 0.010813076794147492, "step": 94730 }, { "epoch": 0.8916235294117647, "grad_norm": 0.5713173723397137, "learning_rate": 2.118574254759179e-06, "loss": 0.014886975288391113, "step": 94735 }, { "epoch": 0.8916705882352941, "grad_norm": 0.327513469425592, "learning_rate": 2.118518348474326e-06, "loss": 0.015263734757900238, "step": 94740 }, { "epoch": 0.8917176470588235, "grad_norm": 0.37827462903781994, "learning_rate": 2.118462446615111e-06, "loss": 0.012264075875282287, "step": 94745 }, { "epoch": 0.8917647058823529, "grad_norm": 0.4383598602311369, "learning_rate": 2.118406549180951e-06, "loss": 0.01328323781490326, "step": 94750 }, { "epoch": 0.8918117647058823, "grad_norm": 0.39049843944402374, "learning_rate": 2.1183506561712617e-06, "loss": 0.013572317361831666, "step": 94755 }, { "epoch": 0.8918588235294118, "grad_norm": 0.4234387939877401, "learning_rate": 2.118294767585459e-06, "loss": 0.011603021621704101, "step": 94760 }, { "epoch": 0.8919058823529412, "grad_norm": 0.4467326874850586, "learning_rate": 2.1182388834229596e-06, "loss": 0.030870190262794493, "step": 94765 }, { "epoch": 0.8919529411764706, "grad_norm": 0.5312616003825597, "learning_rate": 2.11818300368318e-06, "loss": 0.010664668679237366, "step": 94770 }, { "epoch": 0.892, "grad_norm": 0.5552436282299328, "learning_rate": 2.1181271283655373e-06, "loss": 0.016076701879501342, "step": 94775 }, { "epoch": 0.8920470588235294, "grad_norm": 0.3969382447110245, "learning_rate": 2.118071257469448e-06, "loss": 0.016754338145256044, "step": 94780 }, { "epoch": 0.8920941176470588, "grad_norm": 0.5655818561548679, "learning_rate": 2.1180153909943286e-06, "loss": 0.015475012362003326, "step": 94785 }, { "epoch": 0.8921411764705882, "grad_norm": 0.513137660128406, "learning_rate": 2.117959528939597e-06, "loss": 0.013109743595123291, "step": 94790 }, { "epoch": 0.8921882352941176, "grad_norm": 0.4706770114747036, "learning_rate": 2.1179036713046697e-06, "loss": 0.014293873310089111, "step": 94795 }, { "epoch": 0.892235294117647, "grad_norm": 0.40947047761556554, "learning_rate": 2.1178478180889638e-06, "loss": 0.012534214556217194, "step": 94800 }, { "epoch": 0.8922823529411764, "grad_norm": 0.6480153112382893, "learning_rate": 2.1177919692918967e-06, "loss": 0.027092528343200684, "step": 94805 }, { "epoch": 0.8923294117647059, "grad_norm": 0.3971173797052234, "learning_rate": 2.117736124912886e-06, "loss": 0.016638723015785218, "step": 94810 }, { "epoch": 0.8923764705882353, "grad_norm": 0.5758276900485635, "learning_rate": 2.1176802849513494e-06, "loss": 0.015126372873783111, "step": 94815 }, { "epoch": 0.8924235294117647, "grad_norm": 0.33002209019077555, "learning_rate": 2.1176244494067036e-06, "loss": 0.015767940878868104, "step": 94820 }, { "epoch": 0.8924705882352941, "grad_norm": 0.4480730672730779, "learning_rate": 2.1175686182783674e-06, "loss": 0.0114600770175457, "step": 94825 }, { "epoch": 0.8925176470588235, "grad_norm": 0.46286219008995, "learning_rate": 2.1175127915657587e-06, "loss": 0.015943080186843872, "step": 94830 }, { "epoch": 0.8925647058823529, "grad_norm": 0.7607671580999754, "learning_rate": 2.1174569692682944e-06, "loss": 0.013018836081027985, "step": 94835 }, { "epoch": 0.8926117647058823, "grad_norm": 0.6052212397995422, "learning_rate": 2.1174011513853935e-06, "loss": 0.015176828205585479, "step": 94840 }, { "epoch": 0.8926588235294117, "grad_norm": 0.441560826811345, "learning_rate": 2.1173453379164736e-06, "loss": 0.011643646657466889, "step": 94845 }, { "epoch": 0.8927058823529411, "grad_norm": 0.45727815330903493, "learning_rate": 2.1172895288609537e-06, "loss": 0.01656791865825653, "step": 94850 }, { "epoch": 0.8927529411764706, "grad_norm": 0.7030471310567932, "learning_rate": 2.1172337242182513e-06, "loss": 0.018004333972930907, "step": 94855 }, { "epoch": 0.8928, "grad_norm": 0.5122579911470417, "learning_rate": 2.1171779239877856e-06, "loss": 0.012943938374519348, "step": 94860 }, { "epoch": 0.8928470588235294, "grad_norm": 0.8135235467237809, "learning_rate": 2.117122128168975e-06, "loss": 0.016895544528961182, "step": 94865 }, { "epoch": 0.8928941176470588, "grad_norm": 0.3893165354987263, "learning_rate": 2.1170663367612377e-06, "loss": 0.011938130110502243, "step": 94870 }, { "epoch": 0.8929411764705882, "grad_norm": 0.7773882278316865, "learning_rate": 2.1170105497639935e-06, "loss": 0.01498262882232666, "step": 94875 }, { "epoch": 0.8929882352941176, "grad_norm": 1.995985126656009, "learning_rate": 2.1169547671766603e-06, "loss": 0.01392270028591156, "step": 94880 }, { "epoch": 0.893035294117647, "grad_norm": 0.6377272374350091, "learning_rate": 2.1168989889986583e-06, "loss": 0.016058212518692015, "step": 94885 }, { "epoch": 0.8930823529411764, "grad_norm": 0.4516921824527867, "learning_rate": 2.1168432152294053e-06, "loss": 0.012012405693531037, "step": 94890 }, { "epoch": 0.8931294117647058, "grad_norm": 0.5114381024705114, "learning_rate": 2.1167874458683217e-06, "loss": 0.011451886594295501, "step": 94895 }, { "epoch": 0.8931764705882353, "grad_norm": 0.4445613137190865, "learning_rate": 2.116731680914826e-06, "loss": 0.014464497566223145, "step": 94900 }, { "epoch": 0.8932235294117647, "grad_norm": 0.47363507479492944, "learning_rate": 2.116675920368338e-06, "loss": 0.016795487701892854, "step": 94905 }, { "epoch": 0.8932705882352941, "grad_norm": 0.745106259160904, "learning_rate": 2.1166201642282776e-06, "loss": 0.015400491654872894, "step": 94910 }, { "epoch": 0.8933176470588235, "grad_norm": 0.791470229361932, "learning_rate": 2.1165644124940636e-06, "loss": 0.016390493512153624, "step": 94915 }, { "epoch": 0.893364705882353, "grad_norm": 0.5598180168967507, "learning_rate": 2.1165086651651168e-06, "loss": 0.011972470581531525, "step": 94920 }, { "epoch": 0.8934117647058823, "grad_norm": 0.5445583368789654, "learning_rate": 2.116452922240857e-06, "loss": 0.014421124756336213, "step": 94925 }, { "epoch": 0.8934588235294117, "grad_norm": 0.4645766972533362, "learning_rate": 2.116397183720703e-06, "loss": 0.00958993285894394, "step": 94930 }, { "epoch": 0.8935058823529411, "grad_norm": 0.7566246161423488, "learning_rate": 2.116341449604076e-06, "loss": 0.012304554879665374, "step": 94935 }, { "epoch": 0.8935529411764705, "grad_norm": 0.5044098038862516, "learning_rate": 2.116285719890396e-06, "loss": 0.01680179834365845, "step": 94940 }, { "epoch": 0.8936, "grad_norm": 1.022418884977139, "learning_rate": 2.116229994579083e-06, "loss": 0.014682993292808533, "step": 94945 }, { "epoch": 0.8936470588235295, "grad_norm": 0.4475593143601995, "learning_rate": 2.1161742736695574e-06, "loss": 0.013614675402641297, "step": 94950 }, { "epoch": 0.8936941176470589, "grad_norm": 0.837326762853818, "learning_rate": 2.116118557161241e-06, "loss": 0.01809457689523697, "step": 94955 }, { "epoch": 0.8937411764705883, "grad_norm": 0.583524726438198, "learning_rate": 2.116062845053553e-06, "loss": 0.01289558708667755, "step": 94960 }, { "epoch": 0.8937882352941177, "grad_norm": 0.4727639048297063, "learning_rate": 2.1160071373459144e-06, "loss": 0.013883979618549347, "step": 94965 }, { "epoch": 0.8938352941176471, "grad_norm": 0.6777962629780178, "learning_rate": 2.1159514340377463e-06, "loss": 0.014460667967796326, "step": 94970 }, { "epoch": 0.8938823529411765, "grad_norm": 0.5871408600288214, "learning_rate": 2.1158957351284693e-06, "loss": 0.01645478755235672, "step": 94975 }, { "epoch": 0.8939294117647059, "grad_norm": 0.5669858478216835, "learning_rate": 2.115840040617505e-06, "loss": 0.013935220241546632, "step": 94980 }, { "epoch": 0.8939764705882353, "grad_norm": 0.5060824224769431, "learning_rate": 2.1157843505042746e-06, "loss": 0.015551628172397613, "step": 94985 }, { "epoch": 0.8940235294117647, "grad_norm": 0.6027062690979663, "learning_rate": 2.1157286647881986e-06, "loss": 0.014325276017189026, "step": 94990 }, { "epoch": 0.8940705882352942, "grad_norm": 0.5260702281969949, "learning_rate": 2.115672983468699e-06, "loss": 0.014508669078350068, "step": 94995 }, { "epoch": 0.8941176470588236, "grad_norm": 0.3730977032283162, "learning_rate": 2.115617306545198e-06, "loss": 0.010799185931682586, "step": 95000 }, { "epoch": 0.894164705882353, "grad_norm": 0.5576558109282675, "learning_rate": 2.1155616340171152e-06, "loss": 0.014467427134513855, "step": 95005 }, { "epoch": 0.8942117647058824, "grad_norm": 0.45432257736550896, "learning_rate": 2.115505965883874e-06, "loss": 0.010982812196016312, "step": 95010 }, { "epoch": 0.8942588235294118, "grad_norm": 0.5156965619238691, "learning_rate": 2.1154503021448954e-06, "loss": 0.01067090630531311, "step": 95015 }, { "epoch": 0.8943058823529412, "grad_norm": 0.4713372158665483, "learning_rate": 2.1153946427996016e-06, "loss": 0.011135125160217285, "step": 95020 }, { "epoch": 0.8943529411764706, "grad_norm": 0.6148822955337735, "learning_rate": 2.115338987847415e-06, "loss": 0.013883855938911439, "step": 95025 }, { "epoch": 0.8944, "grad_norm": 0.5966128092361513, "learning_rate": 2.115283337287757e-06, "loss": 0.01669164001941681, "step": 95030 }, { "epoch": 0.8944470588235294, "grad_norm": 0.5498656753929069, "learning_rate": 2.1152276911200503e-06, "loss": 0.01696318984031677, "step": 95035 }, { "epoch": 0.8944941176470588, "grad_norm": 0.8630025875660219, "learning_rate": 2.115172049343717e-06, "loss": 0.011878082156181335, "step": 95040 }, { "epoch": 0.8945411764705883, "grad_norm": 0.5054286080435232, "learning_rate": 2.1151164119581796e-06, "loss": 0.01521962434053421, "step": 95045 }, { "epoch": 0.8945882352941177, "grad_norm": 0.40324927016585144, "learning_rate": 2.1150607789628606e-06, "loss": 0.01562925726175308, "step": 95050 }, { "epoch": 0.8946352941176471, "grad_norm": 0.5053975677310979, "learning_rate": 2.1150051503571835e-06, "loss": 0.016694816946983337, "step": 95055 }, { "epoch": 0.8946823529411765, "grad_norm": 0.46942193848202884, "learning_rate": 2.1149495261405695e-06, "loss": 0.013959650695323945, "step": 95060 }, { "epoch": 0.8947294117647059, "grad_norm": 0.7066422985042724, "learning_rate": 2.1148939063124425e-06, "loss": 0.016082023084163666, "step": 95065 }, { "epoch": 0.8947764705882353, "grad_norm": 0.41668229840664545, "learning_rate": 2.1148382908722257e-06, "loss": 0.011411570012569427, "step": 95070 }, { "epoch": 0.8948235294117647, "grad_norm": 0.5211838808552349, "learning_rate": 2.1147826798193418e-06, "loss": 0.014074334502220153, "step": 95075 }, { "epoch": 0.8948705882352941, "grad_norm": 0.7696161693051246, "learning_rate": 2.114727073153213e-06, "loss": 0.02218013405799866, "step": 95080 }, { "epoch": 0.8949176470588235, "grad_norm": 0.4600625554429221, "learning_rate": 2.114671470873264e-06, "loss": 0.012063486874103546, "step": 95085 }, { "epoch": 0.894964705882353, "grad_norm": 0.4800977896802737, "learning_rate": 2.1146158729789185e-06, "loss": 0.013717108964920044, "step": 95090 }, { "epoch": 0.8950117647058824, "grad_norm": 0.42133205986724875, "learning_rate": 2.114560279469599e-06, "loss": 0.012740874290466308, "step": 95095 }, { "epoch": 0.8950588235294118, "grad_norm": 0.4922335192201883, "learning_rate": 2.114504690344729e-06, "loss": 0.012226563692092896, "step": 95100 }, { "epoch": 0.8951058823529412, "grad_norm": 0.367466894121878, "learning_rate": 2.1144491056037327e-06, "loss": 0.01582358330488205, "step": 95105 }, { "epoch": 0.8951529411764706, "grad_norm": 0.36703264574951666, "learning_rate": 2.114393525246034e-06, "loss": 0.014279107749462127, "step": 95110 }, { "epoch": 0.8952, "grad_norm": 0.35743987764278057, "learning_rate": 2.1143379492710566e-06, "loss": 0.014389148354530335, "step": 95115 }, { "epoch": 0.8952470588235294, "grad_norm": 0.3660565490345868, "learning_rate": 2.1142823776782245e-06, "loss": 0.012387506663799286, "step": 95120 }, { "epoch": 0.8952941176470588, "grad_norm": 0.3962305250678654, "learning_rate": 2.1142268104669626e-06, "loss": 0.017717379331588744, "step": 95125 }, { "epoch": 0.8953411764705882, "grad_norm": 0.4262435548661868, "learning_rate": 2.114171247636694e-06, "loss": 0.012683829665184021, "step": 95130 }, { "epoch": 0.8953882352941176, "grad_norm": 0.8030907314421939, "learning_rate": 2.1141156891868435e-06, "loss": 0.016518697142601013, "step": 95135 }, { "epoch": 0.8954352941176471, "grad_norm": 1.0502038006718806, "learning_rate": 2.1140601351168356e-06, "loss": 0.013321763277053833, "step": 95140 }, { "epoch": 0.8954823529411765, "grad_norm": 0.43812424032568564, "learning_rate": 2.1140045854260953e-06, "loss": 0.010852809250354766, "step": 95145 }, { "epoch": 0.8955294117647059, "grad_norm": 0.45224630819444844, "learning_rate": 2.113949040114047e-06, "loss": 0.014978645741939545, "step": 95150 }, { "epoch": 0.8955764705882353, "grad_norm": 0.49833666097258134, "learning_rate": 2.113893499180115e-06, "loss": 0.01434485912322998, "step": 95155 }, { "epoch": 0.8956235294117647, "grad_norm": 0.46277475754483244, "learning_rate": 2.113837962623724e-06, "loss": 0.012399470061063766, "step": 95160 }, { "epoch": 0.8956705882352941, "grad_norm": 0.3588902882991413, "learning_rate": 2.1137824304443e-06, "loss": 0.015088708698749542, "step": 95165 }, { "epoch": 0.8957176470588235, "grad_norm": 0.7636472719183447, "learning_rate": 2.113726902641268e-06, "loss": 0.013428467512130737, "step": 95170 }, { "epoch": 0.8957647058823529, "grad_norm": 0.47265725166233796, "learning_rate": 2.1136713792140527e-06, "loss": 0.011082415282726289, "step": 95175 }, { "epoch": 0.8958117647058823, "grad_norm": 0.4544392622885359, "learning_rate": 2.1136158601620792e-06, "loss": 0.015461081266403198, "step": 95180 }, { "epoch": 0.8958588235294118, "grad_norm": 0.5556214111262113, "learning_rate": 2.113560345484774e-06, "loss": 0.014472161233425141, "step": 95185 }, { "epoch": 0.8959058823529412, "grad_norm": 0.655723620502586, "learning_rate": 2.113504835181561e-06, "loss": 0.014091964066028594, "step": 95190 }, { "epoch": 0.8959529411764706, "grad_norm": 0.34158420228201486, "learning_rate": 2.113449329251867e-06, "loss": 0.017221392691135408, "step": 95195 }, { "epoch": 0.896, "grad_norm": 0.5073683968440358, "learning_rate": 2.1133938276951177e-06, "loss": 0.013006749749183654, "step": 95200 }, { "epoch": 0.8960470588235294, "grad_norm": 0.41087297858305727, "learning_rate": 2.113338330510739e-06, "loss": 0.014643499255180359, "step": 95205 }, { "epoch": 0.8960941176470588, "grad_norm": 0.37384137163084313, "learning_rate": 2.1132828376981564e-06, "loss": 0.01468479037284851, "step": 95210 }, { "epoch": 0.8961411764705882, "grad_norm": 0.6155935877491294, "learning_rate": 2.113227349256795e-06, "loss": 0.012919847667217255, "step": 95215 }, { "epoch": 0.8961882352941176, "grad_norm": 0.5270966199134294, "learning_rate": 2.1131718651860832e-06, "loss": 0.01443517804145813, "step": 95220 }, { "epoch": 0.896235294117647, "grad_norm": 0.3188483263352151, "learning_rate": 2.1131163854854454e-06, "loss": 0.015180903673171996, "step": 95225 }, { "epoch": 0.8962823529411764, "grad_norm": 0.3546272164230311, "learning_rate": 2.113060910154309e-06, "loss": 0.013861243426799775, "step": 95230 }, { "epoch": 0.8963294117647059, "grad_norm": 0.4014472436723959, "learning_rate": 2.1130054391921e-06, "loss": 0.016676679253578186, "step": 95235 }, { "epoch": 0.8963764705882353, "grad_norm": 0.5793661138721563, "learning_rate": 2.1129499725982455e-06, "loss": 0.018589667975902557, "step": 95240 }, { "epoch": 0.8964235294117647, "grad_norm": 0.4137450952176634, "learning_rate": 2.1128945103721714e-06, "loss": 0.013564558327198028, "step": 95245 }, { "epoch": 0.8964705882352941, "grad_norm": 0.447695664817118, "learning_rate": 2.112839052513305e-06, "loss": 0.01515323668718338, "step": 95250 }, { "epoch": 0.8965176470588235, "grad_norm": 0.4123917293199306, "learning_rate": 2.1127835990210736e-06, "loss": 0.01573508232831955, "step": 95255 }, { "epoch": 0.8965647058823529, "grad_norm": 0.5230612546885568, "learning_rate": 2.112728149894903e-06, "loss": 0.015801307559013367, "step": 95260 }, { "epoch": 0.8966117647058823, "grad_norm": 0.2956110715340015, "learning_rate": 2.112672705134221e-06, "loss": 0.013035620748996734, "step": 95265 }, { "epoch": 0.8966588235294117, "grad_norm": 0.5327084397858942, "learning_rate": 2.112617264738455e-06, "loss": 0.01699296534061432, "step": 95270 }, { "epoch": 0.8967058823529411, "grad_norm": 0.549324829624785, "learning_rate": 2.112561828707032e-06, "loss": 0.014413791894912719, "step": 95275 }, { "epoch": 0.8967529411764706, "grad_norm": 0.4334458700011044, "learning_rate": 2.1125063970393797e-06, "loss": 0.011220208555459976, "step": 95280 }, { "epoch": 0.8968, "grad_norm": 0.42333065038311257, "learning_rate": 2.1124509697349248e-06, "loss": 0.013182634115219116, "step": 95285 }, { "epoch": 0.8968470588235294, "grad_norm": 0.3841115751054331, "learning_rate": 2.112395546793096e-06, "loss": 0.014584621787071228, "step": 95290 }, { "epoch": 0.8968941176470588, "grad_norm": 0.5952222304891293, "learning_rate": 2.112340128213321e-06, "loss": 0.015730804204940795, "step": 95295 }, { "epoch": 0.8969411764705882, "grad_norm": 0.7026931289499623, "learning_rate": 2.1122847139950266e-06, "loss": 0.01446220576763153, "step": 95300 }, { "epoch": 0.8969882352941176, "grad_norm": 0.367864681602878, "learning_rate": 2.112229304137642e-06, "loss": 0.018918488919734956, "step": 95305 }, { "epoch": 0.897035294117647, "grad_norm": 0.5362702321597371, "learning_rate": 2.112173898640594e-06, "loss": 0.013604938983917236, "step": 95310 }, { "epoch": 0.8970823529411764, "grad_norm": 0.5310626366239649, "learning_rate": 2.1121184975033114e-06, "loss": 0.013446037471294404, "step": 95315 }, { "epoch": 0.8971294117647058, "grad_norm": 0.5394743793174653, "learning_rate": 2.112063100725223e-06, "loss": 0.01595628559589386, "step": 95320 }, { "epoch": 0.8971764705882352, "grad_norm": 0.4724154902433035, "learning_rate": 2.1120077083057563e-06, "loss": 0.01392650306224823, "step": 95325 }, { "epoch": 0.8972235294117648, "grad_norm": 0.38071499197365805, "learning_rate": 2.11195232024434e-06, "loss": 0.015770548582077028, "step": 95330 }, { "epoch": 0.8972705882352942, "grad_norm": 0.5467344343884307, "learning_rate": 2.111896936540403e-06, "loss": 0.015632966160774232, "step": 95335 }, { "epoch": 0.8973176470588236, "grad_norm": 0.5267216297847314, "learning_rate": 2.1118415571933735e-06, "loss": 0.013310274481773377, "step": 95340 }, { "epoch": 0.897364705882353, "grad_norm": 0.9516929629143972, "learning_rate": 2.1117861822026805e-06, "loss": 0.013941387832164764, "step": 95345 }, { "epoch": 0.8974117647058824, "grad_norm": 0.4544827631507427, "learning_rate": 2.1117308115677527e-06, "loss": 0.012866321206092834, "step": 95350 }, { "epoch": 0.8974588235294118, "grad_norm": 0.6569686197740474, "learning_rate": 2.1116754452880194e-06, "loss": 0.016934938728809357, "step": 95355 }, { "epoch": 0.8975058823529412, "grad_norm": 0.37952786740528993, "learning_rate": 2.11162008336291e-06, "loss": 0.012498107552528382, "step": 95360 }, { "epoch": 0.8975529411764706, "grad_norm": 0.525882116239008, "learning_rate": 2.1115647257918526e-06, "loss": 0.016035348176956177, "step": 95365 }, { "epoch": 0.8976, "grad_norm": 0.2774991043013269, "learning_rate": 2.111509372574278e-06, "loss": 0.007776564359664917, "step": 95370 }, { "epoch": 0.8976470588235295, "grad_norm": 0.38826098177612245, "learning_rate": 2.1114540237096144e-06, "loss": 0.013625900447368621, "step": 95375 }, { "epoch": 0.8976941176470589, "grad_norm": 0.5389727550530858, "learning_rate": 2.111398679197292e-06, "loss": 0.014268492162227631, "step": 95380 }, { "epoch": 0.8977411764705883, "grad_norm": 0.3583934366583589, "learning_rate": 2.11134333903674e-06, "loss": 0.01367546319961548, "step": 95385 }, { "epoch": 0.8977882352941177, "grad_norm": 0.5763130630838211, "learning_rate": 2.111288003227388e-06, "loss": 0.013667696714401245, "step": 95390 }, { "epoch": 0.8978352941176471, "grad_norm": 0.7294879609493411, "learning_rate": 2.111232671768666e-06, "loss": 0.012569102644920348, "step": 95395 }, { "epoch": 0.8978823529411765, "grad_norm": 0.45849994025505764, "learning_rate": 2.1111773446600045e-06, "loss": 0.013335999846458436, "step": 95400 }, { "epoch": 0.8979294117647059, "grad_norm": 0.45765842927011235, "learning_rate": 2.111122021900833e-06, "loss": 0.014382216334342956, "step": 95405 }, { "epoch": 0.8979764705882353, "grad_norm": 0.40138965080285055, "learning_rate": 2.111066703490581e-06, "loss": 0.013281580805778504, "step": 95410 }, { "epoch": 0.8980235294117647, "grad_norm": 0.5134793708958746, "learning_rate": 2.1110113894286807e-06, "loss": 0.013038536906242371, "step": 95415 }, { "epoch": 0.8980705882352941, "grad_norm": 0.5954085076616324, "learning_rate": 2.1109560797145606e-06, "loss": 0.01481141448020935, "step": 95420 }, { "epoch": 0.8981176470588236, "grad_norm": 0.3923479101235959, "learning_rate": 2.110900774347652e-06, "loss": 0.014868900179862976, "step": 95425 }, { "epoch": 0.898164705882353, "grad_norm": 0.4336577532755815, "learning_rate": 2.1108454733273853e-06, "loss": 0.01311897337436676, "step": 95430 }, { "epoch": 0.8982117647058824, "grad_norm": 0.6678705043167071, "learning_rate": 2.1107901766531904e-06, "loss": 0.014474603533744811, "step": 95435 }, { "epoch": 0.8982588235294118, "grad_norm": 0.5828873619899203, "learning_rate": 2.1107348843244995e-06, "loss": 0.015074869990348816, "step": 95440 }, { "epoch": 0.8983058823529412, "grad_norm": 0.6123769416105993, "learning_rate": 2.1106795963407424e-06, "loss": 0.013936753571033477, "step": 95445 }, { "epoch": 0.8983529411764706, "grad_norm": 0.2644355850018463, "learning_rate": 2.1106243127013508e-06, "loss": 0.010110098123550414, "step": 95450 }, { "epoch": 0.8984, "grad_norm": 0.6304594468966008, "learning_rate": 2.1105690334057554e-06, "loss": 0.0144563227891922, "step": 95455 }, { "epoch": 0.8984470588235294, "grad_norm": 0.4306486493627518, "learning_rate": 2.1105137584533868e-06, "loss": 0.01580774486064911, "step": 95460 }, { "epoch": 0.8984941176470588, "grad_norm": 0.588203987753144, "learning_rate": 2.110458487843678e-06, "loss": 0.014693750441074372, "step": 95465 }, { "epoch": 0.8985411764705883, "grad_norm": 0.49364330644281096, "learning_rate": 2.1104032215760585e-06, "loss": 0.012823456525802612, "step": 95470 }, { "epoch": 0.8985882352941177, "grad_norm": 0.6670303201427396, "learning_rate": 2.1103479596499605e-06, "loss": 0.011729351431131362, "step": 95475 }, { "epoch": 0.8986352941176471, "grad_norm": 0.3829746944620059, "learning_rate": 2.1102927020648166e-06, "loss": 0.016190379858016968, "step": 95480 }, { "epoch": 0.8986823529411765, "grad_norm": 0.5186766628883126, "learning_rate": 2.1102374488200563e-06, "loss": 0.014341261982917786, "step": 95485 }, { "epoch": 0.8987294117647059, "grad_norm": 0.30227532634835247, "learning_rate": 2.110182199915114e-06, "loss": 0.01653742492198944, "step": 95490 }, { "epoch": 0.8987764705882353, "grad_norm": 0.46802337818157114, "learning_rate": 2.1101269553494197e-06, "loss": 0.018298134207725525, "step": 95495 }, { "epoch": 0.8988235294117647, "grad_norm": 0.4068801840052615, "learning_rate": 2.1100717151224063e-06, "loss": 0.015642426908016205, "step": 95500 }, { "epoch": 0.8988705882352941, "grad_norm": 0.5964732853043399, "learning_rate": 2.1100164792335057e-06, "loss": 0.012447570264339448, "step": 95505 }, { "epoch": 0.8989176470588235, "grad_norm": 0.4462572052017837, "learning_rate": 2.1099612476821498e-06, "loss": 0.017391470074653626, "step": 95510 }, { "epoch": 0.8989647058823529, "grad_norm": 0.8165765214726722, "learning_rate": 2.1099060204677715e-06, "loss": 0.01535833477973938, "step": 95515 }, { "epoch": 0.8990117647058824, "grad_norm": 0.5280856230586172, "learning_rate": 2.1098507975898035e-06, "loss": 0.014130535721778869, "step": 95520 }, { "epoch": 0.8990588235294118, "grad_norm": 0.37039815317748487, "learning_rate": 2.1097955790476772e-06, "loss": 0.012786144018173217, "step": 95525 }, { "epoch": 0.8991058823529412, "grad_norm": 0.5444438559288787, "learning_rate": 2.109740364840826e-06, "loss": 0.011033844947814942, "step": 95530 }, { "epoch": 0.8991529411764706, "grad_norm": 0.7048854968393593, "learning_rate": 2.1096851549686826e-06, "loss": 0.013452859222888946, "step": 95535 }, { "epoch": 0.8992, "grad_norm": 0.555441995509805, "learning_rate": 2.10962994943068e-06, "loss": 0.011279787123203277, "step": 95540 }, { "epoch": 0.8992470588235294, "grad_norm": 0.48985496622877184, "learning_rate": 2.1095747482262505e-06, "loss": 0.013958612084388733, "step": 95545 }, { "epoch": 0.8992941176470588, "grad_norm": 0.5816832441231087, "learning_rate": 2.109519551354828e-06, "loss": 0.011049866676330566, "step": 95550 }, { "epoch": 0.8993411764705882, "grad_norm": 0.46312020742766474, "learning_rate": 2.109464358815845e-06, "loss": 0.014245887100696564, "step": 95555 }, { "epoch": 0.8993882352941176, "grad_norm": 0.4035638235428007, "learning_rate": 2.109409170608735e-06, "loss": 0.013756735622882843, "step": 95560 }, { "epoch": 0.8994352941176471, "grad_norm": 0.3558117193610131, "learning_rate": 2.109353986732932e-06, "loss": 0.012348122149705886, "step": 95565 }, { "epoch": 0.8994823529411765, "grad_norm": 0.3982147129423145, "learning_rate": 2.1092988071878684e-06, "loss": 0.01183943748474121, "step": 95570 }, { "epoch": 0.8995294117647059, "grad_norm": 1.8305121719045878, "learning_rate": 2.1092436319729787e-06, "loss": 0.01615409255027771, "step": 95575 }, { "epoch": 0.8995764705882353, "grad_norm": 0.5083053990164336, "learning_rate": 2.109188461087696e-06, "loss": 0.01345670223236084, "step": 95580 }, { "epoch": 0.8996235294117647, "grad_norm": 0.29235166186141354, "learning_rate": 2.109133294531454e-06, "loss": 0.015448778867721558, "step": 95585 }, { "epoch": 0.8996705882352941, "grad_norm": 0.482308405055679, "learning_rate": 2.1090781323036873e-06, "loss": 0.01398920565843582, "step": 95590 }, { "epoch": 0.8997176470588235, "grad_norm": 0.8552453936809513, "learning_rate": 2.1090229744038295e-06, "loss": 0.012655168771743774, "step": 95595 }, { "epoch": 0.8997647058823529, "grad_norm": 0.24610615756769996, "learning_rate": 2.108967820831314e-06, "loss": 0.013056740164756775, "step": 95600 }, { "epoch": 0.8998117647058823, "grad_norm": 0.3819387940511582, "learning_rate": 2.108912671585576e-06, "loss": 0.01220313012599945, "step": 95605 }, { "epoch": 0.8998588235294117, "grad_norm": 0.4801492321006272, "learning_rate": 2.1088575266660492e-06, "loss": 0.014277783036231995, "step": 95610 }, { "epoch": 0.8999058823529412, "grad_norm": 0.43592237428795977, "learning_rate": 2.1088023860721685e-06, "loss": 0.011035291850566864, "step": 95615 }, { "epoch": 0.8999529411764706, "grad_norm": 0.4468121889171235, "learning_rate": 2.1087472498033684e-06, "loss": 0.012989956140518188, "step": 95620 }, { "epoch": 0.9, "grad_norm": 0.4096885529415377, "learning_rate": 2.108692117859083e-06, "loss": 0.01825997233390808, "step": 95625 }, { "epoch": 0.9000470588235294, "grad_norm": 0.4598112083795508, "learning_rate": 2.1086369902387474e-06, "loss": 0.013204169273376466, "step": 95630 }, { "epoch": 0.9000941176470588, "grad_norm": 0.6692476845067536, "learning_rate": 2.1085818669417965e-06, "loss": 0.01371273398399353, "step": 95635 }, { "epoch": 0.9001411764705882, "grad_norm": 0.6126445608733352, "learning_rate": 2.1085267479676648e-06, "loss": 0.013039782643318176, "step": 95640 }, { "epoch": 0.9001882352941176, "grad_norm": 0.4086130773028614, "learning_rate": 2.1084716333157876e-06, "loss": 0.014276403188705444, "step": 95645 }, { "epoch": 0.900235294117647, "grad_norm": 0.6298684150485504, "learning_rate": 2.1084165229856004e-06, "loss": 0.012216152250766754, "step": 95650 }, { "epoch": 0.9002823529411764, "grad_norm": 0.5251406773102567, "learning_rate": 2.1083614169765377e-06, "loss": 0.015572543442249297, "step": 95655 }, { "epoch": 0.9003294117647059, "grad_norm": 0.6853670022739865, "learning_rate": 2.108306315288035e-06, "loss": 0.012977837026119233, "step": 95660 }, { "epoch": 0.9003764705882353, "grad_norm": 0.5021902377828088, "learning_rate": 2.108251217919528e-06, "loss": 0.012682744860649109, "step": 95665 }, { "epoch": 0.9004235294117647, "grad_norm": 0.7779712055073214, "learning_rate": 2.108196124870453e-06, "loss": 0.021458891034126282, "step": 95670 }, { "epoch": 0.9004705882352941, "grad_norm": 0.6461275441952238, "learning_rate": 2.108141036140244e-06, "loss": 0.015838047862052916, "step": 95675 }, { "epoch": 0.9005176470588235, "grad_norm": 0.34846456508937956, "learning_rate": 2.108085951728338e-06, "loss": 0.014162246882915498, "step": 95680 }, { "epoch": 0.9005647058823529, "grad_norm": 0.6818589624073235, "learning_rate": 2.10803087163417e-06, "loss": 0.02040775716304779, "step": 95685 }, { "epoch": 0.9006117647058823, "grad_norm": 0.4534232794887434, "learning_rate": 2.107975795857177e-06, "loss": 0.015059179067611695, "step": 95690 }, { "epoch": 0.9006588235294117, "grad_norm": 0.4123383760612502, "learning_rate": 2.1079207243967942e-06, "loss": 0.014341658353805542, "step": 95695 }, { "epoch": 0.9007058823529411, "grad_norm": 0.4507596628963574, "learning_rate": 2.107865657252458e-06, "loss": 0.01277538388967514, "step": 95700 }, { "epoch": 0.9007529411764705, "grad_norm": 0.4813035668262053, "learning_rate": 2.107810594423605e-06, "loss": 0.01521536409854889, "step": 95705 }, { "epoch": 0.9008, "grad_norm": 0.5338225186311967, "learning_rate": 2.107755535909671e-06, "loss": 0.012953497469425201, "step": 95710 }, { "epoch": 0.9008470588235294, "grad_norm": 0.2754293424979839, "learning_rate": 2.107700481710093e-06, "loss": 0.012384069710969925, "step": 95715 }, { "epoch": 0.9008941176470588, "grad_norm": 0.36299977687426627, "learning_rate": 2.107645431824307e-06, "loss": 0.016472649574279786, "step": 95720 }, { "epoch": 0.9009411764705882, "grad_norm": 0.5492993947954623, "learning_rate": 2.1075903862517505e-06, "loss": 0.01561683714389801, "step": 95725 }, { "epoch": 0.9009882352941176, "grad_norm": 0.33564228348301295, "learning_rate": 2.107535344991859e-06, "loss": 0.011320894956588745, "step": 95730 }, { "epoch": 0.901035294117647, "grad_norm": 0.5288033824995989, "learning_rate": 2.107480308044071e-06, "loss": 0.018009832501411437, "step": 95735 }, { "epoch": 0.9010823529411764, "grad_norm": 0.5236179071409071, "learning_rate": 2.1074252754078225e-06, "loss": 0.015678150951862334, "step": 95740 }, { "epoch": 0.9011294117647058, "grad_norm": 0.5432540962755055, "learning_rate": 2.1073702470825507e-06, "loss": 0.015718130767345427, "step": 95745 }, { "epoch": 0.9011764705882352, "grad_norm": 0.6478226166358504, "learning_rate": 2.1073152230676927e-06, "loss": 0.013852143287658691, "step": 95750 }, { "epoch": 0.9012235294117648, "grad_norm": 0.34212749364467465, "learning_rate": 2.1072602033626863e-06, "loss": 0.010171428322792053, "step": 95755 }, { "epoch": 0.9012705882352942, "grad_norm": 0.5789908150663448, "learning_rate": 2.1072051879669684e-06, "loss": 0.01625829339027405, "step": 95760 }, { "epoch": 0.9013176470588236, "grad_norm": 0.4270770467020299, "learning_rate": 2.1071501768799766e-06, "loss": 0.015065297484397888, "step": 95765 }, { "epoch": 0.901364705882353, "grad_norm": 0.39306127068830293, "learning_rate": 2.107095170101149e-06, "loss": 0.012130457162857055, "step": 95770 }, { "epoch": 0.9014117647058824, "grad_norm": 0.5968483232780939, "learning_rate": 2.1070401676299223e-06, "loss": 0.01284245252609253, "step": 95775 }, { "epoch": 0.9014588235294118, "grad_norm": 0.5454619720464149, "learning_rate": 2.106985169465735e-06, "loss": 0.013389256596565247, "step": 95780 }, { "epoch": 0.9015058823529412, "grad_norm": 0.3764221475126664, "learning_rate": 2.1069301756080252e-06, "loss": 0.01381995677947998, "step": 95785 }, { "epoch": 0.9015529411764706, "grad_norm": 0.6817652460920025, "learning_rate": 2.106875186056231e-06, "loss": 0.015994393825531007, "step": 95790 }, { "epoch": 0.9016, "grad_norm": 0.4534288481940637, "learning_rate": 2.1068202008097896e-06, "loss": 0.017305010557174684, "step": 95795 }, { "epoch": 0.9016470588235294, "grad_norm": 0.3608846208194307, "learning_rate": 2.1067652198681398e-06, "loss": 0.01874067485332489, "step": 95800 }, { "epoch": 0.9016941176470589, "grad_norm": 0.49558296381354255, "learning_rate": 2.10671024323072e-06, "loss": 0.013790807127952576, "step": 95805 }, { "epoch": 0.9017411764705883, "grad_norm": 0.40884890511745964, "learning_rate": 2.1066552708969688e-06, "loss": 0.013090106844902038, "step": 95810 }, { "epoch": 0.9017882352941177, "grad_norm": 0.4889168536110717, "learning_rate": 2.106600302866324e-06, "loss": 0.014844471216201782, "step": 95815 }, { "epoch": 0.9018352941176471, "grad_norm": 0.5217914324692497, "learning_rate": 2.1065453391382247e-06, "loss": 0.012316108494997025, "step": 95820 }, { "epoch": 0.9018823529411765, "grad_norm": 0.3905510790165584, "learning_rate": 2.10649037971211e-06, "loss": 0.013914906978607177, "step": 95825 }, { "epoch": 0.9019294117647059, "grad_norm": 0.5565943953622766, "learning_rate": 2.1064354245874187e-06, "loss": 0.016084504127502442, "step": 95830 }, { "epoch": 0.9019764705882353, "grad_norm": 0.45376618370091876, "learning_rate": 2.106380473763589e-06, "loss": 0.016988106071949005, "step": 95835 }, { "epoch": 0.9020235294117647, "grad_norm": 0.43003450616403066, "learning_rate": 2.10632552724006e-06, "loss": 0.013464763760566711, "step": 95840 }, { "epoch": 0.9020705882352941, "grad_norm": 0.37068404732458365, "learning_rate": 2.1062705850162715e-06, "loss": 0.010860943794250488, "step": 95845 }, { "epoch": 0.9021176470588236, "grad_norm": 0.7273630244112431, "learning_rate": 2.106215647091663e-06, "loss": 0.01303248256444931, "step": 95850 }, { "epoch": 0.902164705882353, "grad_norm": 0.4845469813626892, "learning_rate": 2.1061607134656724e-06, "loss": 0.02362702339887619, "step": 95855 }, { "epoch": 0.9022117647058824, "grad_norm": 0.4853492143032431, "learning_rate": 2.106105784137741e-06, "loss": 0.01323462426662445, "step": 95860 }, { "epoch": 0.9022588235294118, "grad_norm": 0.42090433449676806, "learning_rate": 2.1060508591073066e-06, "loss": 0.012890703976154327, "step": 95865 }, { "epoch": 0.9023058823529412, "grad_norm": 0.6449872666211783, "learning_rate": 2.10599593837381e-06, "loss": 0.015636500716209412, "step": 95870 }, { "epoch": 0.9023529411764706, "grad_norm": 0.634958665080794, "learning_rate": 2.1059410219366906e-06, "loss": 0.01463901400566101, "step": 95875 }, { "epoch": 0.9024, "grad_norm": 0.4580149687930778, "learning_rate": 2.105886109795389e-06, "loss": 0.013102038204669953, "step": 95880 }, { "epoch": 0.9024470588235294, "grad_norm": 0.35273251763670044, "learning_rate": 2.1058312019493437e-06, "loss": 0.013390833139419555, "step": 95885 }, { "epoch": 0.9024941176470588, "grad_norm": 0.5061019013734158, "learning_rate": 2.105776298397996e-06, "loss": 0.01389099657535553, "step": 95890 }, { "epoch": 0.9025411764705882, "grad_norm": 0.4763646377715928, "learning_rate": 2.1057213991407847e-06, "loss": 0.014153607189655304, "step": 95895 }, { "epoch": 0.9025882352941177, "grad_norm": 0.568995626082071, "learning_rate": 2.105666504177152e-06, "loss": 0.01370593011379242, "step": 95900 }, { "epoch": 0.9026352941176471, "grad_norm": 0.5866959400045337, "learning_rate": 2.105611613506537e-06, "loss": 0.016944092512130738, "step": 95905 }, { "epoch": 0.9026823529411765, "grad_norm": 0.5807746157906823, "learning_rate": 2.10555672712838e-06, "loss": 0.014503659307956695, "step": 95910 }, { "epoch": 0.9027294117647059, "grad_norm": 0.7381150290169202, "learning_rate": 2.1055018450421223e-06, "loss": 0.01603201627731323, "step": 95915 }, { "epoch": 0.9027764705882353, "grad_norm": 0.43950056591935605, "learning_rate": 2.105446967247204e-06, "loss": 0.010925748944282531, "step": 95920 }, { "epoch": 0.9028235294117647, "grad_norm": 0.34368024004829817, "learning_rate": 2.1053920937430666e-06, "loss": 0.013148483633995057, "step": 95925 }, { "epoch": 0.9028705882352941, "grad_norm": 0.4017584191179144, "learning_rate": 2.1053372245291503e-06, "loss": 0.01701391488313675, "step": 95930 }, { "epoch": 0.9029176470588235, "grad_norm": 0.4309542325010616, "learning_rate": 2.1052823596048968e-06, "loss": 0.016723114252090453, "step": 95935 }, { "epoch": 0.9029647058823529, "grad_norm": 0.5097099806336606, "learning_rate": 2.105227498969746e-06, "loss": 0.011130310595035553, "step": 95940 }, { "epoch": 0.9030117647058824, "grad_norm": 0.33601045243337174, "learning_rate": 2.1051726426231404e-06, "loss": 0.013400612771511078, "step": 95945 }, { "epoch": 0.9030588235294118, "grad_norm": 0.401091074220875, "learning_rate": 2.1051177905645203e-06, "loss": 0.013738995790481568, "step": 95950 }, { "epoch": 0.9031058823529412, "grad_norm": 0.467708542569839, "learning_rate": 2.1050629427933277e-06, "loss": 0.014843736588954926, "step": 95955 }, { "epoch": 0.9031529411764706, "grad_norm": 0.44161569787493976, "learning_rate": 2.105008099309004e-06, "loss": 0.013982482254505157, "step": 95960 }, { "epoch": 0.9032, "grad_norm": 0.49886088935272294, "learning_rate": 2.1049532601109906e-06, "loss": 0.012567247450351714, "step": 95965 }, { "epoch": 0.9032470588235294, "grad_norm": 0.5118527882157518, "learning_rate": 2.104898425198729e-06, "loss": 0.016708570718765258, "step": 95970 }, { "epoch": 0.9032941176470588, "grad_norm": 0.361120892238405, "learning_rate": 2.104843594571662e-06, "loss": 0.012861064076423645, "step": 95975 }, { "epoch": 0.9033411764705882, "grad_norm": 0.3880146590228866, "learning_rate": 2.10478876822923e-06, "loss": 0.01777094751596451, "step": 95980 }, { "epoch": 0.9033882352941176, "grad_norm": 0.39059255507289264, "learning_rate": 2.104733946170876e-06, "loss": 0.011865460127592087, "step": 95985 }, { "epoch": 0.9034352941176471, "grad_norm": 0.4889062376133126, "learning_rate": 2.1046791283960417e-06, "loss": 0.013660040497779847, "step": 95990 }, { "epoch": 0.9034823529411765, "grad_norm": 0.4003615530452114, "learning_rate": 2.10462431490417e-06, "loss": 0.013831144571304322, "step": 95995 }, { "epoch": 0.9035294117647059, "grad_norm": 0.37982925141924573, "learning_rate": 2.1045695056947025e-06, "loss": 0.013325877487659454, "step": 96000 }, { "epoch": 0.9035764705882353, "grad_norm": 0.4968195997765346, "learning_rate": 2.1045147007670822e-06, "loss": 0.01756075620651245, "step": 96005 }, { "epoch": 0.9036235294117647, "grad_norm": 0.4709869433679678, "learning_rate": 2.1044599001207506e-06, "loss": 0.013679274916648864, "step": 96010 }, { "epoch": 0.9036705882352941, "grad_norm": 0.38092641426201834, "learning_rate": 2.1044051037551516e-06, "loss": 0.012900029122829438, "step": 96015 }, { "epoch": 0.9037176470588235, "grad_norm": 0.4561979989259154, "learning_rate": 2.1043503116697273e-06, "loss": 0.011849984526634216, "step": 96020 }, { "epoch": 0.9037647058823529, "grad_norm": 0.3061464632981342, "learning_rate": 2.1042955238639197e-06, "loss": 0.013495919108390809, "step": 96025 }, { "epoch": 0.9038117647058823, "grad_norm": 0.3290990091979664, "learning_rate": 2.104240740337173e-06, "loss": 0.013034290075302124, "step": 96030 }, { "epoch": 0.9038588235294117, "grad_norm": 0.6078678130590883, "learning_rate": 2.10418596108893e-06, "loss": 0.01293623149394989, "step": 96035 }, { "epoch": 0.9039058823529412, "grad_norm": 0.33301820979762936, "learning_rate": 2.1041311861186335e-06, "loss": 0.010607941448688507, "step": 96040 }, { "epoch": 0.9039529411764706, "grad_norm": 0.3961408701654087, "learning_rate": 2.1040764154257264e-06, "loss": 0.01207917332649231, "step": 96045 }, { "epoch": 0.904, "grad_norm": 0.7063195255292638, "learning_rate": 2.1040216490096525e-06, "loss": 0.01629466563463211, "step": 96050 }, { "epoch": 0.9040470588235294, "grad_norm": 0.6482065890516662, "learning_rate": 2.103966886869855e-06, "loss": 0.015979665517807006, "step": 96055 }, { "epoch": 0.9040941176470588, "grad_norm": 0.39142633956716205, "learning_rate": 2.1039121290057778e-06, "loss": 0.01689097285270691, "step": 96060 }, { "epoch": 0.9041411764705882, "grad_norm": 0.2588508385170158, "learning_rate": 2.1038573754168642e-06, "loss": 0.014493313431739808, "step": 96065 }, { "epoch": 0.9041882352941176, "grad_norm": 0.5152493236594247, "learning_rate": 2.1038026261025584e-06, "loss": 0.016377142071723937, "step": 96070 }, { "epoch": 0.904235294117647, "grad_norm": 0.37310548693215395, "learning_rate": 2.1037478810623037e-06, "loss": 0.011894212663173675, "step": 96075 }, { "epoch": 0.9042823529411764, "grad_norm": 0.50181110900272, "learning_rate": 2.1036931402955433e-06, "loss": 0.014447090029716492, "step": 96080 }, { "epoch": 0.9043294117647059, "grad_norm": 0.5518707970488054, "learning_rate": 2.1036384038017228e-06, "loss": 0.013653208315372468, "step": 96085 }, { "epoch": 0.9043764705882353, "grad_norm": 0.49001724813151626, "learning_rate": 2.103583671580286e-06, "loss": 0.01498156487941742, "step": 96090 }, { "epoch": 0.9044235294117647, "grad_norm": 0.7155672478870795, "learning_rate": 2.1035289436306765e-06, "loss": 0.01941009610891342, "step": 96095 }, { "epoch": 0.9044705882352941, "grad_norm": 0.4537819776557744, "learning_rate": 2.103474219952339e-06, "loss": 0.010851384699344635, "step": 96100 }, { "epoch": 0.9045176470588235, "grad_norm": 0.3944705104129804, "learning_rate": 2.1034195005447176e-06, "loss": 0.014562512934207916, "step": 96105 }, { "epoch": 0.9045647058823529, "grad_norm": 0.47928954504826016, "learning_rate": 2.1033647854072574e-06, "loss": 0.012941206991672515, "step": 96110 }, { "epoch": 0.9046117647058823, "grad_norm": 0.2666081819378395, "learning_rate": 2.1033100745394026e-06, "loss": 0.013079063594341278, "step": 96115 }, { "epoch": 0.9046588235294117, "grad_norm": 0.5207341212173867, "learning_rate": 2.1032553679405977e-06, "loss": 0.013513226807117463, "step": 96120 }, { "epoch": 0.9047058823529411, "grad_norm": 0.33291602389290187, "learning_rate": 2.1032006656102887e-06, "loss": 0.015405559539794922, "step": 96125 }, { "epoch": 0.9047529411764705, "grad_norm": 0.5453403322870858, "learning_rate": 2.1031459675479196e-06, "loss": 0.012608225643634795, "step": 96130 }, { "epoch": 0.9048, "grad_norm": 0.46472542795153204, "learning_rate": 2.1030912737529355e-06, "loss": 0.016879859566688537, "step": 96135 }, { "epoch": 0.9048470588235294, "grad_norm": 0.8187051977767041, "learning_rate": 2.1030365842247815e-06, "loss": 0.015963101387023927, "step": 96140 }, { "epoch": 0.9048941176470588, "grad_norm": 0.28777031852197665, "learning_rate": 2.102981898962903e-06, "loss": 0.01130390092730522, "step": 96145 }, { "epoch": 0.9049411764705882, "grad_norm": 0.5780434997493181, "learning_rate": 2.102927217966746e-06, "loss": 0.01463417112827301, "step": 96150 }, { "epoch": 0.9049882352941176, "grad_norm": 0.5560254212977386, "learning_rate": 2.102872541235755e-06, "loss": 0.016426658630371092, "step": 96155 }, { "epoch": 0.905035294117647, "grad_norm": 0.3495265879643704, "learning_rate": 2.1028178687693757e-06, "loss": 0.015284688770771026, "step": 96160 }, { "epoch": 0.9050823529411764, "grad_norm": 0.48380371175915277, "learning_rate": 2.1027632005670538e-06, "loss": 0.010680477321147918, "step": 96165 }, { "epoch": 0.9051294117647058, "grad_norm": 0.6367050112980416, "learning_rate": 2.1027085366282353e-06, "loss": 0.01392899751663208, "step": 96170 }, { "epoch": 0.9051764705882352, "grad_norm": 0.40297672383362776, "learning_rate": 2.102653876952366e-06, "loss": 0.01428413689136505, "step": 96175 }, { "epoch": 0.9052235294117648, "grad_norm": 0.5574395480435463, "learning_rate": 2.102599221538892e-06, "loss": 0.01190619170665741, "step": 96180 }, { "epoch": 0.9052705882352942, "grad_norm": 0.4531414720967475, "learning_rate": 2.102544570387259e-06, "loss": 0.015152767300605774, "step": 96185 }, { "epoch": 0.9053176470588236, "grad_norm": 0.45760584910281843, "learning_rate": 2.102489923496913e-06, "loss": 0.013063974678516388, "step": 96190 }, { "epoch": 0.905364705882353, "grad_norm": 0.37453923098513925, "learning_rate": 2.102435280867301e-06, "loss": 0.011718150973320008, "step": 96195 }, { "epoch": 0.9054117647058824, "grad_norm": 0.3113603141591331, "learning_rate": 2.1023806424978685e-06, "loss": 0.013586097955703735, "step": 96200 }, { "epoch": 0.9054588235294118, "grad_norm": 0.36698365694053686, "learning_rate": 2.1023260083880627e-06, "loss": 0.013602533936500549, "step": 96205 }, { "epoch": 0.9055058823529412, "grad_norm": 0.4976340320371574, "learning_rate": 2.1022713785373297e-06, "loss": 0.015175485610961914, "step": 96210 }, { "epoch": 0.9055529411764706, "grad_norm": 0.39928344571014845, "learning_rate": 2.102216752945116e-06, "loss": 0.012068059295415878, "step": 96215 }, { "epoch": 0.9056, "grad_norm": 0.4335414445346898, "learning_rate": 2.102162131610869e-06, "loss": 0.01493464708328247, "step": 96220 }, { "epoch": 0.9056470588235294, "grad_norm": 0.7329460554050774, "learning_rate": 2.1021075145340353e-06, "loss": 0.014269348978996278, "step": 96225 }, { "epoch": 0.9056941176470589, "grad_norm": 0.4692350267891808, "learning_rate": 2.102052901714062e-06, "loss": 0.015930935740470886, "step": 96230 }, { "epoch": 0.9057411764705883, "grad_norm": 0.3686820650763741, "learning_rate": 2.1019982931503954e-06, "loss": 0.018254658579826354, "step": 96235 }, { "epoch": 0.9057882352941177, "grad_norm": 0.3199146953988543, "learning_rate": 2.101943688842483e-06, "loss": 0.013142368197441101, "step": 96240 }, { "epoch": 0.9058352941176471, "grad_norm": 0.38265342820192866, "learning_rate": 2.1018890887897726e-06, "loss": 0.012365607917308808, "step": 96245 }, { "epoch": 0.9058823529411765, "grad_norm": 2.0671925309905275, "learning_rate": 2.1018344929917114e-06, "loss": 0.01856531947851181, "step": 96250 }, { "epoch": 0.9059294117647059, "grad_norm": 0.3456051202470777, "learning_rate": 2.1017799014477465e-06, "loss": 0.01153811514377594, "step": 96255 }, { "epoch": 0.9059764705882353, "grad_norm": 0.4961002225168444, "learning_rate": 2.1017253141573256e-06, "loss": 0.011833501607179641, "step": 96260 }, { "epoch": 0.9060235294117647, "grad_norm": 0.3634864789356272, "learning_rate": 2.101670731119896e-06, "loss": 0.015796954929828643, "step": 96265 }, { "epoch": 0.9060705882352941, "grad_norm": 0.5870683513952215, "learning_rate": 2.1016161523349067e-06, "loss": 0.01713133752346039, "step": 96270 }, { "epoch": 0.9061176470588236, "grad_norm": 0.5124138606976503, "learning_rate": 2.1015615778018045e-06, "loss": 0.01193814054131508, "step": 96275 }, { "epoch": 0.906164705882353, "grad_norm": 0.3179254644575081, "learning_rate": 2.1015070075200374e-06, "loss": 0.012654215097427368, "step": 96280 }, { "epoch": 0.9062117647058824, "grad_norm": 0.49509868214339053, "learning_rate": 2.1014524414890542e-06, "loss": 0.01095472052693367, "step": 96285 }, { "epoch": 0.9062588235294118, "grad_norm": 0.5704227695824562, "learning_rate": 2.1013978797083024e-06, "loss": 0.015104889869689941, "step": 96290 }, { "epoch": 0.9063058823529412, "grad_norm": 0.502417972669121, "learning_rate": 2.1013433221772297e-06, "loss": 0.011092987656593323, "step": 96295 }, { "epoch": 0.9063529411764706, "grad_norm": 0.5050586155350731, "learning_rate": 2.101288768895286e-06, "loss": 0.013011883199214935, "step": 96300 }, { "epoch": 0.9064, "grad_norm": 0.6149475184921167, "learning_rate": 2.1012342198619183e-06, "loss": 0.014053028821945191, "step": 96305 }, { "epoch": 0.9064470588235294, "grad_norm": 0.358616060742866, "learning_rate": 2.101179675076576e-06, "loss": 0.012253013253211976, "step": 96310 }, { "epoch": 0.9064941176470588, "grad_norm": 0.6650851708312586, "learning_rate": 2.101125134538708e-06, "loss": 0.013620337843894959, "step": 96315 }, { "epoch": 0.9065411764705882, "grad_norm": 0.5999376488649634, "learning_rate": 2.101070598247762e-06, "loss": 0.013250765204429627, "step": 96320 }, { "epoch": 0.9065882352941177, "grad_norm": 0.5764052995030611, "learning_rate": 2.1010160662031883e-06, "loss": 0.016365456581115722, "step": 96325 }, { "epoch": 0.9066352941176471, "grad_norm": 0.47282064875513596, "learning_rate": 2.1009615384044344e-06, "loss": 0.013649703562259674, "step": 96330 }, { "epoch": 0.9066823529411765, "grad_norm": 0.5399074675072754, "learning_rate": 2.1009070148509504e-06, "loss": 0.016926202178001403, "step": 96335 }, { "epoch": 0.9067294117647059, "grad_norm": 0.6224952614991844, "learning_rate": 2.1008524955421848e-06, "loss": 0.013791827857494355, "step": 96340 }, { "epoch": 0.9067764705882353, "grad_norm": 0.5328618248706516, "learning_rate": 2.1007979804775874e-06, "loss": 0.01314716637134552, "step": 96345 }, { "epoch": 0.9068235294117647, "grad_norm": 0.3752080442743736, "learning_rate": 2.1007434696566078e-06, "loss": 0.011945885419845582, "step": 96350 }, { "epoch": 0.9068705882352941, "grad_norm": 0.6644379291207488, "learning_rate": 2.1006889630786944e-06, "loss": 0.020811119675636293, "step": 96355 }, { "epoch": 0.9069176470588235, "grad_norm": 0.5275276140277306, "learning_rate": 2.1006344607432976e-06, "loss": 0.013460971415042877, "step": 96360 }, { "epoch": 0.9069647058823529, "grad_norm": 0.49844643504025427, "learning_rate": 2.1005799626498668e-06, "loss": 0.012641894817352294, "step": 96365 }, { "epoch": 0.9070117647058824, "grad_norm": 0.4491233825229491, "learning_rate": 2.1005254687978518e-06, "loss": 0.015271465480327606, "step": 96370 }, { "epoch": 0.9070588235294118, "grad_norm": 0.4395988664724369, "learning_rate": 2.100470979186703e-06, "loss": 0.010556606948375702, "step": 96375 }, { "epoch": 0.9071058823529412, "grad_norm": 0.5594905182396402, "learning_rate": 2.1004164938158696e-06, "loss": 0.01607949286699295, "step": 96380 }, { "epoch": 0.9071529411764706, "grad_norm": 0.6745025870214946, "learning_rate": 2.1003620126848017e-06, "loss": 0.01447525918483734, "step": 96385 }, { "epoch": 0.9072, "grad_norm": 0.5324146700398383, "learning_rate": 2.1003075357929494e-06, "loss": 0.014929115772247314, "step": 96390 }, { "epoch": 0.9072470588235294, "grad_norm": 0.5914216552498244, "learning_rate": 2.100253063139764e-06, "loss": 0.017029689252376558, "step": 96395 }, { "epoch": 0.9072941176470588, "grad_norm": 0.4916322852683243, "learning_rate": 2.1001985947246947e-06, "loss": 0.012895609438419341, "step": 96400 }, { "epoch": 0.9073411764705882, "grad_norm": 0.8092972626331871, "learning_rate": 2.1001441305471924e-06, "loss": 0.01681702733039856, "step": 96405 }, { "epoch": 0.9073882352941176, "grad_norm": 0.4718335086277195, "learning_rate": 2.100089670606708e-06, "loss": 0.016296325623989104, "step": 96410 }, { "epoch": 0.907435294117647, "grad_norm": 0.37747672125104753, "learning_rate": 2.1000352149026918e-06, "loss": 0.01285528987646103, "step": 96415 }, { "epoch": 0.9074823529411765, "grad_norm": 0.5249284913661321, "learning_rate": 2.0999807634345944e-06, "loss": 0.014144949615001678, "step": 96420 }, { "epoch": 0.9075294117647059, "grad_norm": 0.3834834265603874, "learning_rate": 2.0999263162018668e-06, "loss": 0.019434887170791625, "step": 96425 }, { "epoch": 0.9075764705882353, "grad_norm": 0.46852932910118794, "learning_rate": 2.0998718732039604e-06, "loss": 0.01227194219827652, "step": 96430 }, { "epoch": 0.9076235294117647, "grad_norm": 0.2937254720738275, "learning_rate": 2.099817434440326e-06, "loss": 0.01322285532951355, "step": 96435 }, { "epoch": 0.9076705882352941, "grad_norm": 0.4989230565107208, "learning_rate": 2.099762999910414e-06, "loss": 0.015140092372894287, "step": 96440 }, { "epoch": 0.9077176470588235, "grad_norm": 0.8963885175757778, "learning_rate": 2.0997085696136767e-06, "loss": 0.01628703624010086, "step": 96445 }, { "epoch": 0.9077647058823529, "grad_norm": 0.4696438963629111, "learning_rate": 2.0996541435495653e-06, "loss": 0.010225106775760651, "step": 96450 }, { "epoch": 0.9078117647058823, "grad_norm": 0.5183681434684723, "learning_rate": 2.099599721717531e-06, "loss": 0.013517159223556518, "step": 96455 }, { "epoch": 0.9078588235294117, "grad_norm": 0.5965834403750306, "learning_rate": 2.0995453041170252e-06, "loss": 0.01555456817150116, "step": 96460 }, { "epoch": 0.9079058823529412, "grad_norm": 0.3250367319990994, "learning_rate": 2.0994908907475e-06, "loss": 0.014110854268074036, "step": 96465 }, { "epoch": 0.9079529411764706, "grad_norm": 0.3398005004969539, "learning_rate": 2.0994364816084067e-06, "loss": 0.016909658908843994, "step": 96470 }, { "epoch": 0.908, "grad_norm": 0.5961147027478094, "learning_rate": 2.0993820766991977e-06, "loss": 0.011416375637054443, "step": 96475 }, { "epoch": 0.9080470588235294, "grad_norm": 0.5008956496235982, "learning_rate": 2.0993276760193246e-06, "loss": 0.01128285974264145, "step": 96480 }, { "epoch": 0.9080941176470588, "grad_norm": 0.5763650016330895, "learning_rate": 2.0992732795682394e-06, "loss": 0.01582164168357849, "step": 96485 }, { "epoch": 0.9081411764705882, "grad_norm": 0.4577152434761057, "learning_rate": 2.099218887345395e-06, "loss": 0.01571219265460968, "step": 96490 }, { "epoch": 0.9081882352941176, "grad_norm": 0.5254550714045608, "learning_rate": 2.099164499350242e-06, "loss": 0.015610314905643463, "step": 96495 }, { "epoch": 0.908235294117647, "grad_norm": 0.4826781986726426, "learning_rate": 2.0991101155822347e-06, "loss": 0.013984505832195283, "step": 96500 }, { "epoch": 0.9082823529411764, "grad_norm": 0.3901641995161893, "learning_rate": 2.0990557360408243e-06, "loss": 0.01143384575843811, "step": 96505 }, { "epoch": 0.9083294117647058, "grad_norm": 0.34680647749143306, "learning_rate": 2.099001360725464e-06, "loss": 0.011667659878730774, "step": 96510 }, { "epoch": 0.9083764705882353, "grad_norm": 0.4278099956110887, "learning_rate": 2.098946989635606e-06, "loss": 0.014139825105667114, "step": 96515 }, { "epoch": 0.9084235294117647, "grad_norm": 0.6910315824648726, "learning_rate": 2.098892622770703e-06, "loss": 0.01281871497631073, "step": 96520 }, { "epoch": 0.9084705882352941, "grad_norm": 0.5344430301813727, "learning_rate": 2.098838260130208e-06, "loss": 0.018577203154563904, "step": 96525 }, { "epoch": 0.9085176470588235, "grad_norm": 0.6449525032551005, "learning_rate": 2.0987839017135745e-06, "loss": 0.013858242332935334, "step": 96530 }, { "epoch": 0.9085647058823529, "grad_norm": 0.36742290003370703, "learning_rate": 2.0987295475202553e-06, "loss": 0.011422641575336456, "step": 96535 }, { "epoch": 0.9086117647058823, "grad_norm": 0.46553115857766786, "learning_rate": 2.098675197549703e-06, "loss": 0.012364031374454498, "step": 96540 }, { "epoch": 0.9086588235294117, "grad_norm": 0.46434718440947864, "learning_rate": 2.0986208518013716e-06, "loss": 0.016430193185806276, "step": 96545 }, { "epoch": 0.9087058823529411, "grad_norm": 0.6479128827305684, "learning_rate": 2.0985665102747135e-06, "loss": 0.01534973680973053, "step": 96550 }, { "epoch": 0.9087529411764705, "grad_norm": 0.41591799786260675, "learning_rate": 2.0985121729691827e-06, "loss": 0.016123655438423156, "step": 96555 }, { "epoch": 0.9088, "grad_norm": 0.44399575268124836, "learning_rate": 2.098457839884233e-06, "loss": 0.012695999443531036, "step": 96560 }, { "epoch": 0.9088470588235295, "grad_norm": 0.24794324679009747, "learning_rate": 2.098403511019318e-06, "loss": 0.011499375849962235, "step": 96565 }, { "epoch": 0.9088941176470589, "grad_norm": 0.4202065468703997, "learning_rate": 2.0983491863738907e-06, "loss": 0.016864295303821563, "step": 96570 }, { "epoch": 0.9089411764705883, "grad_norm": 0.5193692366974925, "learning_rate": 2.098294865947406e-06, "loss": 0.011370958387851715, "step": 96575 }, { "epoch": 0.9089882352941177, "grad_norm": 0.5889264981468894, "learning_rate": 2.0982405497393176e-06, "loss": 0.019473257660865783, "step": 96580 }, { "epoch": 0.909035294117647, "grad_norm": 0.48770216858226667, "learning_rate": 2.0981862377490787e-06, "loss": 0.01259281039237976, "step": 96585 }, { "epoch": 0.9090823529411765, "grad_norm": 0.5283812501512188, "learning_rate": 2.098131929976144e-06, "loss": 0.015207266807556153, "step": 96590 }, { "epoch": 0.9091294117647059, "grad_norm": 0.4549213510274262, "learning_rate": 2.098077626419968e-06, "loss": 0.014190500974655152, "step": 96595 }, { "epoch": 0.9091764705882353, "grad_norm": 0.36423410769991815, "learning_rate": 2.098023327080005e-06, "loss": 0.017265065014362334, "step": 96600 }, { "epoch": 0.9092235294117647, "grad_norm": 0.41916784484758135, "learning_rate": 2.097969031955709e-06, "loss": 0.012369485199451446, "step": 96605 }, { "epoch": 0.9092705882352942, "grad_norm": 0.5999107507400853, "learning_rate": 2.0979147410465346e-06, "loss": 0.01125028431415558, "step": 96610 }, { "epoch": 0.9093176470588236, "grad_norm": 0.4935172887459304, "learning_rate": 2.0978604543519373e-06, "loss": 0.01317666471004486, "step": 96615 }, { "epoch": 0.909364705882353, "grad_norm": 0.4735055282250011, "learning_rate": 2.097806171871371e-06, "loss": 0.01547146588563919, "step": 96620 }, { "epoch": 0.9094117647058824, "grad_norm": 0.48330117896534824, "learning_rate": 2.09775189360429e-06, "loss": 0.010355927795171738, "step": 96625 }, { "epoch": 0.9094588235294118, "grad_norm": 0.4604567452443137, "learning_rate": 2.0976976195501506e-06, "loss": 0.012498986721038819, "step": 96630 }, { "epoch": 0.9095058823529412, "grad_norm": 0.37607372292913593, "learning_rate": 2.097643349708407e-06, "loss": 0.01301589012145996, "step": 96635 }, { "epoch": 0.9095529411764706, "grad_norm": 0.4560860267015673, "learning_rate": 2.0975890840785143e-06, "loss": 0.014731258153915405, "step": 96640 }, { "epoch": 0.9096, "grad_norm": 0.37459963397441903, "learning_rate": 2.097534822659928e-06, "loss": 0.016903518140316008, "step": 96645 }, { "epoch": 0.9096470588235294, "grad_norm": 0.3141039389097736, "learning_rate": 2.0974805654521034e-06, "loss": 0.015330284833908081, "step": 96650 }, { "epoch": 0.9096941176470589, "grad_norm": 0.5709590436288178, "learning_rate": 2.097426312454496e-06, "loss": 0.013238747417926789, "step": 96655 }, { "epoch": 0.9097411764705883, "grad_norm": 0.5085308632164617, "learning_rate": 2.097372063666561e-06, "loss": 0.015603113174438476, "step": 96660 }, { "epoch": 0.9097882352941177, "grad_norm": 0.472897050823177, "learning_rate": 2.0973178190877545e-06, "loss": 0.014012105762958527, "step": 96665 }, { "epoch": 0.9098352941176471, "grad_norm": 0.41834418172955623, "learning_rate": 2.097263578717532e-06, "loss": 0.011315000802278518, "step": 96670 }, { "epoch": 0.9098823529411765, "grad_norm": 0.3970554078582503, "learning_rate": 2.0972093425553493e-06, "loss": 0.01305917203426361, "step": 96675 }, { "epoch": 0.9099294117647059, "grad_norm": 0.48293545788186215, "learning_rate": 2.0971551106006616e-06, "loss": 0.014803335070610046, "step": 96680 }, { "epoch": 0.9099764705882353, "grad_norm": 0.4837755902312321, "learning_rate": 2.0971008828529264e-06, "loss": 0.014002218842506409, "step": 96685 }, { "epoch": 0.9100235294117647, "grad_norm": 0.39953429898263476, "learning_rate": 2.0970466593115984e-06, "loss": 0.0162860631942749, "step": 96690 }, { "epoch": 0.9100705882352941, "grad_norm": 0.7557164690027592, "learning_rate": 2.0969924399761345e-06, "loss": 0.012045253813266755, "step": 96695 }, { "epoch": 0.9101176470588235, "grad_norm": 0.4008451920743069, "learning_rate": 2.0969382248459915e-06, "loss": 0.014650930464267731, "step": 96700 }, { "epoch": 0.910164705882353, "grad_norm": 0.2915785843690848, "learning_rate": 2.096884013920625e-06, "loss": 0.009557737410068512, "step": 96705 }, { "epoch": 0.9102117647058824, "grad_norm": 0.8217097352495148, "learning_rate": 2.0968298071994917e-06, "loss": 0.026165339350700378, "step": 96710 }, { "epoch": 0.9102588235294118, "grad_norm": 0.4393252123699149, "learning_rate": 2.0967756046820483e-06, "loss": 0.014589713513851165, "step": 96715 }, { "epoch": 0.9103058823529412, "grad_norm": 0.3888775027194241, "learning_rate": 2.0967214063677518e-06, "loss": 0.012831848859786988, "step": 96720 }, { "epoch": 0.9103529411764706, "grad_norm": 0.39309070453989897, "learning_rate": 2.096667212256058e-06, "loss": 0.015276147425174713, "step": 96725 }, { "epoch": 0.9104, "grad_norm": 0.5032401627406745, "learning_rate": 2.096613022346425e-06, "loss": 0.013034166395664215, "step": 96730 }, { "epoch": 0.9104470588235294, "grad_norm": 0.8980104187259221, "learning_rate": 2.096558836638309e-06, "loss": 0.012687012553215027, "step": 96735 }, { "epoch": 0.9104941176470588, "grad_norm": 0.4590637082537604, "learning_rate": 2.0965046551311675e-06, "loss": 0.012686438858509064, "step": 96740 }, { "epoch": 0.9105411764705882, "grad_norm": 0.650657141137442, "learning_rate": 2.0964504778244574e-06, "loss": 0.015493610501289367, "step": 96745 }, { "epoch": 0.9105882352941177, "grad_norm": 0.5997032435245503, "learning_rate": 2.0963963047176365e-06, "loss": 0.01425207257270813, "step": 96750 }, { "epoch": 0.9106352941176471, "grad_norm": 0.5256595507540583, "learning_rate": 2.096342135810162e-06, "loss": 0.01905732452869415, "step": 96755 }, { "epoch": 0.9106823529411765, "grad_norm": 0.7118959264454158, "learning_rate": 2.096287971101491e-06, "loss": 0.011973142623901367, "step": 96760 }, { "epoch": 0.9107294117647059, "grad_norm": 0.34913196661699514, "learning_rate": 2.0962338105910816e-06, "loss": 0.013899147510528564, "step": 96765 }, { "epoch": 0.9107764705882353, "grad_norm": 0.6599640975484219, "learning_rate": 2.096179654278391e-06, "loss": 0.014696022868156433, "step": 96770 }, { "epoch": 0.9108235294117647, "grad_norm": 0.4115770180450869, "learning_rate": 2.0961255021628778e-06, "loss": 0.011873960494995117, "step": 96775 }, { "epoch": 0.9108705882352941, "grad_norm": 0.5054985032767849, "learning_rate": 2.096071354243998e-06, "loss": 0.014086875319480895, "step": 96780 }, { "epoch": 0.9109176470588235, "grad_norm": 0.2295582181788066, "learning_rate": 2.0960172105212127e-06, "loss": 0.011676935851573944, "step": 96785 }, { "epoch": 0.9109647058823529, "grad_norm": 0.4827638010003806, "learning_rate": 2.095963070993977e-06, "loss": 0.013450619578361512, "step": 96790 }, { "epoch": 0.9110117647058823, "grad_norm": 0.5028774261385482, "learning_rate": 2.095908935661751e-06, "loss": 0.010004869103431702, "step": 96795 }, { "epoch": 0.9110588235294118, "grad_norm": 0.43538494931079236, "learning_rate": 2.095854804523991e-06, "loss": 0.010766855627298354, "step": 96800 }, { "epoch": 0.9111058823529412, "grad_norm": 0.5754519293296652, "learning_rate": 2.095800677580158e-06, "loss": 0.013610519468784332, "step": 96805 }, { "epoch": 0.9111529411764706, "grad_norm": 0.3624513219149227, "learning_rate": 2.095746554829709e-06, "loss": 0.013149899244308472, "step": 96810 }, { "epoch": 0.9112, "grad_norm": 0.381298240495643, "learning_rate": 2.095692436272102e-06, "loss": 0.01332532912492752, "step": 96815 }, { "epoch": 0.9112470588235294, "grad_norm": 0.46329453488232397, "learning_rate": 2.0956383219067965e-06, "loss": 0.014471413195133209, "step": 96820 }, { "epoch": 0.9112941176470588, "grad_norm": 0.44966916145295727, "learning_rate": 2.095584211733251e-06, "loss": 0.016473379731178284, "step": 96825 }, { "epoch": 0.9113411764705882, "grad_norm": 0.28487374713415214, "learning_rate": 2.0955301057509247e-06, "loss": 0.010046611726284026, "step": 96830 }, { "epoch": 0.9113882352941176, "grad_norm": 0.42079774090001154, "learning_rate": 2.095476003959276e-06, "loss": 0.01146424636244774, "step": 96835 }, { "epoch": 0.911435294117647, "grad_norm": 0.4450801547848578, "learning_rate": 2.095421906357764e-06, "loss": 0.015388599038124085, "step": 96840 }, { "epoch": 0.9114823529411765, "grad_norm": 0.4109650980692209, "learning_rate": 2.095367812945849e-06, "loss": 0.012292486429214478, "step": 96845 }, { "epoch": 0.9115294117647059, "grad_norm": 0.8813817128610936, "learning_rate": 2.095313723722989e-06, "loss": 0.014459364116191864, "step": 96850 }, { "epoch": 0.9115764705882353, "grad_norm": 0.4882010564645283, "learning_rate": 2.0952596386886437e-06, "loss": 0.01551954448223114, "step": 96855 }, { "epoch": 0.9116235294117647, "grad_norm": 0.44488001456159115, "learning_rate": 2.0952055578422726e-06, "loss": 0.011952384561300277, "step": 96860 }, { "epoch": 0.9116705882352941, "grad_norm": 0.5027905244637052, "learning_rate": 2.0951514811833356e-06, "loss": 0.011756008863449097, "step": 96865 }, { "epoch": 0.9117176470588235, "grad_norm": 0.40257639410178814, "learning_rate": 2.0950974087112917e-06, "loss": 0.013214200735092163, "step": 96870 }, { "epoch": 0.9117647058823529, "grad_norm": 0.40298672476926534, "learning_rate": 2.095043340425601e-06, "loss": 0.015475128591060639, "step": 96875 }, { "epoch": 0.9118117647058823, "grad_norm": 0.4636667473022767, "learning_rate": 2.094989276325723e-06, "loss": 0.016351571679115294, "step": 96880 }, { "epoch": 0.9118588235294117, "grad_norm": 0.4344916347660722, "learning_rate": 2.094935216411118e-06, "loss": 0.02028180956840515, "step": 96885 }, { "epoch": 0.9119058823529411, "grad_norm": 0.4497187783882136, "learning_rate": 2.0948811606812465e-06, "loss": 0.012813061475753784, "step": 96890 }, { "epoch": 0.9119529411764706, "grad_norm": 0.6254340465919938, "learning_rate": 2.0948271091355674e-06, "loss": 0.017994168400764465, "step": 96895 }, { "epoch": 0.912, "grad_norm": 0.5658242599425658, "learning_rate": 2.0947730617735417e-06, "loss": 0.015241706371307373, "step": 96900 }, { "epoch": 0.9120470588235294, "grad_norm": 0.24514597359373222, "learning_rate": 2.09471901859463e-06, "loss": 0.016032692790031434, "step": 96905 }, { "epoch": 0.9120941176470588, "grad_norm": 0.416425599365399, "learning_rate": 2.094664979598292e-06, "loss": 0.015888744592666627, "step": 96910 }, { "epoch": 0.9121411764705882, "grad_norm": 0.5334243038335644, "learning_rate": 2.094610944783989e-06, "loss": 0.013362307846546174, "step": 96915 }, { "epoch": 0.9121882352941176, "grad_norm": 1.1588336829945307, "learning_rate": 2.094556914151181e-06, "loss": 0.01456092894077301, "step": 96920 }, { "epoch": 0.912235294117647, "grad_norm": 0.37083362280899423, "learning_rate": 2.094502887699329e-06, "loss": 0.011263124644756317, "step": 96925 }, { "epoch": 0.9122823529411764, "grad_norm": 0.36809875999164937, "learning_rate": 2.0944488654278934e-06, "loss": 0.014912596344947815, "step": 96930 }, { "epoch": 0.9123294117647058, "grad_norm": 0.5355236798172468, "learning_rate": 2.094394847336336e-06, "loss": 0.016976216435432435, "step": 96935 }, { "epoch": 0.9123764705882353, "grad_norm": 0.43610462396672667, "learning_rate": 2.094340833424117e-06, "loss": 0.019895362854003906, "step": 96940 }, { "epoch": 0.9124235294117647, "grad_norm": 0.5032147878075741, "learning_rate": 2.0942868236906975e-06, "loss": 0.010264402627944947, "step": 96945 }, { "epoch": 0.9124705882352941, "grad_norm": 0.6199861359836437, "learning_rate": 2.0942328181355393e-06, "loss": 0.011591394245624543, "step": 96950 }, { "epoch": 0.9125176470588235, "grad_norm": 0.46233734223624406, "learning_rate": 2.094178816758103e-06, "loss": 0.011286897957324982, "step": 96955 }, { "epoch": 0.912564705882353, "grad_norm": 0.4385629886279557, "learning_rate": 2.0941248195578507e-06, "loss": 0.016832190752029418, "step": 96960 }, { "epoch": 0.9126117647058823, "grad_norm": 0.6064984485313091, "learning_rate": 2.0940708265342434e-06, "loss": 0.008738823980093003, "step": 96965 }, { "epoch": 0.9126588235294117, "grad_norm": 0.7954136362144191, "learning_rate": 2.094016837686743e-06, "loss": 0.014075785875320435, "step": 96970 }, { "epoch": 0.9127058823529411, "grad_norm": 0.4748581000894902, "learning_rate": 2.0939628530148112e-06, "loss": 0.017009323835372923, "step": 96975 }, { "epoch": 0.9127529411764705, "grad_norm": 0.47498666581259863, "learning_rate": 2.09390887251791e-06, "loss": 0.019212116301059724, "step": 96980 }, { "epoch": 0.9128, "grad_norm": 0.48461599897305485, "learning_rate": 2.0938548961955e-06, "loss": 0.014606958627700806, "step": 96985 }, { "epoch": 0.9128470588235295, "grad_norm": 0.42829490644870893, "learning_rate": 2.0938009240470448e-06, "loss": 0.01534818708896637, "step": 96990 }, { "epoch": 0.9128941176470589, "grad_norm": 0.5661979045240629, "learning_rate": 2.0937469560720055e-06, "loss": 0.01601795256137848, "step": 96995 }, { "epoch": 0.9129411764705883, "grad_norm": 0.46624760051013403, "learning_rate": 2.0936929922698447e-06, "loss": 0.015190371870994568, "step": 97000 }, { "epoch": 0.9129882352941177, "grad_norm": 0.4971387525987386, "learning_rate": 2.093639032640024e-06, "loss": 0.01742796003818512, "step": 97005 }, { "epoch": 0.9130352941176471, "grad_norm": 0.42252727189294487, "learning_rate": 2.093585077182007e-06, "loss": 0.01506684422492981, "step": 97010 }, { "epoch": 0.9130823529411765, "grad_norm": 0.547427509182589, "learning_rate": 2.0935311258952555e-06, "loss": 0.015541571378707885, "step": 97015 }, { "epoch": 0.9131294117647059, "grad_norm": 0.4865042688392602, "learning_rate": 2.0934771787792317e-06, "loss": 0.010025472939014434, "step": 97020 }, { "epoch": 0.9131764705882353, "grad_norm": 0.2598124797864876, "learning_rate": 2.093423235833399e-06, "loss": 0.012126132845878601, "step": 97025 }, { "epoch": 0.9132235294117647, "grad_norm": 0.7962110911541457, "learning_rate": 2.0933692970572192e-06, "loss": 0.018480655550956727, "step": 97030 }, { "epoch": 0.9132705882352942, "grad_norm": 0.40545324082134143, "learning_rate": 2.093315362450156e-06, "loss": 0.013271600008010864, "step": 97035 }, { "epoch": 0.9133176470588236, "grad_norm": 0.5491210518710606, "learning_rate": 2.093261432011672e-06, "loss": 0.015226787328720093, "step": 97040 }, { "epoch": 0.913364705882353, "grad_norm": 0.46822941326171874, "learning_rate": 2.0932075057412306e-06, "loss": 0.014805693924427033, "step": 97045 }, { "epoch": 0.9134117647058824, "grad_norm": 0.5858095692864893, "learning_rate": 2.0931535836382944e-06, "loss": 0.015382733941078187, "step": 97050 }, { "epoch": 0.9134588235294118, "grad_norm": 0.47724291608921426, "learning_rate": 2.093099665702327e-06, "loss": 0.013838493824005127, "step": 97055 }, { "epoch": 0.9135058823529412, "grad_norm": 0.42815873527849657, "learning_rate": 2.0930457519327914e-06, "loss": 0.010214704275131225, "step": 97060 }, { "epoch": 0.9135529411764706, "grad_norm": 0.5542831601510139, "learning_rate": 2.0929918423291515e-06, "loss": 0.012221413105726242, "step": 97065 }, { "epoch": 0.9136, "grad_norm": 0.3963374645821469, "learning_rate": 2.092937936890871e-06, "loss": 0.011999430507421494, "step": 97070 }, { "epoch": 0.9136470588235294, "grad_norm": 0.4726463805002792, "learning_rate": 2.092884035617412e-06, "loss": 0.013069501519203186, "step": 97075 }, { "epoch": 0.9136941176470589, "grad_norm": 0.3732855324169303, "learning_rate": 2.0928301385082405e-06, "loss": 0.011108039319515229, "step": 97080 }, { "epoch": 0.9137411764705883, "grad_norm": 0.46777923176023983, "learning_rate": 2.092776245562819e-06, "loss": 0.015752844512462616, "step": 97085 }, { "epoch": 0.9137882352941177, "grad_norm": 0.5283748540257549, "learning_rate": 2.0927223567806113e-06, "loss": 0.013065603375434876, "step": 97090 }, { "epoch": 0.9138352941176471, "grad_norm": 0.40615938828735304, "learning_rate": 2.092668472161082e-06, "loss": 0.01451040804386139, "step": 97095 }, { "epoch": 0.9138823529411765, "grad_norm": 0.5186274039407263, "learning_rate": 2.0926145917036944e-06, "loss": 0.016419582068920135, "step": 97100 }, { "epoch": 0.9139294117647059, "grad_norm": 0.7504503298191006, "learning_rate": 2.0925607154079137e-06, "loss": 0.016824916005134583, "step": 97105 }, { "epoch": 0.9139764705882353, "grad_norm": 0.49244922758680076, "learning_rate": 2.092506843273204e-06, "loss": 0.012701256573200226, "step": 97110 }, { "epoch": 0.9140235294117647, "grad_norm": 0.5214947392499683, "learning_rate": 2.092452975299029e-06, "loss": 0.011409284919500351, "step": 97115 }, { "epoch": 0.9140705882352941, "grad_norm": 0.42879212972672676, "learning_rate": 2.092399111484854e-06, "loss": 0.013150134682655334, "step": 97120 }, { "epoch": 0.9141176470588235, "grad_norm": 0.5042418314217783, "learning_rate": 2.0923452518301425e-06, "loss": 0.011657123267650605, "step": 97125 }, { "epoch": 0.914164705882353, "grad_norm": 0.28640225447365936, "learning_rate": 2.0922913963343607e-06, "loss": 0.01700736880302429, "step": 97130 }, { "epoch": 0.9142117647058824, "grad_norm": 0.44253218057018945, "learning_rate": 2.0922375449969725e-06, "loss": 0.016375741362571715, "step": 97135 }, { "epoch": 0.9142588235294118, "grad_norm": 0.37920147204429927, "learning_rate": 2.0921836978174424e-06, "loss": 0.014197292923927306, "step": 97140 }, { "epoch": 0.9143058823529412, "grad_norm": 0.66477445808923, "learning_rate": 2.092129854795237e-06, "loss": 0.01298164129257202, "step": 97145 }, { "epoch": 0.9143529411764706, "grad_norm": 0.4711979072923381, "learning_rate": 2.092076015929819e-06, "loss": 0.013355234265327453, "step": 97150 }, { "epoch": 0.9144, "grad_norm": 0.28029446372103545, "learning_rate": 2.092022181220656e-06, "loss": 0.01135675236582756, "step": 97155 }, { "epoch": 0.9144470588235294, "grad_norm": 0.4170401682231261, "learning_rate": 2.0919683506672115e-06, "loss": 0.014939911663532257, "step": 97160 }, { "epoch": 0.9144941176470588, "grad_norm": 0.5248652762199516, "learning_rate": 2.091914524268951e-06, "loss": 0.013511075079441071, "step": 97165 }, { "epoch": 0.9145411764705882, "grad_norm": 0.34567833488640437, "learning_rate": 2.0918607020253413e-06, "loss": 0.014080029726028443, "step": 97170 }, { "epoch": 0.9145882352941177, "grad_norm": 0.37866888947270255, "learning_rate": 2.0918068839358466e-06, "loss": 0.012432161718606949, "step": 97175 }, { "epoch": 0.9146352941176471, "grad_norm": 0.4476452562401102, "learning_rate": 2.0917530699999333e-06, "loss": 0.01744106262922287, "step": 97180 }, { "epoch": 0.9146823529411765, "grad_norm": 0.47483695013220933, "learning_rate": 2.0916992602170667e-06, "loss": 0.013180632889270783, "step": 97185 }, { "epoch": 0.9147294117647059, "grad_norm": 0.3826024510018712, "learning_rate": 2.0916454545867134e-06, "loss": 0.011711366474628448, "step": 97190 }, { "epoch": 0.9147764705882353, "grad_norm": 0.38955736604652164, "learning_rate": 2.091591653108338e-06, "loss": 0.01159788966178894, "step": 97195 }, { "epoch": 0.9148235294117647, "grad_norm": 0.3578558873312228, "learning_rate": 2.091537855781408e-06, "loss": 0.015367919206619262, "step": 97200 }, { "epoch": 0.9148705882352941, "grad_norm": 0.388374821373118, "learning_rate": 2.0914840626053886e-06, "loss": 0.012320593744516373, "step": 97205 }, { "epoch": 0.9149176470588235, "grad_norm": 0.38958950521934693, "learning_rate": 2.0914302735797466e-06, "loss": 0.013927507400512695, "step": 97210 }, { "epoch": 0.9149647058823529, "grad_norm": 0.516650410391507, "learning_rate": 2.0913764887039477e-06, "loss": 0.014813271164894105, "step": 97215 }, { "epoch": 0.9150117647058823, "grad_norm": 0.5679423361161212, "learning_rate": 2.0913227079774583e-06, "loss": 0.016804738342761992, "step": 97220 }, { "epoch": 0.9150588235294118, "grad_norm": 0.4148367270308936, "learning_rate": 2.091268931399746e-06, "loss": 0.010652607679367066, "step": 97225 }, { "epoch": 0.9151058823529412, "grad_norm": 0.6446529333291517, "learning_rate": 2.0912151589702764e-06, "loss": 0.02238062024116516, "step": 97230 }, { "epoch": 0.9151529411764706, "grad_norm": 0.529713552058918, "learning_rate": 2.0911613906885166e-06, "loss": 0.01212289035320282, "step": 97235 }, { "epoch": 0.9152, "grad_norm": 0.4836882587080267, "learning_rate": 2.091107626553933e-06, "loss": 0.014375782012939453, "step": 97240 }, { "epoch": 0.9152470588235294, "grad_norm": 0.34017677964178533, "learning_rate": 2.091053866565993e-06, "loss": 0.013991162180900574, "step": 97245 }, { "epoch": 0.9152941176470588, "grad_norm": 1.8275035246202056, "learning_rate": 2.0910001107241634e-06, "loss": 0.012782944738864899, "step": 97250 }, { "epoch": 0.9153411764705882, "grad_norm": 0.5123941513825846, "learning_rate": 2.0909463590279113e-06, "loss": 0.013751837611198425, "step": 97255 }, { "epoch": 0.9153882352941176, "grad_norm": 0.5220090652529955, "learning_rate": 2.0908926114767044e-06, "loss": 0.014163029193878175, "step": 97260 }, { "epoch": 0.915435294117647, "grad_norm": 0.4681587830546731, "learning_rate": 2.090838868070009e-06, "loss": 0.011162122339010238, "step": 97265 }, { "epoch": 0.9154823529411765, "grad_norm": 0.5246436670345873, "learning_rate": 2.090785128807293e-06, "loss": 0.015193916857242584, "step": 97270 }, { "epoch": 0.9155294117647059, "grad_norm": 0.8204117816530837, "learning_rate": 2.0907313936880237e-06, "loss": 0.016279080510139467, "step": 97275 }, { "epoch": 0.9155764705882353, "grad_norm": 0.38917747277698517, "learning_rate": 2.0906776627116693e-06, "loss": 0.012367446720600129, "step": 97280 }, { "epoch": 0.9156235294117647, "grad_norm": 0.5247386696071799, "learning_rate": 2.0906239358776964e-06, "loss": 0.015428054332733154, "step": 97285 }, { "epoch": 0.9156705882352941, "grad_norm": 0.4021313644796936, "learning_rate": 2.090570213185574e-06, "loss": 0.013535350561141968, "step": 97290 }, { "epoch": 0.9157176470588235, "grad_norm": 0.4049962335914387, "learning_rate": 2.090516494634769e-06, "loss": 0.012075239419937133, "step": 97295 }, { "epoch": 0.9157647058823529, "grad_norm": 0.4588198631296687, "learning_rate": 2.09046278022475e-06, "loss": 0.01459420621395111, "step": 97300 }, { "epoch": 0.9158117647058823, "grad_norm": 0.4997270527101421, "learning_rate": 2.090409069954984e-06, "loss": 0.011351752281188964, "step": 97305 }, { "epoch": 0.9158588235294117, "grad_norm": 0.5071562063115482, "learning_rate": 2.090355363824941e-06, "loss": 0.013566146790981292, "step": 97310 }, { "epoch": 0.9159058823529411, "grad_norm": 0.3008338358065274, "learning_rate": 2.0903016618340874e-06, "loss": 0.01113331988453865, "step": 97315 }, { "epoch": 0.9159529411764706, "grad_norm": 0.5252377026737767, "learning_rate": 2.090247963981892e-06, "loss": 0.013027378916740417, "step": 97320 }, { "epoch": 0.916, "grad_norm": 0.32686965056032236, "learning_rate": 2.090194270267824e-06, "loss": 0.014094576239585876, "step": 97325 }, { "epoch": 0.9160470588235294, "grad_norm": 0.41188569501085315, "learning_rate": 2.0901405806913516e-06, "loss": 0.014071720838546752, "step": 97330 }, { "epoch": 0.9160941176470588, "grad_norm": 0.3359928427022503, "learning_rate": 2.090086895251943e-06, "loss": 0.014630374312400819, "step": 97335 }, { "epoch": 0.9161411764705882, "grad_norm": 0.5746952589488667, "learning_rate": 2.0900332139490674e-06, "loss": 0.0124480701982975, "step": 97340 }, { "epoch": 0.9161882352941176, "grad_norm": 0.4640675368500913, "learning_rate": 2.089979536782193e-06, "loss": 0.013618256151676177, "step": 97345 }, { "epoch": 0.916235294117647, "grad_norm": 0.5025495640805496, "learning_rate": 2.0899258637507897e-06, "loss": 0.011340951919555664, "step": 97350 }, { "epoch": 0.9162823529411764, "grad_norm": 0.6320989424624919, "learning_rate": 2.0898721948543257e-06, "loss": 0.013587242364883423, "step": 97355 }, { "epoch": 0.9163294117647058, "grad_norm": 0.38395270803421083, "learning_rate": 2.0898185300922707e-06, "loss": 0.014798429608345032, "step": 97360 }, { "epoch": 0.9163764705882353, "grad_norm": 0.45022762644113423, "learning_rate": 2.0897648694640926e-06, "loss": 0.01362280547618866, "step": 97365 }, { "epoch": 0.9164235294117647, "grad_norm": 0.31080456727928296, "learning_rate": 2.0897112129692624e-06, "loss": 0.014659164845943451, "step": 97370 }, { "epoch": 0.9164705882352941, "grad_norm": 0.4584407614632845, "learning_rate": 2.089657560607249e-06, "loss": 0.01528947353363037, "step": 97375 }, { "epoch": 0.9165176470588235, "grad_norm": 0.3839546348672084, "learning_rate": 2.089603912377521e-06, "loss": 0.018366819620132445, "step": 97380 }, { "epoch": 0.916564705882353, "grad_norm": 0.6868787622168328, "learning_rate": 2.0895502682795483e-06, "loss": 0.013274961709976196, "step": 97385 }, { "epoch": 0.9166117647058823, "grad_norm": 0.5207399655108055, "learning_rate": 2.0894966283128013e-06, "loss": 0.018103581666946412, "step": 97390 }, { "epoch": 0.9166588235294117, "grad_norm": 0.40199935994439995, "learning_rate": 2.0894429924767495e-06, "loss": 0.01775751858949661, "step": 97395 }, { "epoch": 0.9167058823529411, "grad_norm": 0.3829306918003091, "learning_rate": 2.0893893607708625e-06, "loss": 0.0129481241106987, "step": 97400 }, { "epoch": 0.9167529411764705, "grad_norm": 0.9190468121058849, "learning_rate": 2.089335733194611e-06, "loss": 0.024191676080226897, "step": 97405 }, { "epoch": 0.9168, "grad_norm": 0.3956149820897792, "learning_rate": 2.0892821097474632e-06, "loss": 0.011939164996147156, "step": 97410 }, { "epoch": 0.9168470588235295, "grad_norm": 0.4627809147968247, "learning_rate": 2.0892284904288914e-06, "loss": 0.010173899680376053, "step": 97415 }, { "epoch": 0.9168941176470589, "grad_norm": 0.5308590552904683, "learning_rate": 2.0891748752383647e-06, "loss": 0.01438445895910263, "step": 97420 }, { "epoch": 0.9169411764705883, "grad_norm": 0.5760682876772514, "learning_rate": 2.0891212641753537e-06, "loss": 0.01489778608083725, "step": 97425 }, { "epoch": 0.9169882352941177, "grad_norm": 0.6902075595991458, "learning_rate": 2.0890676572393287e-06, "loss": 0.013202826678752898, "step": 97430 }, { "epoch": 0.9170352941176471, "grad_norm": 0.31629123522179026, "learning_rate": 2.0890140544297606e-06, "loss": 0.012915651500225066, "step": 97435 }, { "epoch": 0.9170823529411765, "grad_norm": 0.4412357029537256, "learning_rate": 2.0889604557461196e-06, "loss": 0.015967321395874024, "step": 97440 }, { "epoch": 0.9171294117647059, "grad_norm": 0.5994255614527417, "learning_rate": 2.0889068611878764e-06, "loss": 0.015022349357604981, "step": 97445 }, { "epoch": 0.9171764705882353, "grad_norm": 0.4145181012153822, "learning_rate": 2.0888532707545024e-06, "loss": 0.011471766233444213, "step": 97450 }, { "epoch": 0.9172235294117647, "grad_norm": 0.44401676474245744, "learning_rate": 2.0887996844454685e-06, "loss": 0.011645806580781936, "step": 97455 }, { "epoch": 0.9172705882352942, "grad_norm": 0.6045773223923482, "learning_rate": 2.0887461022602447e-06, "loss": 0.015364348888397217, "step": 97460 }, { "epoch": 0.9173176470588236, "grad_norm": 0.32670337157413876, "learning_rate": 2.0886925241983027e-06, "loss": 0.013429823517799377, "step": 97465 }, { "epoch": 0.917364705882353, "grad_norm": 0.4738370104230808, "learning_rate": 2.0886389502591144e-06, "loss": 0.015497273206710816, "step": 97470 }, { "epoch": 0.9174117647058824, "grad_norm": 0.48824414167996805, "learning_rate": 2.08858538044215e-06, "loss": 0.015556775033473969, "step": 97475 }, { "epoch": 0.9174588235294118, "grad_norm": 0.5281593930655519, "learning_rate": 2.0885318147468815e-06, "loss": 0.011311764270067215, "step": 97480 }, { "epoch": 0.9175058823529412, "grad_norm": 0.40187393312461783, "learning_rate": 2.08847825317278e-06, "loss": 0.013568675518035889, "step": 97485 }, { "epoch": 0.9175529411764706, "grad_norm": 0.48454052356564126, "learning_rate": 2.088424695719318e-06, "loss": 0.012338197976350784, "step": 97490 }, { "epoch": 0.9176, "grad_norm": 0.4757363074843081, "learning_rate": 2.0883711423859657e-06, "loss": 0.01158691942691803, "step": 97495 }, { "epoch": 0.9176470588235294, "grad_norm": 0.4586078604738625, "learning_rate": 2.0883175931721956e-06, "loss": 0.01195637732744217, "step": 97500 }, { "epoch": 0.9176941176470588, "grad_norm": 0.3764076492648725, "learning_rate": 2.0882640480774802e-06, "loss": 0.011694996803998946, "step": 97505 }, { "epoch": 0.9177411764705883, "grad_norm": 0.4310816514362204, "learning_rate": 2.088210507101291e-06, "loss": 0.012231582403182983, "step": 97510 }, { "epoch": 0.9177882352941177, "grad_norm": 0.4632021366360524, "learning_rate": 2.0881569702430995e-06, "loss": 0.018139708042144775, "step": 97515 }, { "epoch": 0.9178352941176471, "grad_norm": 0.3174991587372944, "learning_rate": 2.088103437502378e-06, "loss": 0.011714887619018555, "step": 97520 }, { "epoch": 0.9178823529411765, "grad_norm": 0.39283249762884503, "learning_rate": 2.0880499088785993e-06, "loss": 0.010679499059915543, "step": 97525 }, { "epoch": 0.9179294117647059, "grad_norm": 0.466329069504008, "learning_rate": 2.0879963843712352e-06, "loss": 0.013922634720802306, "step": 97530 }, { "epoch": 0.9179764705882353, "grad_norm": 0.36558694835547567, "learning_rate": 2.0879428639797585e-06, "loss": 0.011530381441116334, "step": 97535 }, { "epoch": 0.9180235294117647, "grad_norm": 0.39144740832217123, "learning_rate": 2.087889347703642e-06, "loss": 0.012558519840240479, "step": 97540 }, { "epoch": 0.9180705882352941, "grad_norm": 0.884180099952211, "learning_rate": 2.0878358355423575e-06, "loss": 0.017894968390464783, "step": 97545 }, { "epoch": 0.9181176470588235, "grad_norm": 0.5950550971983726, "learning_rate": 2.0877823274953784e-06, "loss": 0.012224052101373672, "step": 97550 }, { "epoch": 0.918164705882353, "grad_norm": 0.5219626096300072, "learning_rate": 2.0877288235621774e-06, "loss": 0.017287030816078186, "step": 97555 }, { "epoch": 0.9182117647058824, "grad_norm": 0.2327567134869209, "learning_rate": 2.087675323742227e-06, "loss": 0.013357110321521759, "step": 97560 }, { "epoch": 0.9182588235294118, "grad_norm": 0.4730837432134306, "learning_rate": 2.0876218280350004e-06, "loss": 0.013579940795898438, "step": 97565 }, { "epoch": 0.9183058823529412, "grad_norm": 0.37351997579204915, "learning_rate": 2.0875683364399706e-06, "loss": 0.010401035100221634, "step": 97570 }, { "epoch": 0.9183529411764706, "grad_norm": 0.34274563419738613, "learning_rate": 2.0875148489566115e-06, "loss": 0.01206858828663826, "step": 97575 }, { "epoch": 0.9184, "grad_norm": 0.4384730256386049, "learning_rate": 2.0874613655843954e-06, "loss": 0.014436626434326172, "step": 97580 }, { "epoch": 0.9184470588235294, "grad_norm": 0.3491695918708908, "learning_rate": 2.0874078863227968e-06, "loss": 0.012433794140815736, "step": 97585 }, { "epoch": 0.9184941176470588, "grad_norm": 0.6855149262723859, "learning_rate": 2.0873544111712875e-06, "loss": 0.011917833983898164, "step": 97590 }, { "epoch": 0.9185411764705882, "grad_norm": 0.46375594789838753, "learning_rate": 2.0873009401293423e-06, "loss": 0.015240296721458435, "step": 97595 }, { "epoch": 0.9185882352941176, "grad_norm": 0.4207148490648714, "learning_rate": 2.087247473196435e-06, "loss": 0.013033623993396758, "step": 97600 }, { "epoch": 0.9186352941176471, "grad_norm": 0.44948820550890545, "learning_rate": 2.0871940103720387e-06, "loss": 0.011741171777248382, "step": 97605 }, { "epoch": 0.9186823529411765, "grad_norm": 0.5675028100603391, "learning_rate": 2.087140551655627e-06, "loss": 0.014977535605430603, "step": 97610 }, { "epoch": 0.9187294117647059, "grad_norm": 0.44991132442370807, "learning_rate": 2.0870870970466753e-06, "loss": 0.01497509479522705, "step": 97615 }, { "epoch": 0.9187764705882353, "grad_norm": 0.3552514565832055, "learning_rate": 2.087033646544656e-06, "loss": 0.011757971346378326, "step": 97620 }, { "epoch": 0.9188235294117647, "grad_norm": 0.3670693355589061, "learning_rate": 2.0869802001490446e-06, "loss": 0.013688747584819794, "step": 97625 }, { "epoch": 0.9188705882352941, "grad_norm": 0.48414836799379807, "learning_rate": 2.086926757859314e-06, "loss": 0.014542236924171448, "step": 97630 }, { "epoch": 0.9189176470588235, "grad_norm": 0.30157659016573624, "learning_rate": 2.086873319674939e-06, "loss": 0.010200780630111695, "step": 97635 }, { "epoch": 0.9189647058823529, "grad_norm": 0.6922271418903146, "learning_rate": 2.0868198855953944e-06, "loss": 0.01254393607378006, "step": 97640 }, { "epoch": 0.9190117647058823, "grad_norm": 0.4296779417211064, "learning_rate": 2.086766455620155e-06, "loss": 0.012133067846298218, "step": 97645 }, { "epoch": 0.9190588235294118, "grad_norm": 0.4448806272877358, "learning_rate": 2.086713029748694e-06, "loss": 0.018867984414100647, "step": 97650 }, { "epoch": 0.9191058823529412, "grad_norm": 0.5285008456809469, "learning_rate": 2.086659607980488e-06, "loss": 0.011231730878353118, "step": 97655 }, { "epoch": 0.9191529411764706, "grad_norm": 0.32777477861505955, "learning_rate": 2.08660619031501e-06, "loss": 0.016765618324279787, "step": 97660 }, { "epoch": 0.9192, "grad_norm": 0.4716986481590372, "learning_rate": 2.086552776751736e-06, "loss": 0.012369771301746369, "step": 97665 }, { "epoch": 0.9192470588235294, "grad_norm": 0.611364549998517, "learning_rate": 2.0864993672901407e-06, "loss": 0.017847752571105956, "step": 97670 }, { "epoch": 0.9192941176470588, "grad_norm": 0.552610698819512, "learning_rate": 2.086445961929699e-06, "loss": 0.014792931079864503, "step": 97675 }, { "epoch": 0.9193411764705882, "grad_norm": 0.3645746552942021, "learning_rate": 2.0863925606698863e-06, "loss": 0.01442277729511261, "step": 97680 }, { "epoch": 0.9193882352941176, "grad_norm": 0.3282991977944974, "learning_rate": 2.086339163510178e-06, "loss": 0.014931762218475341, "step": 97685 }, { "epoch": 0.919435294117647, "grad_norm": 0.6157068865968145, "learning_rate": 2.086285770450049e-06, "loss": 0.016557949781417846, "step": 97690 }, { "epoch": 0.9194823529411764, "grad_norm": 0.4145332774458032, "learning_rate": 2.0862323814889744e-06, "loss": 0.01797274351119995, "step": 97695 }, { "epoch": 0.9195294117647059, "grad_norm": 0.48022106414162075, "learning_rate": 2.0861789966264305e-06, "loss": 0.013268564641475678, "step": 97700 }, { "epoch": 0.9195764705882353, "grad_norm": 0.5239182189850665, "learning_rate": 2.086125615861893e-06, "loss": 0.013126800954341888, "step": 97705 }, { "epoch": 0.9196235294117647, "grad_norm": 0.19556018919221183, "learning_rate": 2.0860722391948375e-06, "loss": 0.011965010315179825, "step": 97710 }, { "epoch": 0.9196705882352941, "grad_norm": 0.43631029440913244, "learning_rate": 2.08601886662474e-06, "loss": 0.0115689218044281, "step": 97715 }, { "epoch": 0.9197176470588235, "grad_norm": 0.5644643439393501, "learning_rate": 2.085965498151075e-06, "loss": 0.014332598447799683, "step": 97720 }, { "epoch": 0.9197647058823529, "grad_norm": 0.5339581438055423, "learning_rate": 2.0859121337733204e-06, "loss": 0.016232378780841827, "step": 97725 }, { "epoch": 0.9198117647058823, "grad_norm": 0.5047377125583423, "learning_rate": 2.0858587734909514e-06, "loss": 0.016495582461357117, "step": 97730 }, { "epoch": 0.9198588235294117, "grad_norm": 0.42922944595123236, "learning_rate": 2.0858054173034445e-06, "loss": 0.013655826449394226, "step": 97735 }, { "epoch": 0.9199058823529411, "grad_norm": 0.5416661950416507, "learning_rate": 2.0857520652102756e-06, "loss": 0.013026179373264312, "step": 97740 }, { "epoch": 0.9199529411764706, "grad_norm": 0.46634278597215556, "learning_rate": 2.085698717210921e-06, "loss": 0.01290917992591858, "step": 97745 }, { "epoch": 0.92, "grad_norm": 0.4793248856253919, "learning_rate": 2.0856453733048583e-06, "loss": 0.014191001653671265, "step": 97750 }, { "epoch": 0.9200470588235294, "grad_norm": 0.4264718446965861, "learning_rate": 2.0855920334915626e-06, "loss": 0.01357300728559494, "step": 97755 }, { "epoch": 0.9200941176470588, "grad_norm": 0.400754953399797, "learning_rate": 2.085538697770512e-06, "loss": 0.013046570122241974, "step": 97760 }, { "epoch": 0.9201411764705882, "grad_norm": 0.5750569734406642, "learning_rate": 2.085485366141182e-06, "loss": 0.01604023277759552, "step": 97765 }, { "epoch": 0.9201882352941176, "grad_norm": 0.5906740670201752, "learning_rate": 2.08543203860305e-06, "loss": 0.011674032360315324, "step": 97770 }, { "epoch": 0.920235294117647, "grad_norm": 0.44198682112214677, "learning_rate": 2.085378715155593e-06, "loss": 0.013365039229393005, "step": 97775 }, { "epoch": 0.9202823529411764, "grad_norm": 0.34802270688738196, "learning_rate": 2.0853253957982875e-06, "loss": 0.015522390604019165, "step": 97780 }, { "epoch": 0.9203294117647058, "grad_norm": 0.5836568641447059, "learning_rate": 2.0852720805306116e-06, "loss": 0.014815038442611695, "step": 97785 }, { "epoch": 0.9203764705882352, "grad_norm": 0.3602456497944678, "learning_rate": 2.085218769352042e-06, "loss": 0.014601749181747437, "step": 97790 }, { "epoch": 0.9204235294117648, "grad_norm": 0.4700006922524439, "learning_rate": 2.085165462262056e-06, "loss": 0.013797043263912201, "step": 97795 }, { "epoch": 0.9204705882352942, "grad_norm": 0.620854009738144, "learning_rate": 2.085112159260131e-06, "loss": 0.018043944239616395, "step": 97800 }, { "epoch": 0.9205176470588236, "grad_norm": 0.30463341763545365, "learning_rate": 2.085058860345745e-06, "loss": 0.013181623816490174, "step": 97805 }, { "epoch": 0.920564705882353, "grad_norm": 0.3171334446813008, "learning_rate": 2.0850055655183745e-06, "loss": 0.01461244821548462, "step": 97810 }, { "epoch": 0.9206117647058824, "grad_norm": 0.45874033615817117, "learning_rate": 2.084952274777498e-06, "loss": 0.015267904102802276, "step": 97815 }, { "epoch": 0.9206588235294118, "grad_norm": 0.31826423600848447, "learning_rate": 2.0848989881225935e-06, "loss": 0.013452562689781188, "step": 97820 }, { "epoch": 0.9207058823529412, "grad_norm": 0.4661826762661897, "learning_rate": 2.0848457055531384e-06, "loss": 0.013776399195194244, "step": 97825 }, { "epoch": 0.9207529411764706, "grad_norm": 0.6370898258426988, "learning_rate": 2.084792427068611e-06, "loss": 0.016153639554977416, "step": 97830 }, { "epoch": 0.9208, "grad_norm": 0.4219870909274532, "learning_rate": 2.084739152668489e-06, "loss": 0.012574560940265656, "step": 97835 }, { "epoch": 0.9208470588235295, "grad_norm": 0.5091714318172953, "learning_rate": 2.0846858823522513e-06, "loss": 0.01586863398551941, "step": 97840 }, { "epoch": 0.9208941176470589, "grad_norm": 0.5313825878040125, "learning_rate": 2.0846326161193745e-06, "loss": 0.012862595915794372, "step": 97845 }, { "epoch": 0.9209411764705883, "grad_norm": 0.36819663506695915, "learning_rate": 2.0845793539693397e-06, "loss": 0.012216213345527648, "step": 97850 }, { "epoch": 0.9209882352941177, "grad_norm": 0.47681661780555745, "learning_rate": 2.0845260959016225e-06, "loss": 0.015671069920063018, "step": 97855 }, { "epoch": 0.9210352941176471, "grad_norm": 0.6271260552902291, "learning_rate": 2.0844728419157034e-06, "loss": 0.018518921732902528, "step": 97860 }, { "epoch": 0.9210823529411765, "grad_norm": 0.557147166214699, "learning_rate": 2.08441959201106e-06, "loss": 0.015053936839103698, "step": 97865 }, { "epoch": 0.9211294117647059, "grad_norm": 0.6209585218700712, "learning_rate": 2.084366346187171e-06, "loss": 0.01523374319076538, "step": 97870 }, { "epoch": 0.9211764705882353, "grad_norm": 0.46009210308522963, "learning_rate": 2.084313104443516e-06, "loss": 0.014340737462043762, "step": 97875 }, { "epoch": 0.9212235294117647, "grad_norm": 0.2604913852099098, "learning_rate": 2.084259866779573e-06, "loss": 0.012454457581043243, "step": 97880 }, { "epoch": 0.9212705882352941, "grad_norm": 0.44318311495876017, "learning_rate": 2.0842066331948224e-06, "loss": 0.016870751976966858, "step": 97885 }, { "epoch": 0.9213176470588236, "grad_norm": 0.4146959068358888, "learning_rate": 2.084153403688742e-06, "loss": 0.01143236756324768, "step": 97890 }, { "epoch": 0.921364705882353, "grad_norm": 0.6842479721260531, "learning_rate": 2.084100178260811e-06, "loss": 0.014296033978462219, "step": 97895 }, { "epoch": 0.9214117647058824, "grad_norm": 0.3972954403694137, "learning_rate": 2.084046956910509e-06, "loss": 0.011041422188282014, "step": 97900 }, { "epoch": 0.9214588235294118, "grad_norm": 0.43168206543211946, "learning_rate": 2.0839937396373158e-06, "loss": 0.013383099436759948, "step": 97905 }, { "epoch": 0.9215058823529412, "grad_norm": 0.36999130273834824, "learning_rate": 2.0839405264407107e-06, "loss": 0.01584697365760803, "step": 97910 }, { "epoch": 0.9215529411764706, "grad_norm": 0.5057490376523559, "learning_rate": 2.0838873173201722e-06, "loss": 0.01146000474691391, "step": 97915 }, { "epoch": 0.9216, "grad_norm": 0.8441275613685056, "learning_rate": 2.083834112275182e-06, "loss": 0.021389737725257874, "step": 97920 }, { "epoch": 0.9216470588235294, "grad_norm": 0.2559552682390762, "learning_rate": 2.083780911305218e-06, "loss": 0.015419179201126098, "step": 97925 }, { "epoch": 0.9216941176470588, "grad_norm": 0.5154008489399029, "learning_rate": 2.0837277144097607e-06, "loss": 0.01564970314502716, "step": 97930 }, { "epoch": 0.9217411764705883, "grad_norm": 0.37367598642274535, "learning_rate": 2.08367452158829e-06, "loss": 0.012629285454750061, "step": 97935 }, { "epoch": 0.9217882352941177, "grad_norm": 0.4911351504529603, "learning_rate": 2.0836213328402865e-06, "loss": 0.015678048133850098, "step": 97940 }, { "epoch": 0.9218352941176471, "grad_norm": 0.6301912171066623, "learning_rate": 2.083568148165229e-06, "loss": 0.01900007128715515, "step": 97945 }, { "epoch": 0.9218823529411765, "grad_norm": 0.34771008586540236, "learning_rate": 2.083514967562599e-06, "loss": 0.01681162863969803, "step": 97950 }, { "epoch": 0.9219294117647059, "grad_norm": 0.7909734898874694, "learning_rate": 2.083461791031876e-06, "loss": 0.01575890779495239, "step": 97955 }, { "epoch": 0.9219764705882353, "grad_norm": 0.5387285426594738, "learning_rate": 2.0834086185725413e-06, "loss": 0.021578474342823027, "step": 97960 }, { "epoch": 0.9220235294117647, "grad_norm": 0.33743133685175314, "learning_rate": 2.083355450184074e-06, "loss": 0.012831228971481323, "step": 97965 }, { "epoch": 0.9220705882352941, "grad_norm": 0.4850614267275938, "learning_rate": 2.0833022858659562e-06, "loss": 0.012357810884714127, "step": 97970 }, { "epoch": 0.9221176470588235, "grad_norm": 0.2752449389382708, "learning_rate": 2.0832491256176675e-06, "loss": 0.011423146724700928, "step": 97975 }, { "epoch": 0.9221647058823529, "grad_norm": 0.3631408861154853, "learning_rate": 2.0831959694386893e-06, "loss": 0.013776668906211853, "step": 97980 }, { "epoch": 0.9222117647058824, "grad_norm": 0.5661046845597266, "learning_rate": 2.0831428173285025e-06, "loss": 0.014498093724250793, "step": 97985 }, { "epoch": 0.9222588235294118, "grad_norm": 0.3441268771583355, "learning_rate": 2.083089669286587e-06, "loss": 0.010879933089017867, "step": 97990 }, { "epoch": 0.9223058823529412, "grad_norm": 0.33785547466439564, "learning_rate": 2.0830365253124248e-06, "loss": 0.012284774333238602, "step": 97995 }, { "epoch": 0.9223529411764706, "grad_norm": 0.3833186301485067, "learning_rate": 2.0829833854054975e-06, "loss": 0.015620481967926026, "step": 98000 }, { "epoch": 0.9224, "grad_norm": 0.3056591078624915, "learning_rate": 2.0829302495652847e-06, "loss": 0.014110089838504791, "step": 98005 }, { "epoch": 0.9224470588235294, "grad_norm": 0.6225897604313216, "learning_rate": 2.0828771177912697e-06, "loss": 0.01531522423028946, "step": 98010 }, { "epoch": 0.9224941176470588, "grad_norm": 0.8155133796414553, "learning_rate": 2.0828239900829323e-06, "loss": 0.016027995944023134, "step": 98015 }, { "epoch": 0.9225411764705882, "grad_norm": 0.5939402850282974, "learning_rate": 2.082770866439755e-06, "loss": 0.012876254320144654, "step": 98020 }, { "epoch": 0.9225882352941176, "grad_norm": 0.28441821343370355, "learning_rate": 2.082717746861219e-06, "loss": 0.013460145890712738, "step": 98025 }, { "epoch": 0.9226352941176471, "grad_norm": 0.29000113867617067, "learning_rate": 2.082664631346806e-06, "loss": 0.012494415044784546, "step": 98030 }, { "epoch": 0.9226823529411765, "grad_norm": 0.5361848256306471, "learning_rate": 2.0826115198959975e-06, "loss": 0.013843733072280883, "step": 98035 }, { "epoch": 0.9227294117647059, "grad_norm": 0.3891312562485772, "learning_rate": 2.082558412508276e-06, "loss": 0.011034739017486573, "step": 98040 }, { "epoch": 0.9227764705882353, "grad_norm": 0.7718047962285687, "learning_rate": 2.0825053091831235e-06, "loss": 0.01571595072746277, "step": 98045 }, { "epoch": 0.9228235294117647, "grad_norm": 0.7688489915843426, "learning_rate": 2.082452209920022e-06, "loss": 0.014971157908439637, "step": 98050 }, { "epoch": 0.9228705882352941, "grad_norm": 0.3546144223340724, "learning_rate": 2.0823991147184526e-06, "loss": 0.015187953412532807, "step": 98055 }, { "epoch": 0.9229176470588235, "grad_norm": 0.5920118233085031, "learning_rate": 2.0823460235778985e-06, "loss": 0.013302105665206908, "step": 98060 }, { "epoch": 0.9229647058823529, "grad_norm": 0.6385642688400838, "learning_rate": 2.0822929364978425e-06, "loss": 0.014495216310024261, "step": 98065 }, { "epoch": 0.9230117647058823, "grad_norm": 0.49268225107192043, "learning_rate": 2.082239853477766e-06, "loss": 0.01721073091030121, "step": 98070 }, { "epoch": 0.9230588235294117, "grad_norm": 0.38852058634395764, "learning_rate": 2.0821867745171524e-06, "loss": 0.013035182654857636, "step": 98075 }, { "epoch": 0.9231058823529412, "grad_norm": 0.4530918232684489, "learning_rate": 2.0821336996154833e-06, "loss": 0.01643632650375366, "step": 98080 }, { "epoch": 0.9231529411764706, "grad_norm": 0.30306942601369724, "learning_rate": 2.082080628772243e-06, "loss": 0.010453255474567413, "step": 98085 }, { "epoch": 0.9232, "grad_norm": 0.5147202339511155, "learning_rate": 2.0820275619869125e-06, "loss": 0.01444825977087021, "step": 98090 }, { "epoch": 0.9232470588235294, "grad_norm": 0.4741208822520424, "learning_rate": 2.081974499258976e-06, "loss": 0.012522532045841217, "step": 98095 }, { "epoch": 0.9232941176470588, "grad_norm": 0.5393043067607356, "learning_rate": 2.0819214405879156e-06, "loss": 0.013967129588127136, "step": 98100 }, { "epoch": 0.9233411764705882, "grad_norm": 0.5143569305155847, "learning_rate": 2.081868385973215e-06, "loss": 0.01349174529314041, "step": 98105 }, { "epoch": 0.9233882352941176, "grad_norm": 0.4546899944348192, "learning_rate": 2.081815335414357e-06, "loss": 0.018905201554298402, "step": 98110 }, { "epoch": 0.923435294117647, "grad_norm": 0.7847055563388238, "learning_rate": 2.0817622889108257e-06, "loss": 0.02248292863368988, "step": 98115 }, { "epoch": 0.9234823529411764, "grad_norm": 0.38872678643189434, "learning_rate": 2.0817092464621033e-06, "loss": 0.013770616054534912, "step": 98120 }, { "epoch": 0.9235294117647059, "grad_norm": 0.46624735013292584, "learning_rate": 2.081656208067674e-06, "loss": 0.014539310336112976, "step": 98125 }, { "epoch": 0.9235764705882353, "grad_norm": 0.34301051446082803, "learning_rate": 2.0816031737270207e-06, "loss": 0.012716385722160339, "step": 98130 }, { "epoch": 0.9236235294117647, "grad_norm": 0.43828756250571177, "learning_rate": 2.081550143439628e-06, "loss": 0.016001603007316588, "step": 98135 }, { "epoch": 0.9236705882352941, "grad_norm": 0.6355291961816846, "learning_rate": 2.0814971172049785e-06, "loss": 0.014789654314517975, "step": 98140 }, { "epoch": 0.9237176470588235, "grad_norm": 0.4008207454956828, "learning_rate": 2.081444095022557e-06, "loss": 0.016897508502006532, "step": 98145 }, { "epoch": 0.9237647058823529, "grad_norm": 0.6792284353518018, "learning_rate": 2.0813910768918473e-06, "loss": 0.012305858731269836, "step": 98150 }, { "epoch": 0.9238117647058823, "grad_norm": 1.0647421387149651, "learning_rate": 2.0813380628123326e-06, "loss": 0.015760067105293273, "step": 98155 }, { "epoch": 0.9238588235294117, "grad_norm": 0.4562517221598338, "learning_rate": 2.0812850527834973e-06, "loss": 0.018094301223754883, "step": 98160 }, { "epoch": 0.9239058823529411, "grad_norm": 0.34203767475910973, "learning_rate": 2.0812320468048262e-06, "loss": 0.016252589225769044, "step": 98165 }, { "epoch": 0.9239529411764706, "grad_norm": 0.5070250299036073, "learning_rate": 2.0811790448758036e-06, "loss": 0.016838541626930235, "step": 98170 }, { "epoch": 0.924, "grad_norm": 0.4922513219947799, "learning_rate": 2.0811260469959122e-06, "loss": 0.0155682310461998, "step": 98175 }, { "epoch": 0.9240470588235294, "grad_norm": 0.5476961797307448, "learning_rate": 2.081073053164639e-06, "loss": 0.015342810750007629, "step": 98180 }, { "epoch": 0.9240941176470588, "grad_norm": 0.3645243765105848, "learning_rate": 2.081020063381467e-06, "loss": 0.015378600358963013, "step": 98185 }, { "epoch": 0.9241411764705882, "grad_norm": 0.6726799374975394, "learning_rate": 2.080967077645881e-06, "loss": 0.016412380337715148, "step": 98190 }, { "epoch": 0.9241882352941176, "grad_norm": 0.5444793971373396, "learning_rate": 2.080914095957365e-06, "loss": 0.015799501538276674, "step": 98195 }, { "epoch": 0.924235294117647, "grad_norm": 0.5686001541155735, "learning_rate": 2.080861118315406e-06, "loss": 0.014364862442016601, "step": 98200 }, { "epoch": 0.9242823529411764, "grad_norm": 0.5702719972279456, "learning_rate": 2.0808081447194867e-06, "loss": 0.016552339494228362, "step": 98205 }, { "epoch": 0.9243294117647058, "grad_norm": 0.45713087482205433, "learning_rate": 2.0807551751690934e-06, "loss": 0.019873498380184172, "step": 98210 }, { "epoch": 0.9243764705882352, "grad_norm": 0.23202373244005106, "learning_rate": 2.080702209663711e-06, "loss": 0.01024685800075531, "step": 98215 }, { "epoch": 0.9244235294117648, "grad_norm": 0.3977179670027719, "learning_rate": 2.080649248202824e-06, "loss": 0.011404784023761749, "step": 98220 }, { "epoch": 0.9244705882352942, "grad_norm": 0.49145600370643105, "learning_rate": 2.0805962907859186e-06, "loss": 0.011345426738262176, "step": 98225 }, { "epoch": 0.9245176470588236, "grad_norm": 0.48493162473210966, "learning_rate": 2.08054333741248e-06, "loss": 0.01634376645088196, "step": 98230 }, { "epoch": 0.924564705882353, "grad_norm": 0.5207821126829846, "learning_rate": 2.0804903880819933e-06, "loss": 0.012318591773509979, "step": 98235 }, { "epoch": 0.9246117647058824, "grad_norm": 0.498388980632838, "learning_rate": 2.080437442793944e-06, "loss": 0.011511899530887604, "step": 98240 }, { "epoch": 0.9246588235294118, "grad_norm": 0.4285146924194188, "learning_rate": 2.080384501547818e-06, "loss": 0.012766608595848083, "step": 98245 }, { "epoch": 0.9247058823529412, "grad_norm": 0.8341225057072945, "learning_rate": 2.0803315643431012e-06, "loss": 0.019688668847084045, "step": 98250 }, { "epoch": 0.9247529411764706, "grad_norm": 0.4634156889714866, "learning_rate": 2.0802786311792795e-06, "loss": 0.012037580460309982, "step": 98255 }, { "epoch": 0.9248, "grad_norm": 0.4709513439545636, "learning_rate": 2.080225702055839e-06, "loss": 0.018534407019615173, "step": 98260 }, { "epoch": 0.9248470588235295, "grad_norm": 0.5841999094208054, "learning_rate": 2.080172776972264e-06, "loss": 0.012596455216407777, "step": 98265 }, { "epoch": 0.9248941176470589, "grad_norm": 0.319285942630411, "learning_rate": 2.080119855928043e-06, "loss": 0.014873111248016357, "step": 98270 }, { "epoch": 0.9249411764705883, "grad_norm": 0.614272615484045, "learning_rate": 2.0800669389226606e-06, "loss": 0.014726093411445618, "step": 98275 }, { "epoch": 0.9249882352941177, "grad_norm": 0.5181612142891541, "learning_rate": 2.0800140259556043e-06, "loss": 0.015271610021591187, "step": 98280 }, { "epoch": 0.9250352941176471, "grad_norm": 0.5310552465224748, "learning_rate": 2.0799611170263595e-06, "loss": 0.013536110520362854, "step": 98285 }, { "epoch": 0.9250823529411765, "grad_norm": 0.5492106511130845, "learning_rate": 2.079908212134413e-06, "loss": 0.014493641257286072, "step": 98290 }, { "epoch": 0.9251294117647059, "grad_norm": 0.3225652599357797, "learning_rate": 2.079855311279252e-06, "loss": 0.01277088075876236, "step": 98295 }, { "epoch": 0.9251764705882353, "grad_norm": 0.40429446454401435, "learning_rate": 2.0798024144603616e-06, "loss": 0.014109091460704803, "step": 98300 }, { "epoch": 0.9252235294117647, "grad_norm": 0.7748412563956529, "learning_rate": 2.0797495216772297e-06, "loss": 0.01940332055091858, "step": 98305 }, { "epoch": 0.9252705882352941, "grad_norm": 0.2889360007878436, "learning_rate": 2.0796966329293436e-06, "loss": 0.011702709645032883, "step": 98310 }, { "epoch": 0.9253176470588236, "grad_norm": 0.5097884645892238, "learning_rate": 2.0796437482161893e-06, "loss": 0.01078137308359146, "step": 98315 }, { "epoch": 0.925364705882353, "grad_norm": 0.4334596788328675, "learning_rate": 2.0795908675372543e-06, "loss": 0.012299223244190216, "step": 98320 }, { "epoch": 0.9254117647058824, "grad_norm": 0.4997040183752921, "learning_rate": 2.0795379908920253e-06, "loss": 0.012371832132339477, "step": 98325 }, { "epoch": 0.9254588235294118, "grad_norm": 0.34733538115575985, "learning_rate": 2.0794851182799895e-06, "loss": 0.0109861820936203, "step": 98330 }, { "epoch": 0.9255058823529412, "grad_norm": 0.4236638946792865, "learning_rate": 2.079432249700635e-06, "loss": 0.013425815105438232, "step": 98335 }, { "epoch": 0.9255529411764706, "grad_norm": 0.3106334798944023, "learning_rate": 2.0793793851534485e-06, "loss": 0.00911601483821869, "step": 98340 }, { "epoch": 0.9256, "grad_norm": 0.44048571813949494, "learning_rate": 2.0793265246379177e-06, "loss": 0.0126799076795578, "step": 98345 }, { "epoch": 0.9256470588235294, "grad_norm": 0.5878409320266161, "learning_rate": 2.07927366815353e-06, "loss": 0.015306901931762696, "step": 98350 }, { "epoch": 0.9256941176470588, "grad_norm": 0.3883164656945352, "learning_rate": 2.0792208156997735e-06, "loss": 0.015393739938735962, "step": 98355 }, { "epoch": 0.9257411764705883, "grad_norm": 0.9214091152996873, "learning_rate": 2.079167967276135e-06, "loss": 0.028321665525436402, "step": 98360 }, { "epoch": 0.9257882352941177, "grad_norm": 0.9320064497587139, "learning_rate": 2.0791151228821036e-06, "loss": 0.015570011734962464, "step": 98365 }, { "epoch": 0.9258352941176471, "grad_norm": 0.20246367627269135, "learning_rate": 2.0790622825171665e-06, "loss": 0.011092756688594819, "step": 98370 }, { "epoch": 0.9258823529411765, "grad_norm": 0.5468606211708048, "learning_rate": 2.0790094461808116e-06, "loss": 0.015889006853103637, "step": 98375 }, { "epoch": 0.9259294117647059, "grad_norm": 0.46801200814866395, "learning_rate": 2.0789566138725278e-06, "loss": 0.009791909903287887, "step": 98380 }, { "epoch": 0.9259764705882353, "grad_norm": 0.34751117079281924, "learning_rate": 2.0789037855918026e-06, "loss": 0.011923062801361083, "step": 98385 }, { "epoch": 0.9260235294117647, "grad_norm": 0.5310321315975746, "learning_rate": 2.0788509613381245e-06, "loss": 0.010826467722654342, "step": 98390 }, { "epoch": 0.9260705882352941, "grad_norm": 0.5446460604783476, "learning_rate": 2.078798141110982e-06, "loss": 0.010262763500213623, "step": 98395 }, { "epoch": 0.9261176470588235, "grad_norm": 0.4237872890569532, "learning_rate": 2.078745324909863e-06, "loss": 0.011957971751689911, "step": 98400 }, { "epoch": 0.9261647058823529, "grad_norm": 0.38242514332012856, "learning_rate": 2.0786925127342573e-06, "loss": 0.01384783536195755, "step": 98405 }, { "epoch": 0.9262117647058824, "grad_norm": 0.49234855556415946, "learning_rate": 2.0786397045836526e-06, "loss": 0.011601833999156952, "step": 98410 }, { "epoch": 0.9262588235294118, "grad_norm": 0.40176261434341337, "learning_rate": 2.0785869004575378e-06, "loss": 0.014971768856048584, "step": 98415 }, { "epoch": 0.9263058823529412, "grad_norm": 0.3727394924592054, "learning_rate": 2.078534100355402e-06, "loss": 0.013645265996456147, "step": 98420 }, { "epoch": 0.9263529411764706, "grad_norm": 1.3781773075942672, "learning_rate": 2.0784813042767333e-06, "loss": 0.026875048875808716, "step": 98425 }, { "epoch": 0.9264, "grad_norm": 0.39443612025270025, "learning_rate": 2.078428512221023e-06, "loss": 0.014335820078849792, "step": 98430 }, { "epoch": 0.9264470588235294, "grad_norm": 0.4026489503053758, "learning_rate": 2.078375724187757e-06, "loss": 0.01218501552939415, "step": 98435 }, { "epoch": 0.9264941176470588, "grad_norm": 0.5267841896483628, "learning_rate": 2.078322940176427e-06, "loss": 0.01360023021697998, "step": 98440 }, { "epoch": 0.9265411764705882, "grad_norm": 0.48377223436072747, "learning_rate": 2.078270160186522e-06, "loss": 0.010580813884735108, "step": 98445 }, { "epoch": 0.9265882352941176, "grad_norm": 0.6918407784571885, "learning_rate": 2.07821738421753e-06, "loss": 0.014686746895313263, "step": 98450 }, { "epoch": 0.9266352941176471, "grad_norm": 0.4479027632475666, "learning_rate": 2.0781646122689417e-06, "loss": 0.01605830192565918, "step": 98455 }, { "epoch": 0.9266823529411765, "grad_norm": 0.4376664306287763, "learning_rate": 2.078111844340246e-06, "loss": 0.014856183528900146, "step": 98460 }, { "epoch": 0.9267294117647059, "grad_norm": 0.45599345931527546, "learning_rate": 2.0780590804309334e-06, "loss": 0.011853557080030441, "step": 98465 }, { "epoch": 0.9267764705882353, "grad_norm": 0.3153537171532888, "learning_rate": 2.078006320540493e-06, "loss": 0.011335211992263793, "step": 98470 }, { "epoch": 0.9268235294117647, "grad_norm": 0.5703423182932894, "learning_rate": 2.0779535646684144e-06, "loss": 0.014832821488380433, "step": 98475 }, { "epoch": 0.9268705882352941, "grad_norm": 0.3158865520817987, "learning_rate": 2.077900812814189e-06, "loss": 0.01167515069246292, "step": 98480 }, { "epoch": 0.9269176470588235, "grad_norm": 0.5742022476685447, "learning_rate": 2.077848064977305e-06, "loss": 0.01521403193473816, "step": 98485 }, { "epoch": 0.9269647058823529, "grad_norm": 0.5235250105560444, "learning_rate": 2.0777953211572535e-06, "loss": 0.015199649333953857, "step": 98490 }, { "epoch": 0.9270117647058823, "grad_norm": 0.2659132709719888, "learning_rate": 2.0777425813535247e-06, "loss": 0.01336071640253067, "step": 98495 }, { "epoch": 0.9270588235294117, "grad_norm": 0.4054695936526247, "learning_rate": 2.0776898455656083e-06, "loss": 0.01204545795917511, "step": 98500 }, { "epoch": 0.9271058823529412, "grad_norm": 0.4368115865225457, "learning_rate": 2.0776371137929953e-06, "loss": 0.012103714048862457, "step": 98505 }, { "epoch": 0.9271529411764706, "grad_norm": 0.4187308856340138, "learning_rate": 2.077584386035176e-06, "loss": 0.011059370636940003, "step": 98510 }, { "epoch": 0.9272, "grad_norm": 0.5394809494526756, "learning_rate": 2.077531662291641e-06, "loss": 0.013161224126815797, "step": 98515 }, { "epoch": 0.9272470588235294, "grad_norm": 0.4552710234833232, "learning_rate": 2.077478942561881e-06, "loss": 0.017870792746543886, "step": 98520 }, { "epoch": 0.9272941176470588, "grad_norm": 0.396124096973287, "learning_rate": 2.077426226845387e-06, "loss": 0.010490202903747558, "step": 98525 }, { "epoch": 0.9273411764705882, "grad_norm": 0.4984307617721281, "learning_rate": 2.077373515141649e-06, "loss": 0.012407322973012924, "step": 98530 }, { "epoch": 0.9273882352941176, "grad_norm": 0.36189092522328203, "learning_rate": 2.0773208074501584e-06, "loss": 0.016275352239608763, "step": 98535 }, { "epoch": 0.927435294117647, "grad_norm": 0.4997884150993254, "learning_rate": 2.077268103770407e-06, "loss": 0.0135717511177063, "step": 98540 }, { "epoch": 0.9274823529411764, "grad_norm": 0.27045785137994893, "learning_rate": 2.0772154041018847e-06, "loss": 0.011484236270189286, "step": 98545 }, { "epoch": 0.9275294117647059, "grad_norm": 0.3835524672479061, "learning_rate": 2.0771627084440833e-06, "loss": 0.011223900318145751, "step": 98550 }, { "epoch": 0.9275764705882353, "grad_norm": 0.5377269889352967, "learning_rate": 2.0771100167964938e-06, "loss": 0.013767844438552857, "step": 98555 }, { "epoch": 0.9276235294117647, "grad_norm": 0.30462976181763834, "learning_rate": 2.0770573291586087e-06, "loss": 0.014612734317779541, "step": 98560 }, { "epoch": 0.9276705882352941, "grad_norm": 0.5311366989334707, "learning_rate": 2.0770046455299178e-06, "loss": 0.013782024383544922, "step": 98565 }, { "epoch": 0.9277176470588235, "grad_norm": 0.5602410617291771, "learning_rate": 2.0769519659099136e-06, "loss": 0.01629677563905716, "step": 98570 }, { "epoch": 0.9277647058823529, "grad_norm": 0.6506192471637144, "learning_rate": 2.076899290298088e-06, "loss": 0.014578330516815185, "step": 98575 }, { "epoch": 0.9278117647058823, "grad_norm": 0.583721523460466, "learning_rate": 2.0768466186939323e-06, "loss": 0.01585538685321808, "step": 98580 }, { "epoch": 0.9278588235294117, "grad_norm": 0.47231072873672936, "learning_rate": 2.076793951096938e-06, "loss": 0.01643549054861069, "step": 98585 }, { "epoch": 0.9279058823529411, "grad_norm": 0.4621174342349721, "learning_rate": 2.0767412875065976e-06, "loss": 0.014481045305728912, "step": 98590 }, { "epoch": 0.9279529411764705, "grad_norm": 0.7458425203518235, "learning_rate": 2.0766886279224027e-06, "loss": 0.014560931921005249, "step": 98595 }, { "epoch": 0.928, "grad_norm": 0.4996415629317399, "learning_rate": 2.076635972343846e-06, "loss": 0.01294633001089096, "step": 98600 }, { "epoch": 0.9280470588235294, "grad_norm": 0.6168565548012082, "learning_rate": 2.0765833207704195e-06, "loss": 0.016473987698554994, "step": 98605 }, { "epoch": 0.9280941176470588, "grad_norm": 0.5701703324327497, "learning_rate": 2.076530673201615e-06, "loss": 0.01396155208349228, "step": 98610 }, { "epoch": 0.9281411764705882, "grad_norm": 0.6112382778048167, "learning_rate": 2.0764780296369258e-06, "loss": 0.01805344969034195, "step": 98615 }, { "epoch": 0.9281882352941176, "grad_norm": 0.482383841166754, "learning_rate": 2.0764253900758435e-06, "loss": 0.01345861703157425, "step": 98620 }, { "epoch": 0.928235294117647, "grad_norm": 0.35511797849944815, "learning_rate": 2.0763727545178605e-06, "loss": 0.01698562055826187, "step": 98625 }, { "epoch": 0.9282823529411764, "grad_norm": 0.5847005086899719, "learning_rate": 2.0763201229624707e-06, "loss": 0.01611415147781372, "step": 98630 }, { "epoch": 0.9283294117647058, "grad_norm": 0.33819295755962253, "learning_rate": 2.076267495409166e-06, "loss": 0.014473974704742432, "step": 98635 }, { "epoch": 0.9283764705882352, "grad_norm": 0.5922735944505207, "learning_rate": 2.0762148718574387e-06, "loss": 0.016119134426116944, "step": 98640 }, { "epoch": 0.9284235294117648, "grad_norm": 0.37495465662332855, "learning_rate": 2.0761622523067826e-06, "loss": 0.013866133987903595, "step": 98645 }, { "epoch": 0.9284705882352942, "grad_norm": 0.4437536108144337, "learning_rate": 2.0761096367566907e-06, "loss": 0.012401202321052551, "step": 98650 }, { "epoch": 0.9285176470588236, "grad_norm": 0.3030569213882608, "learning_rate": 2.076057025206656e-06, "loss": 0.012707290053367615, "step": 98655 }, { "epoch": 0.928564705882353, "grad_norm": 0.43652519107070625, "learning_rate": 2.076004417656171e-06, "loss": 0.01621520221233368, "step": 98660 }, { "epoch": 0.9286117647058824, "grad_norm": 0.3716281272041624, "learning_rate": 2.0759518141047293e-06, "loss": 0.013173706829547882, "step": 98665 }, { "epoch": 0.9286588235294118, "grad_norm": 0.5900377568947399, "learning_rate": 2.075899214551825e-06, "loss": 0.013540223240852356, "step": 98670 }, { "epoch": 0.9287058823529412, "grad_norm": 0.4640202450500069, "learning_rate": 2.0758466189969506e-06, "loss": 0.013857373595237732, "step": 98675 }, { "epoch": 0.9287529411764706, "grad_norm": 0.47129269515349864, "learning_rate": 2.0757940274396e-06, "loss": 0.0164849191904068, "step": 98680 }, { "epoch": 0.9288, "grad_norm": 0.46959364129985615, "learning_rate": 2.075741439879267e-06, "loss": 0.013273325562477113, "step": 98685 }, { "epoch": 0.9288470588235294, "grad_norm": 0.33176625751364824, "learning_rate": 2.0756888563154458e-06, "loss": 0.01172095388174057, "step": 98690 }, { "epoch": 0.9288941176470589, "grad_norm": 0.4129711433418708, "learning_rate": 2.0756362767476298e-06, "loss": 0.018537414073944092, "step": 98695 }, { "epoch": 0.9289411764705883, "grad_norm": 0.5005154813512402, "learning_rate": 2.075583701175312e-06, "loss": 0.01530953198671341, "step": 98700 }, { "epoch": 0.9289882352941177, "grad_norm": 0.26448690414476234, "learning_rate": 2.0755311295979875e-06, "loss": 0.009992454946041108, "step": 98705 }, { "epoch": 0.9290352941176471, "grad_norm": 0.7161897630805376, "learning_rate": 2.07547856201515e-06, "loss": 0.01320057213306427, "step": 98710 }, { "epoch": 0.9290823529411765, "grad_norm": 0.26419976480106316, "learning_rate": 2.075425998426293e-06, "loss": 0.012595990300178527, "step": 98715 }, { "epoch": 0.9291294117647059, "grad_norm": 0.5008189027595306, "learning_rate": 2.075373438830913e-06, "loss": 0.00983426719903946, "step": 98720 }, { "epoch": 0.9291764705882353, "grad_norm": 0.5419695270074417, "learning_rate": 2.075320883228502e-06, "loss": 0.011735576391220092, "step": 98725 }, { "epoch": 0.9292235294117647, "grad_norm": 0.5373611375305841, "learning_rate": 2.0752683316185554e-06, "loss": 0.024235476553440095, "step": 98730 }, { "epoch": 0.9292705882352941, "grad_norm": 0.6010396481879229, "learning_rate": 2.0752157840005676e-06, "loss": 0.013582795858383179, "step": 98735 }, { "epoch": 0.9293176470588236, "grad_norm": 0.31452698986961886, "learning_rate": 2.0751632403740333e-06, "loss": 0.015722763538360596, "step": 98740 }, { "epoch": 0.929364705882353, "grad_norm": 0.303378455244159, "learning_rate": 2.0751107007384472e-06, "loss": 0.011860862374305725, "step": 98745 }, { "epoch": 0.9294117647058824, "grad_norm": 0.4126229126877122, "learning_rate": 2.0750581650933045e-06, "loss": 0.013531073927879333, "step": 98750 }, { "epoch": 0.9294588235294118, "grad_norm": 0.37716397503367255, "learning_rate": 2.0750056334380995e-06, "loss": 0.02145906388759613, "step": 98755 }, { "epoch": 0.9295058823529412, "grad_norm": 0.6297834171374431, "learning_rate": 2.0749531057723264e-06, "loss": 0.017688822746276856, "step": 98760 }, { "epoch": 0.9295529411764706, "grad_norm": 0.35910836686147746, "learning_rate": 2.0749005820954827e-06, "loss": 0.01476585865020752, "step": 98765 }, { "epoch": 0.9296, "grad_norm": 0.9031775206505757, "learning_rate": 2.0748480624070613e-06, "loss": 0.012358838319778442, "step": 98770 }, { "epoch": 0.9296470588235294, "grad_norm": 0.31709130398263285, "learning_rate": 2.074795546706558e-06, "loss": 0.011590241640806197, "step": 98775 }, { "epoch": 0.9296941176470588, "grad_norm": 0.4370080062950214, "learning_rate": 2.0747430349934687e-06, "loss": 0.012819565832614899, "step": 98780 }, { "epoch": 0.9297411764705882, "grad_norm": 0.45269770180158225, "learning_rate": 2.0746905272672887e-06, "loss": 0.01129501461982727, "step": 98785 }, { "epoch": 0.9297882352941177, "grad_norm": 0.5386461685762121, "learning_rate": 2.0746380235275135e-06, "loss": 0.012908291816711426, "step": 98790 }, { "epoch": 0.9298352941176471, "grad_norm": 0.3400162684535851, "learning_rate": 2.074585523773638e-06, "loss": 0.013681334257125855, "step": 98795 }, { "epoch": 0.9298823529411765, "grad_norm": 0.6278047156586752, "learning_rate": 2.074533028005159e-06, "loss": 0.01130821779370308, "step": 98800 }, { "epoch": 0.9299294117647059, "grad_norm": 0.57437996011499, "learning_rate": 2.074480536221572e-06, "loss": 0.017729157209396364, "step": 98805 }, { "epoch": 0.9299764705882353, "grad_norm": 0.45919418942123635, "learning_rate": 2.074428048422372e-06, "loss": 0.013654384016990661, "step": 98810 }, { "epoch": 0.9300235294117647, "grad_norm": 0.5909863320980354, "learning_rate": 2.0743755646070553e-06, "loss": 0.01435701847076416, "step": 98815 }, { "epoch": 0.9300705882352941, "grad_norm": 0.8615488247118557, "learning_rate": 2.074323084775119e-06, "loss": 0.014556345343589783, "step": 98820 }, { "epoch": 0.9301176470588235, "grad_norm": 0.3384574720362889, "learning_rate": 2.0742706089260585e-06, "loss": 0.014815789461135865, "step": 98825 }, { "epoch": 0.9301647058823529, "grad_norm": 0.5494308236370888, "learning_rate": 2.07421813705937e-06, "loss": 0.011133091896772385, "step": 98830 }, { "epoch": 0.9302117647058824, "grad_norm": 0.6388281502299684, "learning_rate": 2.0741656691745496e-06, "loss": 0.01320502907037735, "step": 98835 }, { "epoch": 0.9302588235294118, "grad_norm": 0.3490270309550028, "learning_rate": 2.074113205271094e-06, "loss": 0.011911627650260926, "step": 98840 }, { "epoch": 0.9303058823529412, "grad_norm": 0.5335674450363455, "learning_rate": 2.0740607453484994e-06, "loss": 0.012756437063217163, "step": 98845 }, { "epoch": 0.9303529411764706, "grad_norm": 0.5958627730001985, "learning_rate": 2.074008289406263e-06, "loss": 0.013610172271728515, "step": 98850 }, { "epoch": 0.9304, "grad_norm": 0.3617576227717244, "learning_rate": 2.073955837443881e-06, "loss": 0.011181684583425522, "step": 98855 }, { "epoch": 0.9304470588235294, "grad_norm": 0.43175082284870686, "learning_rate": 2.073903389460851e-06, "loss": 0.014299315214157105, "step": 98860 }, { "epoch": 0.9304941176470588, "grad_norm": 0.4503960209121806, "learning_rate": 2.073850945456668e-06, "loss": 0.01451382040977478, "step": 98865 }, { "epoch": 0.9305411764705882, "grad_norm": 0.33375654663002485, "learning_rate": 2.0737985054308314e-06, "loss": 0.01368912160396576, "step": 98870 }, { "epoch": 0.9305882352941176, "grad_norm": 0.4951053996590886, "learning_rate": 2.073746069382836e-06, "loss": 0.01109958067536354, "step": 98875 }, { "epoch": 0.930635294117647, "grad_norm": 0.39817109387540833, "learning_rate": 2.0736936373121806e-06, "loss": 0.011176854372024536, "step": 98880 }, { "epoch": 0.9306823529411765, "grad_norm": 0.3988791909142301, "learning_rate": 2.0736412092183607e-06, "loss": 0.015904071927070617, "step": 98885 }, { "epoch": 0.9307294117647059, "grad_norm": 0.40180766856103806, "learning_rate": 2.0735887851008755e-06, "loss": 0.015324559807777405, "step": 98890 }, { "epoch": 0.9307764705882353, "grad_norm": 0.5570282923917891, "learning_rate": 2.0735363649592215e-06, "loss": 0.015615162253379822, "step": 98895 }, { "epoch": 0.9308235294117647, "grad_norm": 0.4389391344091287, "learning_rate": 2.0734839487928956e-06, "loss": 0.012849250435829162, "step": 98900 }, { "epoch": 0.9308705882352941, "grad_norm": 0.37477315264527894, "learning_rate": 2.0734315366013967e-06, "loss": 0.012811152637004853, "step": 98905 }, { "epoch": 0.9309176470588235, "grad_norm": 0.6446428897594844, "learning_rate": 2.0733791283842205e-06, "loss": 0.017142146825790405, "step": 98910 }, { "epoch": 0.9309647058823529, "grad_norm": 0.46201280775644177, "learning_rate": 2.0733267241408667e-06, "loss": 0.012796764075756074, "step": 98915 }, { "epoch": 0.9310117647058823, "grad_norm": 0.3440726062624066, "learning_rate": 2.0732743238708327e-06, "loss": 0.017955401539802553, "step": 98920 }, { "epoch": 0.9310588235294117, "grad_norm": 0.4048425297669764, "learning_rate": 2.073221927573616e-06, "loss": 0.012577550113201141, "step": 98925 }, { "epoch": 0.9311058823529412, "grad_norm": 0.34101775260486267, "learning_rate": 2.0731695352487143e-06, "loss": 0.013978780806064605, "step": 98930 }, { "epoch": 0.9311529411764706, "grad_norm": 1.0869353212845538, "learning_rate": 2.0731171468956266e-06, "loss": 0.015378162264823914, "step": 98935 }, { "epoch": 0.9312, "grad_norm": 0.42223347233000336, "learning_rate": 2.073064762513851e-06, "loss": 0.012370900064706803, "step": 98940 }, { "epoch": 0.9312470588235294, "grad_norm": 0.6573535009416157, "learning_rate": 2.0730123821028846e-06, "loss": 0.01335316151380539, "step": 98945 }, { "epoch": 0.9312941176470588, "grad_norm": 0.4070665021260874, "learning_rate": 2.072960005662227e-06, "loss": 0.013628199696540833, "step": 98950 }, { "epoch": 0.9313411764705882, "grad_norm": 0.4976237732116015, "learning_rate": 2.0729076331913762e-06, "loss": 0.013398033380508424, "step": 98955 }, { "epoch": 0.9313882352941176, "grad_norm": 0.32755400024130255, "learning_rate": 2.072855264689831e-06, "loss": 0.013768315315246582, "step": 98960 }, { "epoch": 0.931435294117647, "grad_norm": 0.38621620598687634, "learning_rate": 2.0728029001570893e-06, "loss": 0.013926593959331513, "step": 98965 }, { "epoch": 0.9314823529411764, "grad_norm": 0.5458884553393576, "learning_rate": 2.0727505395926504e-06, "loss": 0.01818595677614212, "step": 98970 }, { "epoch": 0.9315294117647058, "grad_norm": 0.5338768517088253, "learning_rate": 2.072698182996014e-06, "loss": 0.013655784726142883, "step": 98975 }, { "epoch": 0.9315764705882353, "grad_norm": 0.5323537354223762, "learning_rate": 2.072645830366677e-06, "loss": 0.015954649448394774, "step": 98980 }, { "epoch": 0.9316235294117647, "grad_norm": 0.4210049563265057, "learning_rate": 2.07259348170414e-06, "loss": 0.016387832164764405, "step": 98985 }, { "epoch": 0.9316705882352941, "grad_norm": 0.5552816814237419, "learning_rate": 2.0725411370079016e-06, "loss": 0.01372791975736618, "step": 98990 }, { "epoch": 0.9317176470588235, "grad_norm": 0.4588423966800424, "learning_rate": 2.0724887962774605e-06, "loss": 0.016607119143009184, "step": 98995 }, { "epoch": 0.9317647058823529, "grad_norm": 0.5224562591500824, "learning_rate": 2.072436459512317e-06, "loss": 0.016756702959537507, "step": 99000 }, { "epoch": 0.9318117647058823, "grad_norm": 0.4051591463219449, "learning_rate": 2.0723841267119694e-06, "loss": 0.01337563693523407, "step": 99005 }, { "epoch": 0.9318588235294117, "grad_norm": 0.49734232524532096, "learning_rate": 2.0723317978759177e-06, "loss": 0.012345277518033982, "step": 99010 }, { "epoch": 0.9319058823529411, "grad_norm": 0.46488192251950733, "learning_rate": 2.072279473003661e-06, "loss": 0.013878078758716583, "step": 99015 }, { "epoch": 0.9319529411764705, "grad_norm": 0.4877995188520714, "learning_rate": 2.0722271520946996e-06, "loss": 0.009452659636735916, "step": 99020 }, { "epoch": 0.932, "grad_norm": 0.3804365594988651, "learning_rate": 2.0721748351485324e-06, "loss": 0.020898404717445373, "step": 99025 }, { "epoch": 0.9320470588235295, "grad_norm": 0.376443313841147, "learning_rate": 2.0721225221646596e-06, "loss": 0.012786437571048737, "step": 99030 }, { "epoch": 0.9320941176470589, "grad_norm": 0.45465751085709555, "learning_rate": 2.0720702131425814e-06, "loss": 0.014926710724830627, "step": 99035 }, { "epoch": 0.9321411764705883, "grad_norm": 0.42455648338168933, "learning_rate": 2.072017908081797e-06, "loss": 0.015907852351665495, "step": 99040 }, { "epoch": 0.9321882352941177, "grad_norm": 0.6478061979985664, "learning_rate": 2.071965606981807e-06, "loss": 0.013761325180530548, "step": 99045 }, { "epoch": 0.932235294117647, "grad_norm": 0.35852991461162825, "learning_rate": 2.071913309842112e-06, "loss": 0.014557281136512756, "step": 99050 }, { "epoch": 0.9322823529411765, "grad_norm": 0.3558293474794821, "learning_rate": 2.071861016662211e-06, "loss": 0.014461007714271546, "step": 99055 }, { "epoch": 0.9323294117647059, "grad_norm": 0.5710674148150322, "learning_rate": 2.0718087274416047e-06, "loss": 0.015339839458465575, "step": 99060 }, { "epoch": 0.9323764705882353, "grad_norm": 0.4613038545705881, "learning_rate": 2.0717564421797937e-06, "loss": 0.011666586995124817, "step": 99065 }, { "epoch": 0.9324235294117647, "grad_norm": 0.6197078953435102, "learning_rate": 2.0717041608762796e-06, "loss": 0.014698953926563263, "step": 99070 }, { "epoch": 0.9324705882352942, "grad_norm": 0.606311743487246, "learning_rate": 2.071651883530561e-06, "loss": 0.017959494888782502, "step": 99075 }, { "epoch": 0.9325176470588236, "grad_norm": 0.3442202466103523, "learning_rate": 2.07159961014214e-06, "loss": 0.012086980044841766, "step": 99080 }, { "epoch": 0.932564705882353, "grad_norm": 0.5149497830443281, "learning_rate": 2.0715473407105164e-06, "loss": 0.017682862281799317, "step": 99085 }, { "epoch": 0.9326117647058824, "grad_norm": 0.5488165964193338, "learning_rate": 2.071495075235192e-06, "loss": 0.01581057012081146, "step": 99090 }, { "epoch": 0.9326588235294118, "grad_norm": 0.43049745495050584, "learning_rate": 2.0714428137156666e-06, "loss": 0.016861866414546966, "step": 99095 }, { "epoch": 0.9327058823529412, "grad_norm": 0.40809117632628183, "learning_rate": 2.071390556151442e-06, "loss": 0.01465810090303421, "step": 99100 }, { "epoch": 0.9327529411764706, "grad_norm": 0.6375139581546775, "learning_rate": 2.07133830254202e-06, "loss": 0.018334375321865083, "step": 99105 }, { "epoch": 0.9328, "grad_norm": 0.48403777069814474, "learning_rate": 2.0712860528869004e-06, "loss": 0.018495872616767883, "step": 99110 }, { "epoch": 0.9328470588235294, "grad_norm": 0.4638029119737957, "learning_rate": 2.0712338071855846e-06, "loss": 0.018444010615348817, "step": 99115 }, { "epoch": 0.9328941176470589, "grad_norm": 0.2579505602528559, "learning_rate": 2.0711815654375752e-06, "loss": 0.01543576419353485, "step": 99120 }, { "epoch": 0.9329411764705883, "grad_norm": 0.45482385173433126, "learning_rate": 2.0711293276423726e-06, "loss": 0.011557598412036896, "step": 99125 }, { "epoch": 0.9329882352941177, "grad_norm": 0.4491442603803337, "learning_rate": 2.071077093799479e-06, "loss": 0.01187804937362671, "step": 99130 }, { "epoch": 0.9330352941176471, "grad_norm": 0.676563343593407, "learning_rate": 2.0710248639083952e-06, "loss": 0.01618521511554718, "step": 99135 }, { "epoch": 0.9330823529411765, "grad_norm": 0.8801318225871751, "learning_rate": 2.0709726379686236e-06, "loss": 0.0195915624499321, "step": 99140 }, { "epoch": 0.9331294117647059, "grad_norm": 0.4847278590592096, "learning_rate": 2.070920415979666e-06, "loss": 0.00989486277103424, "step": 99145 }, { "epoch": 0.9331764705882353, "grad_norm": 0.4167482892137069, "learning_rate": 2.0708681979410236e-06, "loss": 0.011682075262069703, "step": 99150 }, { "epoch": 0.9332235294117647, "grad_norm": 0.296389857522073, "learning_rate": 2.070815983852199e-06, "loss": 0.009229688346385956, "step": 99155 }, { "epoch": 0.9332705882352941, "grad_norm": 0.3965085748073266, "learning_rate": 2.0707637737126947e-06, "loss": 0.012949210405349732, "step": 99160 }, { "epoch": 0.9333176470588235, "grad_norm": 0.2845920022828711, "learning_rate": 2.0707115675220125e-06, "loss": 0.011621017754077912, "step": 99165 }, { "epoch": 0.933364705882353, "grad_norm": 0.6771329722124111, "learning_rate": 2.070659365279654e-06, "loss": 0.01506875455379486, "step": 99170 }, { "epoch": 0.9334117647058824, "grad_norm": 0.45224480450414484, "learning_rate": 2.0706071669851223e-06, "loss": 0.013779513537883759, "step": 99175 }, { "epoch": 0.9334588235294118, "grad_norm": 0.4895145800726838, "learning_rate": 2.0705549726379194e-06, "loss": 0.013114552199840545, "step": 99180 }, { "epoch": 0.9335058823529412, "grad_norm": 0.42809329816637814, "learning_rate": 2.0705027822375483e-06, "loss": 0.016344574093818665, "step": 99185 }, { "epoch": 0.9335529411764706, "grad_norm": 0.3581680139817934, "learning_rate": 2.070450595783511e-06, "loss": 0.008469604700803757, "step": 99190 }, { "epoch": 0.9336, "grad_norm": 0.4720778182829424, "learning_rate": 2.070398413275311e-06, "loss": 0.014354445040225983, "step": 99195 }, { "epoch": 0.9336470588235294, "grad_norm": 0.5826148234762579, "learning_rate": 2.0703462347124505e-06, "loss": 0.015572229027748108, "step": 99200 }, { "epoch": 0.9336941176470588, "grad_norm": 0.4424103036700641, "learning_rate": 2.0702940600944323e-06, "loss": 0.012858551740646363, "step": 99205 }, { "epoch": 0.9337411764705882, "grad_norm": 0.5611825728873191, "learning_rate": 2.0702418894207597e-06, "loss": 0.011124129593372344, "step": 99210 }, { "epoch": 0.9337882352941177, "grad_norm": 0.48343827532084166, "learning_rate": 2.0701897226909352e-06, "loss": 0.01518273502588272, "step": 99215 }, { "epoch": 0.9338352941176471, "grad_norm": 0.5440745674180604, "learning_rate": 2.0701375599044625e-06, "loss": 0.013449302315711975, "step": 99220 }, { "epoch": 0.9338823529411765, "grad_norm": 0.6934778520394531, "learning_rate": 2.0700854010608447e-06, "loss": 0.015573534369468688, "step": 99225 }, { "epoch": 0.9339294117647059, "grad_norm": 0.55075413095186, "learning_rate": 2.070033246159585e-06, "loss": 0.015217891335487366, "step": 99230 }, { "epoch": 0.9339764705882353, "grad_norm": 0.506152063864266, "learning_rate": 2.069981095200187e-06, "loss": 0.01553880274295807, "step": 99235 }, { "epoch": 0.9340235294117647, "grad_norm": 0.5716140072677907, "learning_rate": 2.0699289481821534e-06, "loss": 0.0202387273311615, "step": 99240 }, { "epoch": 0.9340705882352941, "grad_norm": 0.3279705417724929, "learning_rate": 2.0698768051049888e-06, "loss": 0.014179936051368714, "step": 99245 }, { "epoch": 0.9341176470588235, "grad_norm": 0.6177573988386456, "learning_rate": 2.069824665968197e-06, "loss": 0.014868168532848359, "step": 99250 }, { "epoch": 0.9341647058823529, "grad_norm": 0.6259283249835984, "learning_rate": 2.0697725307712804e-06, "loss": 0.01630837321281433, "step": 99255 }, { "epoch": 0.9342117647058824, "grad_norm": 0.4948077301578154, "learning_rate": 2.0697203995137435e-06, "loss": 0.01726315766572952, "step": 99260 }, { "epoch": 0.9342588235294118, "grad_norm": 0.4975691567732359, "learning_rate": 2.069668272195091e-06, "loss": 0.011978055536746978, "step": 99265 }, { "epoch": 0.9343058823529412, "grad_norm": 0.4424063861294189, "learning_rate": 2.069616148814826e-06, "loss": 0.015573178231716157, "step": 99270 }, { "epoch": 0.9343529411764706, "grad_norm": 0.3489033515032968, "learning_rate": 2.0695640293724526e-06, "loss": 0.010242128372192382, "step": 99275 }, { "epoch": 0.9344, "grad_norm": 0.5846807136486207, "learning_rate": 2.0695119138674754e-06, "loss": 0.017710594832897185, "step": 99280 }, { "epoch": 0.9344470588235294, "grad_norm": 0.41959385178925995, "learning_rate": 2.0694598022993985e-06, "loss": 0.016945120692253113, "step": 99285 }, { "epoch": 0.9344941176470588, "grad_norm": 0.5380977071829324, "learning_rate": 2.069407694667726e-06, "loss": 0.012128426134586335, "step": 99290 }, { "epoch": 0.9345411764705882, "grad_norm": 0.46413901948620906, "learning_rate": 2.069355590971963e-06, "loss": 0.011525078862905502, "step": 99295 }, { "epoch": 0.9345882352941176, "grad_norm": 0.42240134431061094, "learning_rate": 2.0693034912116137e-06, "loss": 0.017638598382472993, "step": 99300 }, { "epoch": 0.934635294117647, "grad_norm": 0.7201809831459214, "learning_rate": 2.069251395386182e-06, "loss": 0.013486464321613312, "step": 99305 }, { "epoch": 0.9346823529411765, "grad_norm": 0.40527141780796566, "learning_rate": 2.069199303495174e-06, "loss": 0.010829457640647888, "step": 99310 }, { "epoch": 0.9347294117647059, "grad_norm": 0.5456631480541408, "learning_rate": 2.069147215538093e-06, "loss": 0.0136527419090271, "step": 99315 }, { "epoch": 0.9347764705882353, "grad_norm": 0.5893633955968065, "learning_rate": 2.0690951315144455e-06, "loss": 0.0142237588763237, "step": 99320 }, { "epoch": 0.9348235294117647, "grad_norm": 0.4999316214566969, "learning_rate": 2.0690430514237346e-06, "loss": 0.013879889249801635, "step": 99325 }, { "epoch": 0.9348705882352941, "grad_norm": 0.2815714888784482, "learning_rate": 2.0689909752654673e-06, "loss": 0.01098945438861847, "step": 99330 }, { "epoch": 0.9349176470588235, "grad_norm": 0.4041905042874175, "learning_rate": 2.0689389030391473e-06, "loss": 0.01160954385995865, "step": 99335 }, { "epoch": 0.9349647058823529, "grad_norm": 0.3276491802537772, "learning_rate": 2.0688868347442803e-06, "loss": 0.013502898812294006, "step": 99340 }, { "epoch": 0.9350117647058823, "grad_norm": 0.5191000908310834, "learning_rate": 2.0688347703803717e-06, "loss": 0.015138086676597596, "step": 99345 }, { "epoch": 0.9350588235294117, "grad_norm": 0.42967322104908545, "learning_rate": 2.0687827099469268e-06, "loss": 0.014759893715381622, "step": 99350 }, { "epoch": 0.9351058823529412, "grad_norm": 0.323797090032788, "learning_rate": 2.0687306534434513e-06, "loss": 0.01362968385219574, "step": 99355 }, { "epoch": 0.9351529411764706, "grad_norm": 0.49056121519809454, "learning_rate": 2.06867860086945e-06, "loss": 0.013750147819519044, "step": 99360 }, { "epoch": 0.9352, "grad_norm": 0.5479423873595509, "learning_rate": 2.06862655222443e-06, "loss": 0.013491721451282501, "step": 99365 }, { "epoch": 0.9352470588235294, "grad_norm": 0.43404186770770575, "learning_rate": 2.0685745075078957e-06, "loss": 0.013233515620231628, "step": 99370 }, { "epoch": 0.9352941176470588, "grad_norm": 0.5123915615192267, "learning_rate": 2.068522466719354e-06, "loss": 0.012340395152568817, "step": 99375 }, { "epoch": 0.9353411764705882, "grad_norm": 0.41800458461602136, "learning_rate": 2.06847042985831e-06, "loss": 0.014177951216697692, "step": 99380 }, { "epoch": 0.9353882352941176, "grad_norm": 0.8268321895348307, "learning_rate": 2.06841839692427e-06, "loss": 0.030823352932929992, "step": 99385 }, { "epoch": 0.935435294117647, "grad_norm": 0.43910713732814866, "learning_rate": 2.06836636791674e-06, "loss": 0.008568821102380752, "step": 99390 }, { "epoch": 0.9354823529411764, "grad_norm": 0.32234239729029984, "learning_rate": 2.0683143428352265e-06, "loss": 0.010163266211748123, "step": 99395 }, { "epoch": 0.9355294117647058, "grad_norm": 0.4238053008801436, "learning_rate": 2.0682623216792355e-06, "loss": 0.01427026242017746, "step": 99400 }, { "epoch": 0.9355764705882353, "grad_norm": 0.6874592277680466, "learning_rate": 2.068210304448273e-06, "loss": 0.021227970719337463, "step": 99405 }, { "epoch": 0.9356235294117647, "grad_norm": 0.8048708656522656, "learning_rate": 2.068158291141847e-06, "loss": 0.011600840836763382, "step": 99410 }, { "epoch": 0.9356705882352941, "grad_norm": 0.5401159533683821, "learning_rate": 2.068106281759462e-06, "loss": 0.013159266114234925, "step": 99415 }, { "epoch": 0.9357176470588235, "grad_norm": 0.5145469401175772, "learning_rate": 2.0680542763006254e-06, "loss": 0.016215047240257262, "step": 99420 }, { "epoch": 0.9357647058823529, "grad_norm": 0.4397046924121491, "learning_rate": 2.068002274764845e-06, "loss": 0.017782068252563475, "step": 99425 }, { "epoch": 0.9358117647058823, "grad_norm": 0.3871757306138192, "learning_rate": 2.067950277151626e-06, "loss": 0.01279277503490448, "step": 99430 }, { "epoch": 0.9358588235294117, "grad_norm": 0.3530190946554651, "learning_rate": 2.0678982834604754e-06, "loss": 0.013482636213302613, "step": 99435 }, { "epoch": 0.9359058823529411, "grad_norm": 0.34697245302698365, "learning_rate": 2.067846293690902e-06, "loss": 0.01409035623073578, "step": 99440 }, { "epoch": 0.9359529411764705, "grad_norm": 0.3397857836561434, "learning_rate": 2.0677943078424104e-06, "loss": 0.012080173939466476, "step": 99445 }, { "epoch": 0.936, "grad_norm": 0.4349701397965536, "learning_rate": 2.0677423259145095e-06, "loss": 0.013712286949157715, "step": 99450 }, { "epoch": 0.9360470588235295, "grad_norm": 0.37675466632712856, "learning_rate": 2.0676903479067055e-06, "loss": 0.01557338833808899, "step": 99455 }, { "epoch": 0.9360941176470589, "grad_norm": 0.5260280452819228, "learning_rate": 2.0676383738185067e-06, "loss": 0.011997807025909423, "step": 99460 }, { "epoch": 0.9361411764705883, "grad_norm": 0.5606911794670738, "learning_rate": 2.0675864036494196e-06, "loss": 0.01486906111240387, "step": 99465 }, { "epoch": 0.9361882352941177, "grad_norm": 0.43591373939896866, "learning_rate": 2.067534437398952e-06, "loss": 0.014356982707977296, "step": 99470 }, { "epoch": 0.936235294117647, "grad_norm": 0.7300145014350479, "learning_rate": 2.0674824750666117e-06, "loss": 0.02928009331226349, "step": 99475 }, { "epoch": 0.9362823529411765, "grad_norm": 0.3797532823423024, "learning_rate": 2.067430516651906e-06, "loss": 0.014791223406791686, "step": 99480 }, { "epoch": 0.9363294117647059, "grad_norm": 0.37148637649368443, "learning_rate": 2.067378562154343e-06, "loss": 0.015432077646255492, "step": 99485 }, { "epoch": 0.9363764705882353, "grad_norm": 0.5117025127666065, "learning_rate": 2.0673266115734305e-06, "loss": 0.011576402187347411, "step": 99490 }, { "epoch": 0.9364235294117647, "grad_norm": 0.4117597112987128, "learning_rate": 2.0672746649086754e-06, "loss": 0.013286350667476654, "step": 99495 }, { "epoch": 0.9364705882352942, "grad_norm": 0.4412374530573743, "learning_rate": 2.0672227221595877e-06, "loss": 0.019166935980319978, "step": 99500 }, { "epoch": 0.9365176470588236, "grad_norm": 0.45052664190354175, "learning_rate": 2.067170783325674e-06, "loss": 0.015315735340118408, "step": 99505 }, { "epoch": 0.936564705882353, "grad_norm": 0.3564743149511459, "learning_rate": 2.0671188484064427e-06, "loss": 0.01462225466966629, "step": 99510 }, { "epoch": 0.9366117647058824, "grad_norm": 0.8071755999675002, "learning_rate": 2.067066917401402e-06, "loss": 0.01914406567811966, "step": 99515 }, { "epoch": 0.9366588235294118, "grad_norm": 0.5756624207138689, "learning_rate": 2.067014990310061e-06, "loss": 0.012796780467033387, "step": 99520 }, { "epoch": 0.9367058823529412, "grad_norm": 0.4341543514348546, "learning_rate": 2.0669630671319273e-06, "loss": 0.011923249810934067, "step": 99525 }, { "epoch": 0.9367529411764706, "grad_norm": 0.3979474412322313, "learning_rate": 2.06691114786651e-06, "loss": 0.017217609286308288, "step": 99530 }, { "epoch": 0.9368, "grad_norm": 0.3478448515987936, "learning_rate": 2.0668592325133176e-06, "loss": 0.014791564643383026, "step": 99535 }, { "epoch": 0.9368470588235294, "grad_norm": 0.4579115837088228, "learning_rate": 2.0668073210718585e-06, "loss": 0.012801006436347961, "step": 99540 }, { "epoch": 0.9368941176470589, "grad_norm": 0.6765617188286814, "learning_rate": 2.0667554135416418e-06, "loss": 0.01512967050075531, "step": 99545 }, { "epoch": 0.9369411764705883, "grad_norm": 0.2823975004509837, "learning_rate": 2.0667035099221753e-06, "loss": 0.014305602014064788, "step": 99550 }, { "epoch": 0.9369882352941177, "grad_norm": 0.3649957249252484, "learning_rate": 2.0666516102129704e-06, "loss": 0.012508799135684968, "step": 99555 }, { "epoch": 0.9370352941176471, "grad_norm": 0.3680427591396953, "learning_rate": 2.0665997144135336e-06, "loss": 0.014131945371627808, "step": 99560 }, { "epoch": 0.9370823529411765, "grad_norm": 0.46697038475036995, "learning_rate": 2.066547822523376e-06, "loss": 0.013514596223831176, "step": 99565 }, { "epoch": 0.9371294117647059, "grad_norm": 0.3084667820897113, "learning_rate": 2.066495934542005e-06, "loss": 0.013527612388134002, "step": 99570 }, { "epoch": 0.9371764705882353, "grad_norm": 0.7437654393943117, "learning_rate": 2.0664440504689307e-06, "loss": 0.013818773627281188, "step": 99575 }, { "epoch": 0.9372235294117647, "grad_norm": 0.6299584138533963, "learning_rate": 2.066392170303663e-06, "loss": 0.013152499496936799, "step": 99580 }, { "epoch": 0.9372705882352941, "grad_norm": 0.32259872393065203, "learning_rate": 2.0663402940457113e-06, "loss": 0.013658647239208222, "step": 99585 }, { "epoch": 0.9373176470588235, "grad_norm": 0.5668751316145706, "learning_rate": 2.0662884216945846e-06, "loss": 0.014949953556060791, "step": 99590 }, { "epoch": 0.937364705882353, "grad_norm": 0.5904945948270236, "learning_rate": 2.0662365532497923e-06, "loss": 0.015215471386909485, "step": 99595 }, { "epoch": 0.9374117647058824, "grad_norm": 0.5887009583796967, "learning_rate": 2.0661846887108455e-06, "loss": 0.014494094252586364, "step": 99600 }, { "epoch": 0.9374588235294118, "grad_norm": 0.49921491459191325, "learning_rate": 2.066132828077252e-06, "loss": 0.014330920577049256, "step": 99605 }, { "epoch": 0.9375058823529412, "grad_norm": 0.49414690551382867, "learning_rate": 2.0660809713485237e-06, "loss": 0.011979889124631882, "step": 99610 }, { "epoch": 0.9375529411764706, "grad_norm": 0.2782665746564061, "learning_rate": 2.066029118524169e-06, "loss": 0.012319344282150268, "step": 99615 }, { "epoch": 0.9376, "grad_norm": 0.37885496217883496, "learning_rate": 2.0659772696036997e-06, "loss": 0.01403399109840393, "step": 99620 }, { "epoch": 0.9376470588235294, "grad_norm": 0.5493421286306015, "learning_rate": 2.0659254245866244e-06, "loss": 0.015843677520751952, "step": 99625 }, { "epoch": 0.9376941176470588, "grad_norm": 0.2845158497826822, "learning_rate": 2.065873583472454e-06, "loss": 0.009670335054397582, "step": 99630 }, { "epoch": 0.9377411764705882, "grad_norm": 0.38563378278420024, "learning_rate": 2.065821746260699e-06, "loss": 0.011431580781936646, "step": 99635 }, { "epoch": 0.9377882352941177, "grad_norm": 0.6500785246970983, "learning_rate": 2.0657699129508698e-06, "loss": 0.01559707075357437, "step": 99640 }, { "epoch": 0.9378352941176471, "grad_norm": 0.3378988223011561, "learning_rate": 2.0657180835424764e-06, "loss": 0.014403754472732544, "step": 99645 }, { "epoch": 0.9378823529411765, "grad_norm": 0.39897924223117975, "learning_rate": 2.0656662580350297e-06, "loss": 0.009591809660196304, "step": 99650 }, { "epoch": 0.9379294117647059, "grad_norm": 0.4682840745109958, "learning_rate": 2.0656144364280405e-06, "loss": 0.013107661902904511, "step": 99655 }, { "epoch": 0.9379764705882353, "grad_norm": 0.4332323350014645, "learning_rate": 2.0655626187210197e-06, "loss": 0.018075764179229736, "step": 99660 }, { "epoch": 0.9380235294117647, "grad_norm": 0.499116528624058, "learning_rate": 2.0655108049134774e-06, "loss": 0.01596260368824005, "step": 99665 }, { "epoch": 0.9380705882352941, "grad_norm": 0.7690955080380252, "learning_rate": 2.065458995004925e-06, "loss": 0.017969290912151336, "step": 99670 }, { "epoch": 0.9381176470588235, "grad_norm": 0.7092588432801885, "learning_rate": 2.065407188994874e-06, "loss": 0.01425776183605194, "step": 99675 }, { "epoch": 0.9381647058823529, "grad_norm": 0.46516186626920614, "learning_rate": 2.065355386882835e-06, "loss": 0.012600019574165344, "step": 99680 }, { "epoch": 0.9382117647058823, "grad_norm": 0.41221256994050254, "learning_rate": 2.0653035886683194e-06, "loss": 0.01580657809972763, "step": 99685 }, { "epoch": 0.9382588235294118, "grad_norm": 0.6373165689718098, "learning_rate": 2.065251794350838e-06, "loss": 0.016340410709381102, "step": 99690 }, { "epoch": 0.9383058823529412, "grad_norm": 0.5103175862970372, "learning_rate": 2.065200003929903e-06, "loss": 0.010477352142333984, "step": 99695 }, { "epoch": 0.9383529411764706, "grad_norm": 0.4083221164166703, "learning_rate": 2.065148217405025e-06, "loss": 0.011733202636241913, "step": 99700 }, { "epoch": 0.9384, "grad_norm": 0.21850827137350237, "learning_rate": 2.065096434775716e-06, "loss": 0.011319595575332641, "step": 99705 }, { "epoch": 0.9384470588235294, "grad_norm": 0.47478821262629517, "learning_rate": 2.065044656041488e-06, "loss": 0.017165717482566834, "step": 99710 }, { "epoch": 0.9384941176470588, "grad_norm": 0.5215151834908062, "learning_rate": 2.064992881201852e-06, "loss": 0.013731960952281953, "step": 99715 }, { "epoch": 0.9385411764705882, "grad_norm": 0.4221060914424163, "learning_rate": 2.06494111025632e-06, "loss": 0.013543270528316498, "step": 99720 }, { "epoch": 0.9385882352941176, "grad_norm": 0.5528759367135461, "learning_rate": 2.0648893432044038e-06, "loss": 0.012510669231414796, "step": 99725 }, { "epoch": 0.938635294117647, "grad_norm": 0.6616227245266699, "learning_rate": 2.0648375800456158e-06, "loss": 0.017741641402244566, "step": 99730 }, { "epoch": 0.9386823529411765, "grad_norm": 0.3482723231392814, "learning_rate": 2.0647858207794684e-06, "loss": 0.01447923481464386, "step": 99735 }, { "epoch": 0.9387294117647059, "grad_norm": 0.4143450696303044, "learning_rate": 2.0647340654054723e-06, "loss": 0.011592214554548263, "step": 99740 }, { "epoch": 0.9387764705882353, "grad_norm": 0.8971771589027189, "learning_rate": 2.0646823139231406e-06, "loss": 0.018738748133182527, "step": 99745 }, { "epoch": 0.9388235294117647, "grad_norm": 0.45731491449996026, "learning_rate": 2.064630566331986e-06, "loss": 0.01594104468822479, "step": 99750 }, { "epoch": 0.9388705882352941, "grad_norm": 0.4305267335836054, "learning_rate": 2.06457882263152e-06, "loss": 0.014343807101249694, "step": 99755 }, { "epoch": 0.9389176470588235, "grad_norm": 0.4449051060364044, "learning_rate": 2.0645270828212557e-06, "loss": 0.013475178182125092, "step": 99760 }, { "epoch": 0.9389647058823529, "grad_norm": 0.4276555960987524, "learning_rate": 2.0644753469007052e-06, "loss": 0.013194642961025238, "step": 99765 }, { "epoch": 0.9390117647058823, "grad_norm": 0.6841337761897746, "learning_rate": 2.0644236148693823e-06, "loss": 0.014207650721073151, "step": 99770 }, { "epoch": 0.9390588235294117, "grad_norm": 0.5172702719635681, "learning_rate": 2.0643718867267983e-06, "loss": 0.01441616117954254, "step": 99775 }, { "epoch": 0.9391058823529411, "grad_norm": 0.528484461514919, "learning_rate": 2.064320162472467e-06, "loss": 0.017688533663749693, "step": 99780 }, { "epoch": 0.9391529411764706, "grad_norm": 0.36028625043119883, "learning_rate": 2.0642684421059002e-06, "loss": 0.011954890191555023, "step": 99785 }, { "epoch": 0.9392, "grad_norm": 0.41247870374010104, "learning_rate": 2.0642167256266126e-06, "loss": 0.014788135886192322, "step": 99790 }, { "epoch": 0.9392470588235294, "grad_norm": 0.5127856461432939, "learning_rate": 2.0641650130341156e-06, "loss": 0.015697461366653443, "step": 99795 }, { "epoch": 0.9392941176470588, "grad_norm": 0.36218192427890245, "learning_rate": 2.0641133043279236e-06, "loss": 0.014616599678993225, "step": 99800 }, { "epoch": 0.9393411764705882, "grad_norm": 0.488834887961949, "learning_rate": 2.0640615995075493e-06, "loss": 0.011669193208217622, "step": 99805 }, { "epoch": 0.9393882352941176, "grad_norm": 0.28212283509085706, "learning_rate": 2.0640098985725055e-06, "loss": 0.011715001612901687, "step": 99810 }, { "epoch": 0.939435294117647, "grad_norm": 0.667692315211481, "learning_rate": 2.0639582015223063e-06, "loss": 0.01361648142337799, "step": 99815 }, { "epoch": 0.9394823529411764, "grad_norm": 0.4100197476777414, "learning_rate": 2.063906508356466e-06, "loss": 0.01429692655801773, "step": 99820 }, { "epoch": 0.9395294117647058, "grad_norm": 0.27987171655347426, "learning_rate": 2.0638548190744965e-06, "loss": 0.011753116548061372, "step": 99825 }, { "epoch": 0.9395764705882353, "grad_norm": 0.4064883279982987, "learning_rate": 2.0638031336759127e-06, "loss": 0.015244096517562866, "step": 99830 }, { "epoch": 0.9396235294117647, "grad_norm": 0.47279652791773624, "learning_rate": 2.0637514521602274e-06, "loss": 0.016121441125869752, "step": 99835 }, { "epoch": 0.9396705882352941, "grad_norm": 0.47190901669825774, "learning_rate": 2.0636997745269554e-06, "loss": 0.012709689140319825, "step": 99840 }, { "epoch": 0.9397176470588235, "grad_norm": 0.4223863938184689, "learning_rate": 2.0636481007756103e-06, "loss": 0.013409976661205292, "step": 99845 }, { "epoch": 0.939764705882353, "grad_norm": 0.536247390754236, "learning_rate": 2.0635964309057056e-06, "loss": 0.012139303982257843, "step": 99850 }, { "epoch": 0.9398117647058823, "grad_norm": 0.3445228109432708, "learning_rate": 2.063544764916756e-06, "loss": 0.013557875156402588, "step": 99855 }, { "epoch": 0.9398588235294117, "grad_norm": 0.3763882392761161, "learning_rate": 2.0634931028082757e-06, "loss": 0.014270025491714477, "step": 99860 }, { "epoch": 0.9399058823529411, "grad_norm": 0.6317725941119166, "learning_rate": 2.063441444579779e-06, "loss": 0.013486334681510925, "step": 99865 }, { "epoch": 0.9399529411764705, "grad_norm": 0.44260584810641007, "learning_rate": 2.06338979023078e-06, "loss": 0.010974279046058655, "step": 99870 }, { "epoch": 0.94, "grad_norm": 0.4505598674971131, "learning_rate": 2.063338139760793e-06, "loss": 0.01224304735660553, "step": 99875 }, { "epoch": 0.9400470588235295, "grad_norm": 0.28709759234092774, "learning_rate": 2.063286493169333e-06, "loss": 0.013139735162258147, "step": 99880 }, { "epoch": 0.9400941176470589, "grad_norm": 0.9178760871209267, "learning_rate": 2.063234850455914e-06, "loss": 0.02084890455007553, "step": 99885 }, { "epoch": 0.9401411764705883, "grad_norm": 0.32513744082156815, "learning_rate": 2.0631832116200516e-06, "loss": 0.011176232993602753, "step": 99890 }, { "epoch": 0.9401882352941177, "grad_norm": 0.21222838818548342, "learning_rate": 2.0631315766612595e-06, "loss": 0.016849477589130402, "step": 99895 }, { "epoch": 0.9402352941176471, "grad_norm": 0.3299374404411667, "learning_rate": 2.063079945579053e-06, "loss": 0.011415359377861024, "step": 99900 }, { "epoch": 0.9402823529411765, "grad_norm": 0.3617834213742388, "learning_rate": 2.0630283183729478e-06, "loss": 0.012406182289123536, "step": 99905 }, { "epoch": 0.9403294117647059, "grad_norm": 0.42343555519907816, "learning_rate": 2.062976695042458e-06, "loss": 0.014756409823894501, "step": 99910 }, { "epoch": 0.9403764705882353, "grad_norm": 0.33646423959506944, "learning_rate": 2.062925075587099e-06, "loss": 0.013195335865020752, "step": 99915 }, { "epoch": 0.9404235294117647, "grad_norm": 0.7076843000670396, "learning_rate": 2.062873460006386e-06, "loss": 0.014217740297317505, "step": 99920 }, { "epoch": 0.9404705882352942, "grad_norm": 0.5933486728579522, "learning_rate": 2.062821848299835e-06, "loss": 0.017594876885414123, "step": 99925 }, { "epoch": 0.9405176470588236, "grad_norm": 0.8019149514057439, "learning_rate": 2.0627702404669595e-06, "loss": 0.017233943939208983, "step": 99930 }, { "epoch": 0.940564705882353, "grad_norm": 0.9020387054650922, "learning_rate": 2.0627186365072775e-06, "loss": 0.017832675576210023, "step": 99935 }, { "epoch": 0.9406117647058824, "grad_norm": 0.44505619075567227, "learning_rate": 2.0626670364203024e-06, "loss": 0.011135709285736085, "step": 99940 }, { "epoch": 0.9406588235294118, "grad_norm": 0.45902767530098354, "learning_rate": 2.062615440205551e-06, "loss": 0.013101212680339813, "step": 99945 }, { "epoch": 0.9407058823529412, "grad_norm": 0.4466077803762093, "learning_rate": 2.0625638478625385e-06, "loss": 0.01620664745569229, "step": 99950 }, { "epoch": 0.9407529411764706, "grad_norm": 0.7060546119621128, "learning_rate": 2.062512259390781e-06, "loss": 0.01721310168504715, "step": 99955 }, { "epoch": 0.9408, "grad_norm": 0.48842079005532624, "learning_rate": 2.062460674789794e-06, "loss": 0.013327430188655853, "step": 99960 }, { "epoch": 0.9408470588235294, "grad_norm": 0.41298671454815644, "learning_rate": 2.062409094059094e-06, "loss": 0.013915860652923584, "step": 99965 }, { "epoch": 0.9408941176470588, "grad_norm": 0.5292653367363411, "learning_rate": 2.0623575171981967e-06, "loss": 0.013153290748596192, "step": 99970 }, { "epoch": 0.9409411764705883, "grad_norm": 0.5585042149622655, "learning_rate": 2.0623059442066185e-06, "loss": 0.0150543212890625, "step": 99975 }, { "epoch": 0.9409882352941177, "grad_norm": 0.6528353102207297, "learning_rate": 2.0622543750838757e-06, "loss": 0.014178718626499175, "step": 99980 }, { "epoch": 0.9410352941176471, "grad_norm": 0.5402944098525639, "learning_rate": 2.062202809829484e-06, "loss": 0.014231635630130768, "step": 99985 }, { "epoch": 0.9410823529411765, "grad_norm": 0.3821601728797884, "learning_rate": 2.0621512484429602e-06, "loss": 0.012829327583312988, "step": 99990 }, { "epoch": 0.9411294117647059, "grad_norm": 0.5498508048882824, "learning_rate": 2.062099690923821e-06, "loss": 0.011121972650289535, "step": 99995 }, { "epoch": 0.9411764705882353, "grad_norm": 0.6176936713264888, "learning_rate": 2.0620481372715833e-06, "loss": 0.012181293964385987, "step": 100000 }, { "epoch": 0.9412235294117647, "grad_norm": 0.7156761335894364, "learning_rate": 2.061996587485762e-06, "loss": 0.013829770684242248, "step": 100005 }, { "epoch": 0.9412705882352941, "grad_norm": 0.3973826766032798, "learning_rate": 2.061945041565876e-06, "loss": 0.015577256679534912, "step": 100010 }, { "epoch": 0.9413176470588235, "grad_norm": 0.3700376401978852, "learning_rate": 2.0618934995114405e-06, "loss": 0.012424618005752563, "step": 100015 }, { "epoch": 0.941364705882353, "grad_norm": 0.3154810676000172, "learning_rate": 2.0618419613219733e-06, "loss": 0.014294539391994477, "step": 100020 }, { "epoch": 0.9414117647058824, "grad_norm": 0.5499245783460481, "learning_rate": 2.0617904269969913e-06, "loss": 0.012322883307933807, "step": 100025 }, { "epoch": 0.9414588235294118, "grad_norm": 0.37961127430017255, "learning_rate": 2.0617388965360115e-06, "loss": 0.012044939398765563, "step": 100030 }, { "epoch": 0.9415058823529412, "grad_norm": 0.527331749874443, "learning_rate": 2.0616873699385505e-06, "loss": 0.011928743124008179, "step": 100035 }, { "epoch": 0.9415529411764706, "grad_norm": 0.35661758585971565, "learning_rate": 2.061635847204126e-06, "loss": 0.01487966775894165, "step": 100040 }, { "epoch": 0.9416, "grad_norm": 0.6005217965847629, "learning_rate": 2.0615843283322557e-06, "loss": 0.017297548055648804, "step": 100045 }, { "epoch": 0.9416470588235294, "grad_norm": 0.5261475652617756, "learning_rate": 2.061532813322456e-06, "loss": 0.0165365070104599, "step": 100050 }, { "epoch": 0.9416941176470588, "grad_norm": 0.46201521733611123, "learning_rate": 2.061481302174246e-06, "loss": 0.010288520902395248, "step": 100055 }, { "epoch": 0.9417411764705882, "grad_norm": 0.31633945339117764, "learning_rate": 2.061429794887142e-06, "loss": 0.011472228914499283, "step": 100060 }, { "epoch": 0.9417882352941176, "grad_norm": 0.3493526164208617, "learning_rate": 2.061378291460662e-06, "loss": 0.013516673445701599, "step": 100065 }, { "epoch": 0.9418352941176471, "grad_norm": 0.4403550229722891, "learning_rate": 2.0613267918943236e-06, "loss": 0.01476183533668518, "step": 100070 }, { "epoch": 0.9418823529411765, "grad_norm": 0.6201615705648472, "learning_rate": 2.0612752961876443e-06, "loss": 0.014717449247837067, "step": 100075 }, { "epoch": 0.9419294117647059, "grad_norm": 0.7032592818116048, "learning_rate": 2.0612238043401432e-06, "loss": 0.014869612455368043, "step": 100080 }, { "epoch": 0.9419764705882353, "grad_norm": 0.5625151693973313, "learning_rate": 2.0611723163513376e-06, "loss": 0.013432630896568298, "step": 100085 }, { "epoch": 0.9420235294117647, "grad_norm": 0.40635240334431044, "learning_rate": 2.0611208322207455e-06, "loss": 0.01308591514825821, "step": 100090 }, { "epoch": 0.9420705882352941, "grad_norm": 0.46032267564343565, "learning_rate": 2.061069351947885e-06, "loss": 0.017669332027435303, "step": 100095 }, { "epoch": 0.9421176470588235, "grad_norm": 0.3973132204944141, "learning_rate": 2.0610178755322746e-06, "loss": 0.012334515899419784, "step": 100100 }, { "epoch": 0.9421647058823529, "grad_norm": 0.4503401932248099, "learning_rate": 2.060966402973432e-06, "loss": 0.016134899854660035, "step": 100105 }, { "epoch": 0.9422117647058823, "grad_norm": 0.42200841357271773, "learning_rate": 2.0609149342708767e-06, "loss": 0.01818714141845703, "step": 100110 }, { "epoch": 0.9422588235294118, "grad_norm": 0.28076666182668947, "learning_rate": 2.0608634694241267e-06, "loss": 0.011796219646930695, "step": 100115 }, { "epoch": 0.9423058823529412, "grad_norm": 0.5957991453159139, "learning_rate": 2.0608120084327006e-06, "loss": 0.012403813004493714, "step": 100120 }, { "epoch": 0.9423529411764706, "grad_norm": 0.5046042842577565, "learning_rate": 2.0607605512961168e-06, "loss": 0.014335143566131591, "step": 100125 }, { "epoch": 0.9424, "grad_norm": 0.6045429487393462, "learning_rate": 2.0607090980138945e-06, "loss": 0.01639380007982254, "step": 100130 }, { "epoch": 0.9424470588235294, "grad_norm": 0.6483207304744989, "learning_rate": 2.0606576485855523e-06, "loss": 0.01260281801223755, "step": 100135 }, { "epoch": 0.9424941176470588, "grad_norm": 0.47208443949566625, "learning_rate": 2.060606203010609e-06, "loss": 0.014548039436340332, "step": 100140 }, { "epoch": 0.9425411764705882, "grad_norm": 0.36659571932501783, "learning_rate": 2.0605547612885836e-06, "loss": 0.01018277108669281, "step": 100145 }, { "epoch": 0.9425882352941176, "grad_norm": 0.4763395090070204, "learning_rate": 2.060503323418996e-06, "loss": 0.012052159011363982, "step": 100150 }, { "epoch": 0.942635294117647, "grad_norm": 0.7569590527776097, "learning_rate": 2.0604518894013643e-06, "loss": 0.01557028740644455, "step": 100155 }, { "epoch": 0.9426823529411764, "grad_norm": 0.4350550416020635, "learning_rate": 2.060400459235208e-06, "loss": 0.012692362070083618, "step": 100160 }, { "epoch": 0.9427294117647059, "grad_norm": 0.6284198859426384, "learning_rate": 2.0603490329200476e-06, "loss": 0.014100442826747894, "step": 100165 }, { "epoch": 0.9427764705882353, "grad_norm": 0.32666215947928173, "learning_rate": 2.0602976104554005e-06, "loss": 0.01097201183438301, "step": 100170 }, { "epoch": 0.9428235294117647, "grad_norm": 0.38503716926550774, "learning_rate": 2.0602461918407883e-06, "loss": 0.014181900024414062, "step": 100175 }, { "epoch": 0.9428705882352941, "grad_norm": 0.4116476362980248, "learning_rate": 2.0601947770757296e-06, "loss": 0.012783727049827576, "step": 100180 }, { "epoch": 0.9429176470588235, "grad_norm": 0.49863943232693747, "learning_rate": 2.060143366159743e-06, "loss": 0.012298912554979325, "step": 100185 }, { "epoch": 0.9429647058823529, "grad_norm": 0.38085999453159025, "learning_rate": 2.06009195909235e-06, "loss": 0.009388010203838348, "step": 100190 }, { "epoch": 0.9430117647058823, "grad_norm": 0.3859686810400286, "learning_rate": 2.0600405558730703e-06, "loss": 0.012952920794487, "step": 100195 }, { "epoch": 0.9430588235294117, "grad_norm": 0.5129277387966963, "learning_rate": 2.0599891565014226e-06, "loss": 0.016921114921569825, "step": 100200 }, { "epoch": 0.9431058823529411, "grad_norm": 0.3888581056784059, "learning_rate": 2.059937760976928e-06, "loss": 0.013356827199459076, "step": 100205 }, { "epoch": 0.9431529411764706, "grad_norm": 0.40678349620064025, "learning_rate": 2.0598863692991064e-06, "loss": 0.014264968037605286, "step": 100210 }, { "epoch": 0.9432, "grad_norm": 0.451114528829194, "learning_rate": 2.0598349814674774e-06, "loss": 0.012610557675361633, "step": 100215 }, { "epoch": 0.9432470588235294, "grad_norm": 0.356710715820985, "learning_rate": 2.0597835974815622e-06, "loss": 0.014744141697883606, "step": 100220 }, { "epoch": 0.9432941176470588, "grad_norm": 0.4044923758275066, "learning_rate": 2.0597322173408806e-06, "loss": 0.011849245429039002, "step": 100225 }, { "epoch": 0.9433411764705882, "grad_norm": 0.5985962803229082, "learning_rate": 2.0596808410449527e-06, "loss": 0.01293657124042511, "step": 100230 }, { "epoch": 0.9433882352941176, "grad_norm": 0.3761162586439773, "learning_rate": 2.0596294685933003e-06, "loss": 0.010178643465042114, "step": 100235 }, { "epoch": 0.943435294117647, "grad_norm": 0.6880717171759594, "learning_rate": 2.0595780999854424e-06, "loss": 0.016652625799179078, "step": 100240 }, { "epoch": 0.9434823529411764, "grad_norm": 0.28314987587489043, "learning_rate": 2.0595267352209007e-06, "loss": 0.013787510991096496, "step": 100245 }, { "epoch": 0.9435294117647058, "grad_norm": 0.3667418953994537, "learning_rate": 2.0594753742991955e-06, "loss": 0.01008533537387848, "step": 100250 }, { "epoch": 0.9435764705882352, "grad_norm": 0.43695554395804725, "learning_rate": 2.059424017219848e-06, "loss": 0.013976314663887024, "step": 100255 }, { "epoch": 0.9436235294117647, "grad_norm": 0.5167922780623773, "learning_rate": 2.0593726639823793e-06, "loss": 0.012561053037643433, "step": 100260 }, { "epoch": 0.9436705882352941, "grad_norm": 0.45272323691796246, "learning_rate": 2.0593213145863102e-06, "loss": 0.016117678582668306, "step": 100265 }, { "epoch": 0.9437176470588235, "grad_norm": 0.42456177912115606, "learning_rate": 2.059269969031161e-06, "loss": 0.016087962687015532, "step": 100270 }, { "epoch": 0.943764705882353, "grad_norm": 0.42172816250968087, "learning_rate": 2.0592186273164545e-06, "loss": 0.01466875672340393, "step": 100275 }, { "epoch": 0.9438117647058824, "grad_norm": 0.7339097061302502, "learning_rate": 2.0591672894417107e-06, "loss": 0.01106986626982689, "step": 100280 }, { "epoch": 0.9438588235294118, "grad_norm": 0.3909302179518656, "learning_rate": 2.059115955406451e-06, "loss": 0.01544690728187561, "step": 100285 }, { "epoch": 0.9439058823529412, "grad_norm": 0.5098982929807037, "learning_rate": 2.0590646252101974e-06, "loss": 0.014604297280311585, "step": 100290 }, { "epoch": 0.9439529411764706, "grad_norm": 0.5823921865924057, "learning_rate": 2.0590132988524716e-06, "loss": 0.012864047288894653, "step": 100295 }, { "epoch": 0.944, "grad_norm": 0.5611399505016894, "learning_rate": 2.058961976332795e-06, "loss": 0.01482999324798584, "step": 100300 }, { "epoch": 0.9440470588235295, "grad_norm": 0.4556783691735417, "learning_rate": 2.058910657650688e-06, "loss": 0.01221158355474472, "step": 100305 }, { "epoch": 0.9440941176470589, "grad_norm": 1.2326203720362128, "learning_rate": 2.058859342805675e-06, "loss": 0.02069234400987625, "step": 100310 }, { "epoch": 0.9441411764705883, "grad_norm": 0.3251713320895857, "learning_rate": 2.0588080317972754e-06, "loss": 0.016569492220878602, "step": 100315 }, { "epoch": 0.9441882352941177, "grad_norm": 0.3472059804970108, "learning_rate": 2.0587567246250123e-06, "loss": 0.014518849551677704, "step": 100320 }, { "epoch": 0.9442352941176471, "grad_norm": 0.7256992195221396, "learning_rate": 2.0587054212884075e-06, "loss": 0.01575794517993927, "step": 100325 }, { "epoch": 0.9442823529411765, "grad_norm": 0.516184564352409, "learning_rate": 2.058654121786983e-06, "loss": 0.016333019733428954, "step": 100330 }, { "epoch": 0.9443294117647059, "grad_norm": 0.7814770253944195, "learning_rate": 2.0586028261202617e-06, "loss": 0.015170928835868836, "step": 100335 }, { "epoch": 0.9443764705882353, "grad_norm": 0.5009706427953704, "learning_rate": 2.0585515342877644e-06, "loss": 0.016680997610092164, "step": 100340 }, { "epoch": 0.9444235294117647, "grad_norm": 0.4077705616947778, "learning_rate": 2.0585002462890154e-06, "loss": 0.01244674324989319, "step": 100345 }, { "epoch": 0.9444705882352942, "grad_norm": 0.4967549538731862, "learning_rate": 2.0584489621235355e-06, "loss": 0.016749347746372222, "step": 100350 }, { "epoch": 0.9445176470588236, "grad_norm": 0.6410386114872972, "learning_rate": 2.0583976817908484e-06, "loss": 0.015963482856750488, "step": 100355 }, { "epoch": 0.944564705882353, "grad_norm": 0.41153730606227484, "learning_rate": 2.0583464052904755e-06, "loss": 0.015728023648262025, "step": 100360 }, { "epoch": 0.9446117647058824, "grad_norm": 0.4429695233474211, "learning_rate": 2.0582951326219407e-06, "loss": 0.012138725817203521, "step": 100365 }, { "epoch": 0.9446588235294118, "grad_norm": 0.4861642741967201, "learning_rate": 2.0582438637847655e-06, "loss": 0.017219582200050355, "step": 100370 }, { "epoch": 0.9447058823529412, "grad_norm": 0.3973806406584083, "learning_rate": 2.058192598778474e-06, "loss": 0.012365174293518067, "step": 100375 }, { "epoch": 0.9447529411764706, "grad_norm": 0.48540124534534923, "learning_rate": 2.0581413376025887e-06, "loss": 0.013749364018440246, "step": 100380 }, { "epoch": 0.9448, "grad_norm": 0.4719363800526842, "learning_rate": 2.058090080256632e-06, "loss": 0.014438220858573913, "step": 100385 }, { "epoch": 0.9448470588235294, "grad_norm": 0.5961064044382584, "learning_rate": 2.058038826740128e-06, "loss": 0.012659166753292084, "step": 100390 }, { "epoch": 0.9448941176470588, "grad_norm": 0.4158694264700918, "learning_rate": 2.057987577052599e-06, "loss": 0.010975167900323868, "step": 100395 }, { "epoch": 0.9449411764705883, "grad_norm": 0.5840847252103875, "learning_rate": 2.057936331193569e-06, "loss": 0.018142643570899963, "step": 100400 }, { "epoch": 0.9449882352941177, "grad_norm": 0.5260776385545632, "learning_rate": 2.057885089162561e-06, "loss": 0.014420083165168763, "step": 100405 }, { "epoch": 0.9450352941176471, "grad_norm": 0.46010208197109637, "learning_rate": 2.057833850959099e-06, "loss": 0.016739124059677125, "step": 100410 }, { "epoch": 0.9450823529411765, "grad_norm": 0.4592471865334421, "learning_rate": 2.0577826165827054e-06, "loss": 0.019879378378391266, "step": 100415 }, { "epoch": 0.9451294117647059, "grad_norm": 0.30582769020471806, "learning_rate": 2.057731386032905e-06, "loss": 0.01076592355966568, "step": 100420 }, { "epoch": 0.9451764705882353, "grad_norm": 0.45086132240380045, "learning_rate": 2.0576801593092203e-06, "loss": 0.01587119698524475, "step": 100425 }, { "epoch": 0.9452235294117647, "grad_norm": 0.524143085071145, "learning_rate": 2.057628936411176e-06, "loss": 0.012697823345661163, "step": 100430 }, { "epoch": 0.9452705882352941, "grad_norm": 0.3337626832500424, "learning_rate": 2.0575777173382956e-06, "loss": 0.009462989866733551, "step": 100435 }, { "epoch": 0.9453176470588235, "grad_norm": 0.4514400352405642, "learning_rate": 2.057526502090103e-06, "loss": 0.014049088954925537, "step": 100440 }, { "epoch": 0.945364705882353, "grad_norm": 0.6213647008496809, "learning_rate": 2.0574752906661225e-06, "loss": 0.016848383843898772, "step": 100445 }, { "epoch": 0.9454117647058824, "grad_norm": 0.41751425124613006, "learning_rate": 2.0574240830658778e-06, "loss": 0.011499026417732238, "step": 100450 }, { "epoch": 0.9454588235294118, "grad_norm": 0.46793544750162247, "learning_rate": 2.0573728792888937e-06, "loss": 0.0166021004319191, "step": 100455 }, { "epoch": 0.9455058823529412, "grad_norm": 0.5597158317987277, "learning_rate": 2.057321679334694e-06, "loss": 0.014740933477878571, "step": 100460 }, { "epoch": 0.9455529411764706, "grad_norm": 0.5108960492913249, "learning_rate": 2.057270483202803e-06, "loss": 0.013920164108276368, "step": 100465 }, { "epoch": 0.9456, "grad_norm": 0.5154149293134934, "learning_rate": 2.0572192908927455e-06, "loss": 0.014376242458820344, "step": 100470 }, { "epoch": 0.9456470588235294, "grad_norm": 0.4092640061013417, "learning_rate": 2.057168102404045e-06, "loss": 0.011503177881240844, "step": 100475 }, { "epoch": 0.9456941176470588, "grad_norm": 0.42292441910228146, "learning_rate": 2.0571169177362275e-06, "loss": 0.012858736515045165, "step": 100480 }, { "epoch": 0.9457411764705882, "grad_norm": 0.44832092046491184, "learning_rate": 2.057065736888817e-06, "loss": 0.01227717250585556, "step": 100485 }, { "epoch": 0.9457882352941176, "grad_norm": 0.4211545402444128, "learning_rate": 2.0570145598613384e-06, "loss": 0.010323292016983033, "step": 100490 }, { "epoch": 0.9458352941176471, "grad_norm": 0.4913741099895299, "learning_rate": 2.0569633866533164e-06, "loss": 0.013653488457202911, "step": 100495 }, { "epoch": 0.9458823529411765, "grad_norm": 0.41724486476769485, "learning_rate": 2.0569122172642763e-06, "loss": 0.014960090816020965, "step": 100500 }, { "epoch": 0.9459294117647059, "grad_norm": 0.47504712408747507, "learning_rate": 2.0568610516937424e-06, "loss": 0.012400881946086883, "step": 100505 }, { "epoch": 0.9459764705882353, "grad_norm": 0.4027883437578775, "learning_rate": 2.0568098899412405e-06, "loss": 0.01513076275587082, "step": 100510 }, { "epoch": 0.9460235294117647, "grad_norm": 0.4163009044091939, "learning_rate": 2.0567587320062953e-06, "loss": 0.014740866422653199, "step": 100515 }, { "epoch": 0.9460705882352941, "grad_norm": 0.3103733527655827, "learning_rate": 2.0567075778884325e-06, "loss": 0.01349261701107025, "step": 100520 }, { "epoch": 0.9461176470588235, "grad_norm": 0.8489176639455829, "learning_rate": 2.056656427587177e-06, "loss": 0.015098515152931213, "step": 100525 }, { "epoch": 0.9461647058823529, "grad_norm": 0.47544207284790346, "learning_rate": 2.0566052811020547e-06, "loss": 0.015277107059955598, "step": 100530 }, { "epoch": 0.9462117647058823, "grad_norm": 0.575303262768277, "learning_rate": 2.0565541384325905e-06, "loss": 0.01576244682073593, "step": 100535 }, { "epoch": 0.9462588235294118, "grad_norm": 0.4620720277335976, "learning_rate": 2.056502999578311e-06, "loss": 0.008993677794933319, "step": 100540 }, { "epoch": 0.9463058823529412, "grad_norm": 0.48977577006410494, "learning_rate": 2.056451864538741e-06, "loss": 0.015878814458847045, "step": 100545 }, { "epoch": 0.9463529411764706, "grad_norm": 0.49934337831042563, "learning_rate": 2.0564007333134063e-06, "loss": 0.01252475380897522, "step": 100550 }, { "epoch": 0.9464, "grad_norm": 0.6978876831578568, "learning_rate": 2.056349605901833e-06, "loss": 0.01617354154586792, "step": 100555 }, { "epoch": 0.9464470588235294, "grad_norm": 0.36444332373657146, "learning_rate": 2.056298482303547e-06, "loss": 0.011459692567586898, "step": 100560 }, { "epoch": 0.9464941176470588, "grad_norm": 1.8139318000781424, "learning_rate": 2.056247362518074e-06, "loss": 0.01679685413837433, "step": 100565 }, { "epoch": 0.9465411764705882, "grad_norm": 0.4152908012999647, "learning_rate": 2.0561962465449404e-06, "loss": 0.014731551706790923, "step": 100570 }, { "epoch": 0.9465882352941176, "grad_norm": 0.5773532498450851, "learning_rate": 2.0561451343836726e-06, "loss": 0.013956420123577118, "step": 100575 }, { "epoch": 0.946635294117647, "grad_norm": 0.3056617179798862, "learning_rate": 2.056094026033797e-06, "loss": 0.012981900572776794, "step": 100580 }, { "epoch": 0.9466823529411764, "grad_norm": 0.5783785798980949, "learning_rate": 2.056042921494839e-06, "loss": 0.01170962154865265, "step": 100585 }, { "epoch": 0.9467294117647059, "grad_norm": 0.4659352181752374, "learning_rate": 2.0559918207663254e-06, "loss": 0.01641903817653656, "step": 100590 }, { "epoch": 0.9467764705882353, "grad_norm": 0.2469726311298227, "learning_rate": 2.0559407238477827e-06, "loss": 0.015176805853843688, "step": 100595 }, { "epoch": 0.9468235294117647, "grad_norm": 0.6201891538514174, "learning_rate": 2.055889630738738e-06, "loss": 0.015504047274589539, "step": 100600 }, { "epoch": 0.9468705882352941, "grad_norm": 0.4965596875290046, "learning_rate": 2.055838541438717e-06, "loss": 0.015473181009292602, "step": 100605 }, { "epoch": 0.9469176470588235, "grad_norm": 0.37302909059559136, "learning_rate": 2.0557874559472477e-06, "loss": 0.013096687197685242, "step": 100610 }, { "epoch": 0.9469647058823529, "grad_norm": 0.8020804516966555, "learning_rate": 2.055736374263856e-06, "loss": 0.01629508286714554, "step": 100615 }, { "epoch": 0.9470117647058823, "grad_norm": 0.3794933469448166, "learning_rate": 2.0556852963880695e-06, "loss": 0.019056072831153868, "step": 100620 }, { "epoch": 0.9470588235294117, "grad_norm": 0.3655964030997358, "learning_rate": 2.055634222319414e-06, "loss": 0.011680471897125243, "step": 100625 }, { "epoch": 0.9471058823529411, "grad_norm": 0.4591303974282289, "learning_rate": 2.055583152057418e-06, "loss": 0.012551319599151612, "step": 100630 }, { "epoch": 0.9471529411764706, "grad_norm": 0.43853037050220295, "learning_rate": 2.055532085601608e-06, "loss": 0.014702920615673066, "step": 100635 }, { "epoch": 0.9472, "grad_norm": 0.613111115215771, "learning_rate": 2.055481022951511e-06, "loss": 0.013619408011436462, "step": 100640 }, { "epoch": 0.9472470588235294, "grad_norm": 0.3078161102448665, "learning_rate": 2.0554299641066543e-06, "loss": 0.013661278784275055, "step": 100645 }, { "epoch": 0.9472941176470588, "grad_norm": 0.4522960674706804, "learning_rate": 2.055378909066566e-06, "loss": 0.011514464020729065, "step": 100650 }, { "epoch": 0.9473411764705882, "grad_norm": 0.6735654062795057, "learning_rate": 2.055327857830773e-06, "loss": 0.013901627063751221, "step": 100655 }, { "epoch": 0.9473882352941176, "grad_norm": 0.49156686249303727, "learning_rate": 2.0552768103988034e-06, "loss": 0.012892545759677887, "step": 100660 }, { "epoch": 0.947435294117647, "grad_norm": 0.5730248196215703, "learning_rate": 2.0552257667701837e-06, "loss": 0.015930789709091186, "step": 100665 }, { "epoch": 0.9474823529411764, "grad_norm": 0.668249803038481, "learning_rate": 2.0551747269444433e-06, "loss": 0.0109029158949852, "step": 100670 }, { "epoch": 0.9475294117647058, "grad_norm": 0.343463971448256, "learning_rate": 2.0551236909211087e-06, "loss": 0.013683384656906128, "step": 100675 }, { "epoch": 0.9475764705882352, "grad_norm": 0.34431559707034, "learning_rate": 2.055072658699708e-06, "loss": 0.011567506194114684, "step": 100680 }, { "epoch": 0.9476235294117648, "grad_norm": 0.3737836033596643, "learning_rate": 2.05502163027977e-06, "loss": 0.014166389405727387, "step": 100685 }, { "epoch": 0.9476705882352942, "grad_norm": 0.34420156581430306, "learning_rate": 2.054970605660822e-06, "loss": 0.010953661799430848, "step": 100690 }, { "epoch": 0.9477176470588236, "grad_norm": 0.3494684223787308, "learning_rate": 2.0549195848423924e-06, "loss": 0.013344085216522217, "step": 100695 }, { "epoch": 0.947764705882353, "grad_norm": 0.39935977863158056, "learning_rate": 2.054868567824009e-06, "loss": 0.017322303354740144, "step": 100700 }, { "epoch": 0.9478117647058824, "grad_norm": 0.6280961623406555, "learning_rate": 2.0548175546052005e-06, "loss": 0.014844851195812225, "step": 100705 }, { "epoch": 0.9478588235294118, "grad_norm": 0.5753224044289289, "learning_rate": 2.0547665451854954e-06, "loss": 0.01774996817111969, "step": 100710 }, { "epoch": 0.9479058823529412, "grad_norm": 0.3905129802641643, "learning_rate": 2.054715539564422e-06, "loss": 0.011934653669595719, "step": 100715 }, { "epoch": 0.9479529411764706, "grad_norm": 0.34180141163816, "learning_rate": 2.0546645377415087e-06, "loss": 0.009352573752403259, "step": 100720 }, { "epoch": 0.948, "grad_norm": 1.1888766378509976, "learning_rate": 2.0546135397162852e-06, "loss": 0.010325668752193451, "step": 100725 }, { "epoch": 0.9480470588235295, "grad_norm": 0.3170136635848693, "learning_rate": 2.0545625454882782e-06, "loss": 0.014294791221618652, "step": 100730 }, { "epoch": 0.9480941176470589, "grad_norm": 0.3977749355130187, "learning_rate": 2.054511555057018e-06, "loss": 0.014778190851211548, "step": 100735 }, { "epoch": 0.9481411764705883, "grad_norm": 0.5816153937624202, "learning_rate": 2.054460568422034e-06, "loss": 0.01436794400215149, "step": 100740 }, { "epoch": 0.9481882352941177, "grad_norm": 0.3118536613760944, "learning_rate": 2.0544095855828532e-06, "loss": 0.01134602352976799, "step": 100745 }, { "epoch": 0.9482352941176471, "grad_norm": 0.4133225399626514, "learning_rate": 2.0543586065390058e-06, "loss": 0.01425495445728302, "step": 100750 }, { "epoch": 0.9482823529411765, "grad_norm": 0.28817715110889186, "learning_rate": 2.0543076312900213e-06, "loss": 0.013747267425060272, "step": 100755 }, { "epoch": 0.9483294117647059, "grad_norm": 0.641666748498847, "learning_rate": 2.0542566598354286e-06, "loss": 0.016067011654376982, "step": 100760 }, { "epoch": 0.9483764705882353, "grad_norm": 0.43206260882918357, "learning_rate": 2.0542056921747565e-06, "loss": 0.019789624214172363, "step": 100765 }, { "epoch": 0.9484235294117647, "grad_norm": 0.7309724738382547, "learning_rate": 2.054154728307535e-06, "loss": 0.013968828320503234, "step": 100770 }, { "epoch": 0.9484705882352941, "grad_norm": 1.9664150371408535, "learning_rate": 2.054103768233293e-06, "loss": 0.012930189073085786, "step": 100775 }, { "epoch": 0.9485176470588236, "grad_norm": 0.3856790284453321, "learning_rate": 2.05405281195156e-06, "loss": 0.011156543344259261, "step": 100780 }, { "epoch": 0.948564705882353, "grad_norm": 0.6060525132003407, "learning_rate": 2.054001859461867e-06, "loss": 0.013629402220249175, "step": 100785 }, { "epoch": 0.9486117647058824, "grad_norm": 0.5711074742050538, "learning_rate": 2.053950910763742e-06, "loss": 0.014207267761230468, "step": 100790 }, { "epoch": 0.9486588235294118, "grad_norm": 0.4190099500488559, "learning_rate": 2.053899965856715e-06, "loss": 0.012488356977701186, "step": 100795 }, { "epoch": 0.9487058823529412, "grad_norm": 0.31663435264279627, "learning_rate": 2.0538490247403166e-06, "loss": 0.010179242491722107, "step": 100800 }, { "epoch": 0.9487529411764706, "grad_norm": 0.5098658715078805, "learning_rate": 2.053798087414077e-06, "loss": 0.014348536729812622, "step": 100805 }, { "epoch": 0.9488, "grad_norm": 0.4234546462154162, "learning_rate": 2.053747153877525e-06, "loss": 0.011875772476196289, "step": 100810 }, { "epoch": 0.9488470588235294, "grad_norm": 0.6016848209762394, "learning_rate": 2.053696224130192e-06, "loss": 0.01705789566040039, "step": 100815 }, { "epoch": 0.9488941176470588, "grad_norm": 0.6286392327934567, "learning_rate": 2.0536452981716067e-06, "loss": 0.015822099149227144, "step": 100820 }, { "epoch": 0.9489411764705883, "grad_norm": 0.5791169669040971, "learning_rate": 2.053594376001301e-06, "loss": 0.014131233096122742, "step": 100825 }, { "epoch": 0.9489882352941177, "grad_norm": 0.591720408475941, "learning_rate": 2.0535434576188045e-06, "loss": 0.016750344634056093, "step": 100830 }, { "epoch": 0.9490352941176471, "grad_norm": 0.4852087579507589, "learning_rate": 2.0534925430236467e-06, "loss": 0.01673162877559662, "step": 100835 }, { "epoch": 0.9490823529411765, "grad_norm": 0.5464129734305844, "learning_rate": 2.05344163221536e-06, "loss": 0.019342419505119324, "step": 100840 }, { "epoch": 0.9491294117647059, "grad_norm": 0.46801901784893557, "learning_rate": 2.0533907251934736e-06, "loss": 0.012970109283924103, "step": 100845 }, { "epoch": 0.9491764705882353, "grad_norm": 0.5559148824903808, "learning_rate": 2.053339821957519e-06, "loss": 0.013638138771057129, "step": 100850 }, { "epoch": 0.9492235294117647, "grad_norm": 0.46037774986433794, "learning_rate": 2.0532889225070256e-06, "loss": 0.0156968355178833, "step": 100855 }, { "epoch": 0.9492705882352941, "grad_norm": 0.4539484465336646, "learning_rate": 2.0532380268415266e-06, "loss": 0.014194902777671815, "step": 100860 }, { "epoch": 0.9493176470588235, "grad_norm": 0.4302585958458314, "learning_rate": 2.0531871349605504e-06, "loss": 0.01576131284236908, "step": 100865 }, { "epoch": 0.9493647058823529, "grad_norm": 0.34635743153721715, "learning_rate": 2.0531362468636296e-06, "loss": 0.015299950540065766, "step": 100870 }, { "epoch": 0.9494117647058824, "grad_norm": 0.6107564488517773, "learning_rate": 2.0530853625502948e-06, "loss": 0.017593520879745483, "step": 100875 }, { "epoch": 0.9494588235294118, "grad_norm": 0.4294357744987079, "learning_rate": 2.0530344820200772e-06, "loss": 0.01593681573867798, "step": 100880 }, { "epoch": 0.9495058823529412, "grad_norm": 0.35958451585382956, "learning_rate": 2.052983605272508e-06, "loss": 0.01310301423072815, "step": 100885 }, { "epoch": 0.9495529411764706, "grad_norm": 0.4729051063468109, "learning_rate": 2.052932732307118e-06, "loss": 0.013923093676567078, "step": 100890 }, { "epoch": 0.9496, "grad_norm": 0.3472184847143859, "learning_rate": 2.05288186312344e-06, "loss": 0.013145956397056579, "step": 100895 }, { "epoch": 0.9496470588235294, "grad_norm": 0.48218192467181675, "learning_rate": 2.0528309977210044e-06, "loss": 0.013372339308261871, "step": 100900 }, { "epoch": 0.9496941176470588, "grad_norm": 0.44478573673748434, "learning_rate": 2.0527801360993434e-06, "loss": 0.012005249410867691, "step": 100905 }, { "epoch": 0.9497411764705882, "grad_norm": 0.43421780639974594, "learning_rate": 2.0527292782579876e-06, "loss": 0.012958711385726929, "step": 100910 }, { "epoch": 0.9497882352941176, "grad_norm": 0.5516790177714823, "learning_rate": 2.0526784241964696e-06, "loss": 0.010427483916282653, "step": 100915 }, { "epoch": 0.9498352941176471, "grad_norm": 0.3222779364255105, "learning_rate": 2.0526275739143217e-06, "loss": 0.010968755185604095, "step": 100920 }, { "epoch": 0.9498823529411765, "grad_norm": 0.4071715102892871, "learning_rate": 2.0525767274110744e-06, "loss": 0.015206749737262725, "step": 100925 }, { "epoch": 0.9499294117647059, "grad_norm": 0.3476704110598293, "learning_rate": 2.0525258846862607e-06, "loss": 0.013603588938713074, "step": 100930 }, { "epoch": 0.9499764705882353, "grad_norm": 0.6116970959765152, "learning_rate": 2.0524750457394126e-06, "loss": 0.016243049502372743, "step": 100935 }, { "epoch": 0.9500235294117647, "grad_norm": 0.445202829018469, "learning_rate": 2.0524242105700616e-06, "loss": 0.012301291525363921, "step": 100940 }, { "epoch": 0.9500705882352941, "grad_norm": 0.35897102686365256, "learning_rate": 2.0523733791777407e-06, "loss": 0.015493589639663696, "step": 100945 }, { "epoch": 0.9501176470588235, "grad_norm": 0.5543788916262822, "learning_rate": 2.0523225515619818e-06, "loss": 0.013192225992679597, "step": 100950 }, { "epoch": 0.9501647058823529, "grad_norm": 0.43659297855845763, "learning_rate": 2.0522717277223174e-06, "loss": 0.012206625938415528, "step": 100955 }, { "epoch": 0.9502117647058823, "grad_norm": 0.5085652649997648, "learning_rate": 2.05222090765828e-06, "loss": 0.01367446631193161, "step": 100960 }, { "epoch": 0.9502588235294117, "grad_norm": 0.6696240020675069, "learning_rate": 2.0521700913694013e-06, "loss": 0.013936471939086915, "step": 100965 }, { "epoch": 0.9503058823529412, "grad_norm": 0.4642751424916429, "learning_rate": 2.0521192788552153e-06, "loss": 0.012102216482162476, "step": 100970 }, { "epoch": 0.9503529411764706, "grad_norm": 0.4310674437783173, "learning_rate": 2.0520684701152545e-06, "loss": 0.01173095554113388, "step": 100975 }, { "epoch": 0.9504, "grad_norm": 0.3327297618599474, "learning_rate": 2.0520176651490507e-06, "loss": 0.014923985302448272, "step": 100980 }, { "epoch": 0.9504470588235294, "grad_norm": 0.46603533611972425, "learning_rate": 2.0519668639561373e-06, "loss": 0.013958349823951721, "step": 100985 }, { "epoch": 0.9504941176470588, "grad_norm": 0.514123852081098, "learning_rate": 2.0519160665360476e-06, "loss": 0.02081824988126755, "step": 100990 }, { "epoch": 0.9505411764705882, "grad_norm": 0.7025769403978851, "learning_rate": 2.051865272888314e-06, "loss": 0.016053393483161926, "step": 100995 }, { "epoch": 0.9505882352941176, "grad_norm": 0.48492904247181823, "learning_rate": 2.05181448301247e-06, "loss": 0.016244234144687654, "step": 101000 }, { "epoch": 0.950635294117647, "grad_norm": 0.6370432906179772, "learning_rate": 2.051763696908049e-06, "loss": 0.01570916920900345, "step": 101005 }, { "epoch": 0.9506823529411764, "grad_norm": 0.5968532836543188, "learning_rate": 2.051712914574584e-06, "loss": 0.019081038236618043, "step": 101010 }, { "epoch": 0.9507294117647059, "grad_norm": 0.4618461790693188, "learning_rate": 2.0516621360116086e-06, "loss": 0.013634961843490601, "step": 101015 }, { "epoch": 0.9507764705882353, "grad_norm": 0.4984902546183603, "learning_rate": 2.0516113612186556e-06, "loss": 0.012908202409744263, "step": 101020 }, { "epoch": 0.9508235294117647, "grad_norm": 0.4153049879480566, "learning_rate": 2.051560590195259e-06, "loss": 0.010767585039138794, "step": 101025 }, { "epoch": 0.9508705882352941, "grad_norm": 0.6533928789860192, "learning_rate": 2.0515098229409524e-06, "loss": 0.01593618541955948, "step": 101030 }, { "epoch": 0.9509176470588235, "grad_norm": 0.37060545488395946, "learning_rate": 2.051459059455269e-06, "loss": 0.012464669346809388, "step": 101035 }, { "epoch": 0.9509647058823529, "grad_norm": 0.8500622118871728, "learning_rate": 2.0514082997377434e-06, "loss": 0.01540694832801819, "step": 101040 }, { "epoch": 0.9510117647058823, "grad_norm": 0.6078813707482972, "learning_rate": 2.0513575437879094e-06, "loss": 0.011216241121292114, "step": 101045 }, { "epoch": 0.9510588235294117, "grad_norm": 0.48198090933272936, "learning_rate": 2.0513067916053002e-06, "loss": 0.01325855404138565, "step": 101050 }, { "epoch": 0.9511058823529411, "grad_norm": 0.7770378547449077, "learning_rate": 2.05125604318945e-06, "loss": 0.016080622375011445, "step": 101055 }, { "epoch": 0.9511529411764705, "grad_norm": 0.2844692038272016, "learning_rate": 2.051205298539893e-06, "loss": 0.014262965321540833, "step": 101060 }, { "epoch": 0.9512, "grad_norm": 0.4069819370800213, "learning_rate": 2.0511545576561636e-06, "loss": 0.012720686197280884, "step": 101065 }, { "epoch": 0.9512470588235294, "grad_norm": 0.37703550875296243, "learning_rate": 2.0511038205377956e-06, "loss": 0.01007826030254364, "step": 101070 }, { "epoch": 0.9512941176470588, "grad_norm": 0.5582740050952061, "learning_rate": 2.0510530871843236e-06, "loss": 0.01591087430715561, "step": 101075 }, { "epoch": 0.9513411764705882, "grad_norm": 0.3959740150179155, "learning_rate": 2.0510023575952824e-06, "loss": 0.013755956292152404, "step": 101080 }, { "epoch": 0.9513882352941176, "grad_norm": 0.6998863547308556, "learning_rate": 2.0509516317702057e-06, "loss": 0.01471305787563324, "step": 101085 }, { "epoch": 0.951435294117647, "grad_norm": 0.579287304397361, "learning_rate": 2.050900909708628e-06, "loss": 0.011760488152503967, "step": 101090 }, { "epoch": 0.9514823529411764, "grad_norm": 0.5059264092850067, "learning_rate": 2.050850191410085e-06, "loss": 0.014433929324150085, "step": 101095 }, { "epoch": 0.9515294117647058, "grad_norm": 0.4273545791878014, "learning_rate": 2.0507994768741106e-06, "loss": 0.011741679161787033, "step": 101100 }, { "epoch": 0.9515764705882352, "grad_norm": 0.49739208178965066, "learning_rate": 2.05074876610024e-06, "loss": 0.009931907057762146, "step": 101105 }, { "epoch": 0.9516235294117648, "grad_norm": 0.5017281211886079, "learning_rate": 2.0506980590880075e-06, "loss": 0.012714897096157075, "step": 101110 }, { "epoch": 0.9516705882352942, "grad_norm": 0.5632114754434661, "learning_rate": 2.0506473558369484e-06, "loss": 0.014566367864608765, "step": 101115 }, { "epoch": 0.9517176470588236, "grad_norm": 0.42499377070125344, "learning_rate": 2.0505966563465983e-06, "loss": 0.010880516469478607, "step": 101120 }, { "epoch": 0.951764705882353, "grad_norm": 0.671321412400497, "learning_rate": 2.0505459606164917e-06, "loss": 0.017537155747413637, "step": 101125 }, { "epoch": 0.9518117647058824, "grad_norm": 0.3758133445498055, "learning_rate": 2.0504952686461636e-06, "loss": 0.015020547807216645, "step": 101130 }, { "epoch": 0.9518588235294118, "grad_norm": 0.3437173396049315, "learning_rate": 2.05044458043515e-06, "loss": 0.013749778270721436, "step": 101135 }, { "epoch": 0.9519058823529412, "grad_norm": 0.34530791891492546, "learning_rate": 2.0503938959829854e-06, "loss": 0.013175976276397706, "step": 101140 }, { "epoch": 0.9519529411764706, "grad_norm": 0.3684185332484258, "learning_rate": 2.0503432152892065e-06, "loss": 0.012730517983436584, "step": 101145 }, { "epoch": 0.952, "grad_norm": 0.5026191956632485, "learning_rate": 2.0502925383533478e-06, "loss": 0.025674739480018617, "step": 101150 }, { "epoch": 0.9520470588235294, "grad_norm": 0.45579000784865303, "learning_rate": 2.050241865174945e-06, "loss": 0.00887870490550995, "step": 101155 }, { "epoch": 0.9520941176470589, "grad_norm": 0.7203941475376316, "learning_rate": 2.0501911957535343e-06, "loss": 0.017974750697612764, "step": 101160 }, { "epoch": 0.9521411764705883, "grad_norm": 0.28732780632777166, "learning_rate": 2.050140530088651e-06, "loss": 0.013229380548000335, "step": 101165 }, { "epoch": 0.9521882352941177, "grad_norm": 0.532485714577713, "learning_rate": 2.050089868179831e-06, "loss": 0.013715097308158874, "step": 101170 }, { "epoch": 0.9522352941176471, "grad_norm": 0.31778902343515, "learning_rate": 2.050039210026611e-06, "loss": 0.012745556235313416, "step": 101175 }, { "epoch": 0.9522823529411765, "grad_norm": 0.3198265715488617, "learning_rate": 2.0499885556285255e-06, "loss": 0.011239276081323624, "step": 101180 }, { "epoch": 0.9523294117647059, "grad_norm": 0.3189362309372473, "learning_rate": 2.0499379049851124e-06, "loss": 0.014842279255390167, "step": 101185 }, { "epoch": 0.9523764705882353, "grad_norm": 0.4633887348730935, "learning_rate": 2.0498872580959064e-06, "loss": 0.01465504765510559, "step": 101190 }, { "epoch": 0.9524235294117647, "grad_norm": 0.19641188158967823, "learning_rate": 2.049836614960444e-06, "loss": 0.012697581946849824, "step": 101195 }, { "epoch": 0.9524705882352941, "grad_norm": 0.4308179650986976, "learning_rate": 2.049785975578263e-06, "loss": 0.013889199495315552, "step": 101200 }, { "epoch": 0.9525176470588236, "grad_norm": 0.735206567556044, "learning_rate": 2.0497353399488976e-06, "loss": 0.01711384654045105, "step": 101205 }, { "epoch": 0.952564705882353, "grad_norm": 0.3266697379297301, "learning_rate": 2.0496847080718856e-06, "loss": 0.01357661783695221, "step": 101210 }, { "epoch": 0.9526117647058824, "grad_norm": 0.4073151356527953, "learning_rate": 2.0496340799467633e-06, "loss": 0.014093868434429169, "step": 101215 }, { "epoch": 0.9526588235294118, "grad_norm": 0.49001209626965564, "learning_rate": 2.0495834555730674e-06, "loss": 0.013879890739917754, "step": 101220 }, { "epoch": 0.9527058823529412, "grad_norm": 0.5265834155229016, "learning_rate": 2.049532834950335e-06, "loss": 0.014964981377124787, "step": 101225 }, { "epoch": 0.9527529411764706, "grad_norm": 0.4624135893881751, "learning_rate": 2.049482218078102e-06, "loss": 0.015393945574760436, "step": 101230 }, { "epoch": 0.9528, "grad_norm": 0.4379940459613445, "learning_rate": 2.0494316049559064e-06, "loss": 0.010849544405937194, "step": 101235 }, { "epoch": 0.9528470588235294, "grad_norm": 0.4975047124615381, "learning_rate": 2.0493809955832843e-06, "loss": 0.0121200792491436, "step": 101240 }, { "epoch": 0.9528941176470588, "grad_norm": 0.5436307012278759, "learning_rate": 2.0493303899597726e-06, "loss": 0.013653014600276948, "step": 101245 }, { "epoch": 0.9529411764705882, "grad_norm": 0.6023308225394098, "learning_rate": 2.0492797880849095e-06, "loss": 0.012236793339252473, "step": 101250 }, { "epoch": 0.9529882352941177, "grad_norm": 0.5627561944416523, "learning_rate": 2.0492291899582315e-06, "loss": 0.013379755616188049, "step": 101255 }, { "epoch": 0.9530352941176471, "grad_norm": 0.3948168855636107, "learning_rate": 2.0491785955792763e-06, "loss": 0.014414235949516296, "step": 101260 }, { "epoch": 0.9530823529411765, "grad_norm": 0.41370609163332, "learning_rate": 2.0491280049475808e-06, "loss": 0.012197807431221008, "step": 101265 }, { "epoch": 0.9531294117647059, "grad_norm": 0.6647038833614138, "learning_rate": 2.0490774180626826e-06, "loss": 0.01391943097114563, "step": 101270 }, { "epoch": 0.9531764705882353, "grad_norm": 0.40667623137930337, "learning_rate": 2.0490268349241192e-06, "loss": 0.012592527270317077, "step": 101275 }, { "epoch": 0.9532235294117647, "grad_norm": 0.7853346934268195, "learning_rate": 2.0489762555314286e-06, "loss": 0.016659700870513917, "step": 101280 }, { "epoch": 0.9532705882352941, "grad_norm": 0.46964176606771313, "learning_rate": 2.048925679884148e-06, "loss": 0.01109250783920288, "step": 101285 }, { "epoch": 0.9533176470588235, "grad_norm": 0.46244304885400267, "learning_rate": 2.0488751079818154e-06, "loss": 0.015425677597522735, "step": 101290 }, { "epoch": 0.9533647058823529, "grad_norm": 0.42437671720169967, "learning_rate": 2.048824539823968e-06, "loss": 0.01298927366733551, "step": 101295 }, { "epoch": 0.9534117647058824, "grad_norm": 0.5028367529470457, "learning_rate": 2.0487739754101455e-06, "loss": 0.013792237639427185, "step": 101300 }, { "epoch": 0.9534588235294118, "grad_norm": 1.0624450795055027, "learning_rate": 2.0487234147398845e-06, "loss": 0.011535067856311799, "step": 101305 }, { "epoch": 0.9535058823529412, "grad_norm": 0.6349491774859072, "learning_rate": 2.0486728578127227e-06, "loss": 0.016138190031051637, "step": 101310 }, { "epoch": 0.9535529411764706, "grad_norm": 0.5057795601973805, "learning_rate": 2.0486223046281996e-06, "loss": 0.010588993132114411, "step": 101315 }, { "epoch": 0.9536, "grad_norm": 0.6139478846663164, "learning_rate": 2.0485717551858525e-06, "loss": 0.014715781807899475, "step": 101320 }, { "epoch": 0.9536470588235294, "grad_norm": 0.4499333568105384, "learning_rate": 2.0485212094852204e-06, "loss": 0.013435105979442596, "step": 101325 }, { "epoch": 0.9536941176470588, "grad_norm": 0.2963988738098058, "learning_rate": 2.048470667525841e-06, "loss": 0.00912080705165863, "step": 101330 }, { "epoch": 0.9537411764705882, "grad_norm": 0.3044099241313917, "learning_rate": 2.0484201293072534e-06, "loss": 0.012107169628143311, "step": 101335 }, { "epoch": 0.9537882352941176, "grad_norm": 0.44219500627842734, "learning_rate": 2.048369594828996e-06, "loss": 0.012507443130016328, "step": 101340 }, { "epoch": 0.953835294117647, "grad_norm": 0.6022692814208139, "learning_rate": 2.048319064090607e-06, "loss": 0.01182791292667389, "step": 101345 }, { "epoch": 0.9538823529411765, "grad_norm": 0.418918818424447, "learning_rate": 2.0482685370916257e-06, "loss": 0.008789855241775512, "step": 101350 }, { "epoch": 0.9539294117647059, "grad_norm": 0.34217155263705673, "learning_rate": 2.0482180138315904e-06, "loss": 0.017213639616966248, "step": 101355 }, { "epoch": 0.9539764705882353, "grad_norm": 0.3271261998578029, "learning_rate": 2.048167494310041e-06, "loss": 0.009355209767818451, "step": 101360 }, { "epoch": 0.9540235294117647, "grad_norm": 0.4475031904676412, "learning_rate": 2.0481169785265156e-06, "loss": 0.012128137052059174, "step": 101365 }, { "epoch": 0.9540705882352941, "grad_norm": 0.636005001032765, "learning_rate": 2.048066466480553e-06, "loss": 0.011118409782648086, "step": 101370 }, { "epoch": 0.9541176470588235, "grad_norm": 0.49549995329507185, "learning_rate": 2.0480159581716934e-06, "loss": 0.013879062235355377, "step": 101375 }, { "epoch": 0.9541647058823529, "grad_norm": 0.345029917936657, "learning_rate": 2.047965453599475e-06, "loss": 0.01121906042098999, "step": 101380 }, { "epoch": 0.9542117647058823, "grad_norm": 0.7574593618629697, "learning_rate": 2.0479149527634375e-06, "loss": 0.017503365874290466, "step": 101385 }, { "epoch": 0.9542588235294117, "grad_norm": 0.49346233773818526, "learning_rate": 2.0478644556631203e-06, "loss": 0.012079043686389923, "step": 101390 }, { "epoch": 0.9543058823529412, "grad_norm": 0.2958474388503374, "learning_rate": 2.047813962298063e-06, "loss": 0.009642837196588516, "step": 101395 }, { "epoch": 0.9543529411764706, "grad_norm": 0.5509728074061669, "learning_rate": 2.0477634726678043e-06, "loss": 0.015466597676277161, "step": 101400 }, { "epoch": 0.9544, "grad_norm": 0.5003389348429916, "learning_rate": 2.0477129867718852e-06, "loss": 0.013801667094230651, "step": 101405 }, { "epoch": 0.9544470588235294, "grad_norm": 0.31805153358941446, "learning_rate": 2.047662504609844e-06, "loss": 0.012857615947723389, "step": 101410 }, { "epoch": 0.9544941176470588, "grad_norm": 0.370452543422477, "learning_rate": 2.047612026181221e-06, "loss": 0.018594780564308168, "step": 101415 }, { "epoch": 0.9545411764705882, "grad_norm": 1.3588326801763482, "learning_rate": 2.0475615514855566e-06, "loss": 0.020102515816688538, "step": 101420 }, { "epoch": 0.9545882352941176, "grad_norm": 0.457296718490383, "learning_rate": 2.0475110805223905e-06, "loss": 0.021518467366695403, "step": 101425 }, { "epoch": 0.954635294117647, "grad_norm": 0.5132712221318887, "learning_rate": 2.047460613291262e-06, "loss": 0.02013426423072815, "step": 101430 }, { "epoch": 0.9546823529411764, "grad_norm": 0.5483600397638967, "learning_rate": 2.0474101497917115e-06, "loss": 0.014844666421413421, "step": 101435 }, { "epoch": 0.9547294117647059, "grad_norm": 0.3700864404058628, "learning_rate": 2.0473596900232796e-06, "loss": 0.010887262970209121, "step": 101440 }, { "epoch": 0.9547764705882353, "grad_norm": 0.4578727360915709, "learning_rate": 2.0473092339855065e-06, "loss": 0.014417755603790283, "step": 101445 }, { "epoch": 0.9548235294117647, "grad_norm": 0.494846164717862, "learning_rate": 2.047258781677932e-06, "loss": 0.013921836018562317, "step": 101450 }, { "epoch": 0.9548705882352941, "grad_norm": 0.3014483822326771, "learning_rate": 2.0472083331000965e-06, "loss": 0.009630090743303298, "step": 101455 }, { "epoch": 0.9549176470588235, "grad_norm": 0.6964849416341786, "learning_rate": 2.047157888251541e-06, "loss": 0.015668366849422456, "step": 101460 }, { "epoch": 0.9549647058823529, "grad_norm": 0.5657075821812237, "learning_rate": 2.0471074471318062e-06, "loss": 0.024349138140678406, "step": 101465 }, { "epoch": 0.9550117647058823, "grad_norm": 0.3265022029015965, "learning_rate": 2.047057009740432e-06, "loss": 0.012787693738937378, "step": 101470 }, { "epoch": 0.9550588235294117, "grad_norm": 0.5320162580854676, "learning_rate": 2.047006576076959e-06, "loss": 0.010330414772033692, "step": 101475 }, { "epoch": 0.9551058823529411, "grad_norm": 0.5562347354132443, "learning_rate": 2.0469561461409298e-06, "loss": 0.014861577749252319, "step": 101480 }, { "epoch": 0.9551529411764705, "grad_norm": 0.4957357332171253, "learning_rate": 2.0469057199318827e-06, "loss": 0.013350763916969299, "step": 101485 }, { "epoch": 0.9552, "grad_norm": 0.3080895527844878, "learning_rate": 2.0468552974493605e-06, "loss": 0.012046745419502259, "step": 101490 }, { "epoch": 0.9552470588235294, "grad_norm": 0.43383717078976053, "learning_rate": 2.0468048786929036e-06, "loss": 0.012472884356975555, "step": 101495 }, { "epoch": 0.9552941176470588, "grad_norm": 0.30136723287933515, "learning_rate": 2.046754463662053e-06, "loss": 0.011861351877450943, "step": 101500 }, { "epoch": 0.9553411764705882, "grad_norm": 0.44527998341736413, "learning_rate": 2.04670405235635e-06, "loss": 0.01470142900943756, "step": 101505 }, { "epoch": 0.9553882352941176, "grad_norm": 0.45610889940945304, "learning_rate": 2.0466536447753365e-06, "loss": 0.013516150414943695, "step": 101510 }, { "epoch": 0.955435294117647, "grad_norm": 0.3479228373471067, "learning_rate": 2.0466032409185525e-06, "loss": 0.02047540247440338, "step": 101515 }, { "epoch": 0.9554823529411764, "grad_norm": 0.5042048777400401, "learning_rate": 2.0465528407855405e-06, "loss": 0.020091222226619722, "step": 101520 }, { "epoch": 0.9555294117647058, "grad_norm": 0.3697728095248918, "learning_rate": 2.046502444375842e-06, "loss": 0.013018667697906494, "step": 101525 }, { "epoch": 0.9555764705882352, "grad_norm": 0.4601260563811707, "learning_rate": 2.046452051688998e-06, "loss": 0.014792323112487793, "step": 101530 }, { "epoch": 0.9556235294117648, "grad_norm": 0.41209431221381143, "learning_rate": 2.04640166272455e-06, "loss": 0.013212838768959045, "step": 101535 }, { "epoch": 0.9556705882352942, "grad_norm": 0.4907970177876704, "learning_rate": 2.046351277482041e-06, "loss": 0.01496610939502716, "step": 101540 }, { "epoch": 0.9557176470588236, "grad_norm": 0.4184485773976221, "learning_rate": 2.0463008959610124e-06, "loss": 0.013960704207420349, "step": 101545 }, { "epoch": 0.955764705882353, "grad_norm": 0.3212517250425795, "learning_rate": 2.046250518161005e-06, "loss": 0.014413274824619293, "step": 101550 }, { "epoch": 0.9558117647058824, "grad_norm": 0.34172943108549647, "learning_rate": 2.0462001440815617e-06, "loss": 0.01162164956331253, "step": 101555 }, { "epoch": 0.9558588235294118, "grad_norm": 0.625529741677842, "learning_rate": 2.0461497737222246e-06, "loss": 0.013670015335083007, "step": 101560 }, { "epoch": 0.9559058823529412, "grad_norm": 0.5846049904999151, "learning_rate": 2.046099407082535e-06, "loss": 0.014659546315670013, "step": 101565 }, { "epoch": 0.9559529411764706, "grad_norm": 0.5547997214331958, "learning_rate": 2.0460490441620366e-06, "loss": 0.010785404592752457, "step": 101570 }, { "epoch": 0.956, "grad_norm": 0.2633443989671808, "learning_rate": 2.045998684960271e-06, "loss": 0.01254112869501114, "step": 101575 }, { "epoch": 0.9560470588235294, "grad_norm": 0.39048862883128044, "learning_rate": 2.0459483294767797e-06, "loss": 0.01327391415834427, "step": 101580 }, { "epoch": 0.9560941176470589, "grad_norm": 0.6626656147488379, "learning_rate": 2.045897977711106e-06, "loss": 0.014123935997486115, "step": 101585 }, { "epoch": 0.9561411764705883, "grad_norm": 0.6495677971426288, "learning_rate": 2.0458476296627925e-06, "loss": 0.014505581557750702, "step": 101590 }, { "epoch": 0.9561882352941177, "grad_norm": 0.4972199555979529, "learning_rate": 2.0457972853313823e-06, "loss": 0.013101203739643097, "step": 101595 }, { "epoch": 0.9562352941176471, "grad_norm": 0.5434477172409875, "learning_rate": 2.045746944716417e-06, "loss": 0.01627955436706543, "step": 101600 }, { "epoch": 0.9562823529411765, "grad_norm": 0.5730755858321795, "learning_rate": 2.045696607817439e-06, "loss": 0.008968351781368256, "step": 101605 }, { "epoch": 0.9563294117647059, "grad_norm": 0.3487222884784877, "learning_rate": 2.045646274633993e-06, "loss": 0.013566115498542785, "step": 101610 }, { "epoch": 0.9563764705882353, "grad_norm": 0.5694121868338089, "learning_rate": 2.045595945165621e-06, "loss": 0.014174084365367889, "step": 101615 }, { "epoch": 0.9564235294117647, "grad_norm": 0.5096932113538102, "learning_rate": 2.0455456194118653e-06, "loss": 0.012862062454223633, "step": 101620 }, { "epoch": 0.9564705882352941, "grad_norm": 0.47026493514281964, "learning_rate": 2.04549529737227e-06, "loss": 0.015024721622467041, "step": 101625 }, { "epoch": 0.9565176470588236, "grad_norm": 0.4974832600158772, "learning_rate": 2.0454449790463777e-06, "loss": 0.014075630903244018, "step": 101630 }, { "epoch": 0.956564705882353, "grad_norm": 0.6186181703022059, "learning_rate": 2.0453946644337317e-06, "loss": 0.012869726121425628, "step": 101635 }, { "epoch": 0.9566117647058824, "grad_norm": 0.2562334036164449, "learning_rate": 2.0453443535338755e-06, "loss": 0.010569092631340028, "step": 101640 }, { "epoch": 0.9566588235294118, "grad_norm": 0.38893895270082557, "learning_rate": 2.045294046346352e-06, "loss": 0.013578011095523835, "step": 101645 }, { "epoch": 0.9567058823529412, "grad_norm": 0.6398065374703382, "learning_rate": 2.0452437428707054e-06, "loss": 0.013658711314201355, "step": 101650 }, { "epoch": 0.9567529411764706, "grad_norm": 0.38805103483725506, "learning_rate": 2.0451934431064796e-06, "loss": 0.012430254369974136, "step": 101655 }, { "epoch": 0.9568, "grad_norm": 0.48210170302702526, "learning_rate": 2.0451431470532174e-06, "loss": 0.014837232232093812, "step": 101660 }, { "epoch": 0.9568470588235294, "grad_norm": 0.46919430250116095, "learning_rate": 2.0450928547104623e-06, "loss": 0.013896554708480835, "step": 101665 }, { "epoch": 0.9568941176470588, "grad_norm": 0.4666274187354241, "learning_rate": 2.045042566077759e-06, "loss": 0.013331618905067445, "step": 101670 }, { "epoch": 0.9569411764705882, "grad_norm": 0.33625680972950645, "learning_rate": 2.04499228115465e-06, "loss": 0.012524935603141784, "step": 101675 }, { "epoch": 0.9569882352941177, "grad_norm": 0.505245080362591, "learning_rate": 2.044941999940681e-06, "loss": 0.012865221500396729, "step": 101680 }, { "epoch": 0.9570352941176471, "grad_norm": 0.4216016047237491, "learning_rate": 2.044891722435395e-06, "loss": 0.013123679161071777, "step": 101685 }, { "epoch": 0.9570823529411765, "grad_norm": 0.9167847846530311, "learning_rate": 2.044841448638336e-06, "loss": 0.014649099111557007, "step": 101690 }, { "epoch": 0.9571294117647059, "grad_norm": 0.5086083270327703, "learning_rate": 2.044791178549049e-06, "loss": 0.013011536002159119, "step": 101695 }, { "epoch": 0.9571764705882353, "grad_norm": 0.4474533719308924, "learning_rate": 2.0447409121670775e-06, "loss": 0.012488268315792084, "step": 101700 }, { "epoch": 0.9572235294117647, "grad_norm": 0.46308185109905925, "learning_rate": 2.0446906494919656e-06, "loss": 0.012020057439804077, "step": 101705 }, { "epoch": 0.9572705882352941, "grad_norm": 0.5872818473746407, "learning_rate": 2.0446403905232593e-06, "loss": 0.013933932781219483, "step": 101710 }, { "epoch": 0.9573176470588235, "grad_norm": 0.35015231487963844, "learning_rate": 2.0445901352605014e-06, "loss": 0.014719870686531068, "step": 101715 }, { "epoch": 0.9573647058823529, "grad_norm": 0.5333148250455763, "learning_rate": 2.044539883703237e-06, "loss": 0.015871556103229524, "step": 101720 }, { "epoch": 0.9574117647058824, "grad_norm": 0.5560100723062491, "learning_rate": 2.044489635851011e-06, "loss": 0.014941592514514924, "step": 101725 }, { "epoch": 0.9574588235294118, "grad_norm": 0.30472536207400197, "learning_rate": 2.044439391703368e-06, "loss": 0.011282341182231903, "step": 101730 }, { "epoch": 0.9575058823529412, "grad_norm": 0.49969211420558673, "learning_rate": 2.0443891512598528e-06, "loss": 0.01285427063703537, "step": 101735 }, { "epoch": 0.9575529411764706, "grad_norm": 0.4270655314465584, "learning_rate": 2.04433891452001e-06, "loss": 0.013607355952262878, "step": 101740 }, { "epoch": 0.9576, "grad_norm": 0.4409034021512984, "learning_rate": 2.0442886814833847e-06, "loss": 0.012153908610343933, "step": 101745 }, { "epoch": 0.9576470588235294, "grad_norm": 0.3045569800303228, "learning_rate": 2.0442384521495225e-06, "loss": 0.014819124341011047, "step": 101750 }, { "epoch": 0.9576941176470588, "grad_norm": 0.4107279125962024, "learning_rate": 2.0441882265179684e-06, "loss": 0.01352689266204834, "step": 101755 }, { "epoch": 0.9577411764705882, "grad_norm": 0.5764295893085642, "learning_rate": 2.0441380045882668e-06, "loss": 0.011108414828777313, "step": 101760 }, { "epoch": 0.9577882352941176, "grad_norm": 0.6068401008821742, "learning_rate": 2.044087786359964e-06, "loss": 0.013662292063236237, "step": 101765 }, { "epoch": 0.957835294117647, "grad_norm": 0.5488135512067441, "learning_rate": 2.044037571832605e-06, "loss": 0.01188792809844017, "step": 101770 }, { "epoch": 0.9578823529411765, "grad_norm": 0.493771652338952, "learning_rate": 2.0439873610057344e-06, "loss": 0.014376914501190186, "step": 101775 }, { "epoch": 0.9579294117647059, "grad_norm": 0.3680503693670848, "learning_rate": 2.043937153878899e-06, "loss": 0.01363183856010437, "step": 101780 }, { "epoch": 0.9579764705882353, "grad_norm": 0.44941965319306365, "learning_rate": 2.043886950451644e-06, "loss": 0.010752034932374954, "step": 101785 }, { "epoch": 0.9580235294117647, "grad_norm": 0.6079939111581715, "learning_rate": 2.0438367507235143e-06, "loss": 0.018812111020088194, "step": 101790 }, { "epoch": 0.9580705882352941, "grad_norm": 0.3946895702706412, "learning_rate": 2.043786554694056e-06, "loss": 0.01566626876592636, "step": 101795 }, { "epoch": 0.9581176470588235, "grad_norm": 0.5850889111671074, "learning_rate": 2.0437363623628164e-06, "loss": 0.012182588130235672, "step": 101800 }, { "epoch": 0.9581647058823529, "grad_norm": 0.5694788254355774, "learning_rate": 2.0436861737293396e-06, "loss": 0.014069083333015441, "step": 101805 }, { "epoch": 0.9582117647058823, "grad_norm": 0.6055321180403416, "learning_rate": 2.043635988793172e-06, "loss": 0.01692419946193695, "step": 101810 }, { "epoch": 0.9582588235294117, "grad_norm": 0.361359595740977, "learning_rate": 2.0435858075538605e-06, "loss": 0.012226856499910354, "step": 101815 }, { "epoch": 0.9583058823529412, "grad_norm": 0.23471134654395615, "learning_rate": 2.0435356300109503e-06, "loss": 0.017879235744476318, "step": 101820 }, { "epoch": 0.9583529411764706, "grad_norm": 0.35992492024534056, "learning_rate": 2.043485456163988e-06, "loss": 0.009038307517766953, "step": 101825 }, { "epoch": 0.9584, "grad_norm": 0.44862399528923763, "learning_rate": 2.0434352860125192e-06, "loss": 0.015163017809391022, "step": 101830 }, { "epoch": 0.9584470588235294, "grad_norm": 0.46182687944455353, "learning_rate": 2.0433851195560916e-06, "loss": 0.014381010830402375, "step": 101835 }, { "epoch": 0.9584941176470588, "grad_norm": 0.5632663196701939, "learning_rate": 2.0433349567942507e-06, "loss": 0.01113930568099022, "step": 101840 }, { "epoch": 0.9585411764705882, "grad_norm": 1.5378542226802734, "learning_rate": 2.043284797726543e-06, "loss": 0.016205650568008424, "step": 101845 }, { "epoch": 0.9585882352941176, "grad_norm": 0.5129319980274695, "learning_rate": 2.0432346423525155e-06, "loss": 0.01390804648399353, "step": 101850 }, { "epoch": 0.958635294117647, "grad_norm": 0.6823986819619441, "learning_rate": 2.0431844906717154e-06, "loss": 0.013037090003490449, "step": 101855 }, { "epoch": 0.9586823529411764, "grad_norm": 0.42227871926585137, "learning_rate": 2.0431343426836884e-06, "loss": 0.01222672164440155, "step": 101860 }, { "epoch": 0.9587294117647058, "grad_norm": 0.43972030949099233, "learning_rate": 2.043084198387982e-06, "loss": 0.01216207593679428, "step": 101865 }, { "epoch": 0.9587764705882353, "grad_norm": 0.46524128068639664, "learning_rate": 2.043034057784143e-06, "loss": 0.01651448607444763, "step": 101870 }, { "epoch": 0.9588235294117647, "grad_norm": 0.47924333478788916, "learning_rate": 2.042983920871718e-06, "loss": 0.013042965531349182, "step": 101875 }, { "epoch": 0.9588705882352941, "grad_norm": 0.46579903279796014, "learning_rate": 2.0429337876502547e-06, "loss": 0.015597026050090789, "step": 101880 }, { "epoch": 0.9589176470588235, "grad_norm": 0.5081629599643477, "learning_rate": 2.0428836581192997e-06, "loss": 0.017037755250930785, "step": 101885 }, { "epoch": 0.9589647058823529, "grad_norm": 0.5334840334390678, "learning_rate": 2.0428335322784005e-06, "loss": 0.00885516256093979, "step": 101890 }, { "epoch": 0.9590117647058823, "grad_norm": 0.32893569922300275, "learning_rate": 2.0427834101271044e-06, "loss": 0.0129214808344841, "step": 101895 }, { "epoch": 0.9590588235294117, "grad_norm": 0.2456315980985754, "learning_rate": 2.042733291664959e-06, "loss": 0.013973765075206757, "step": 101900 }, { "epoch": 0.9591058823529411, "grad_norm": 0.4671695855162679, "learning_rate": 2.0426831768915114e-06, "loss": 0.012269887328147887, "step": 101905 }, { "epoch": 0.9591529411764705, "grad_norm": 0.6257023072561491, "learning_rate": 2.0426330658063094e-06, "loss": 0.01730375587940216, "step": 101910 }, { "epoch": 0.9592, "grad_norm": 0.30055616253697176, "learning_rate": 2.042582958408901e-06, "loss": 0.010151693969964981, "step": 101915 }, { "epoch": 0.9592470588235295, "grad_norm": 0.4783677853302606, "learning_rate": 2.0425328546988326e-06, "loss": 0.012573562562465668, "step": 101920 }, { "epoch": 0.9592941176470589, "grad_norm": 0.5150278426330321, "learning_rate": 2.042482754675653e-06, "loss": 0.012895609438419341, "step": 101925 }, { "epoch": 0.9593411764705883, "grad_norm": 0.33073613667858787, "learning_rate": 2.04243265833891e-06, "loss": 0.014806704223155975, "step": 101930 }, { "epoch": 0.9593882352941177, "grad_norm": 0.40340532554459685, "learning_rate": 2.0423825656881515e-06, "loss": 0.010801038146018982, "step": 101935 }, { "epoch": 0.959435294117647, "grad_norm": 0.6431182672479501, "learning_rate": 2.0423324767229257e-06, "loss": 0.019527900218963622, "step": 101940 }, { "epoch": 0.9594823529411765, "grad_norm": 0.3198671716099848, "learning_rate": 2.04228239144278e-06, "loss": 0.013521340489387513, "step": 101945 }, { "epoch": 0.9595294117647059, "grad_norm": 0.3719222092838237, "learning_rate": 2.0422323098472624e-06, "loss": 0.010309600085020066, "step": 101950 }, { "epoch": 0.9595764705882353, "grad_norm": 0.4569561343766141, "learning_rate": 2.042182231935922e-06, "loss": 0.01253059208393097, "step": 101955 }, { "epoch": 0.9596235294117647, "grad_norm": 0.3415931855789421, "learning_rate": 2.042132157708307e-06, "loss": 0.010127393901348114, "step": 101960 }, { "epoch": 0.9596705882352942, "grad_norm": 0.9119373233375412, "learning_rate": 2.042082087163966e-06, "loss": 0.015587183833122253, "step": 101965 }, { "epoch": 0.9597176470588236, "grad_norm": 0.506675750601362, "learning_rate": 2.042032020302447e-06, "loss": 0.013780838251113892, "step": 101970 }, { "epoch": 0.959764705882353, "grad_norm": 0.4361572175921019, "learning_rate": 2.041981957123298e-06, "loss": 0.010226111859083176, "step": 101975 }, { "epoch": 0.9598117647058824, "grad_norm": 0.617363795249707, "learning_rate": 2.0419318976260686e-06, "loss": 0.016842103004455565, "step": 101980 }, { "epoch": 0.9598588235294118, "grad_norm": 0.494811704463519, "learning_rate": 2.0418818418103073e-06, "loss": 0.01864912807941437, "step": 101985 }, { "epoch": 0.9599058823529412, "grad_norm": 0.35125418645432843, "learning_rate": 2.041831789675563e-06, "loss": 0.013082119822502136, "step": 101990 }, { "epoch": 0.9599529411764706, "grad_norm": 0.4997068699567732, "learning_rate": 2.041781741221384e-06, "loss": 0.011144286394119263, "step": 101995 }, { "epoch": 0.96, "grad_norm": 0.45481306122792053, "learning_rate": 2.04173169644732e-06, "loss": 0.011309409886598587, "step": 102000 }, { "epoch": 0.9600470588235294, "grad_norm": 0.45553841231394215, "learning_rate": 2.041681655352919e-06, "loss": 0.01303141713142395, "step": 102005 }, { "epoch": 0.9600941176470589, "grad_norm": 0.464685138620944, "learning_rate": 2.041631617937731e-06, "loss": 0.01174471601843834, "step": 102010 }, { "epoch": 0.9601411764705883, "grad_norm": 0.23182613813429429, "learning_rate": 2.0415815842013048e-06, "loss": 0.014700555801391601, "step": 102015 }, { "epoch": 0.9601882352941177, "grad_norm": 0.40790709233416017, "learning_rate": 2.04153155414319e-06, "loss": 0.012044520676136016, "step": 102020 }, { "epoch": 0.9602352941176471, "grad_norm": 0.4952526300802197, "learning_rate": 2.041481527762935e-06, "loss": 0.011984258890151978, "step": 102025 }, { "epoch": 0.9602823529411765, "grad_norm": 0.4824140449098319, "learning_rate": 2.0414315050600905e-06, "loss": 0.016911713778972624, "step": 102030 }, { "epoch": 0.9603294117647059, "grad_norm": 0.28235266266582737, "learning_rate": 2.041381486034205e-06, "loss": 0.014485085010528564, "step": 102035 }, { "epoch": 0.9603764705882353, "grad_norm": 0.6211900786800608, "learning_rate": 2.0413314706848285e-06, "loss": 0.013060583174228669, "step": 102040 }, { "epoch": 0.9604235294117647, "grad_norm": 0.5212162990842314, "learning_rate": 2.0412814590115104e-06, "loss": 0.014461490511894225, "step": 102045 }, { "epoch": 0.9604705882352941, "grad_norm": 0.48599687850842, "learning_rate": 2.0412314510138e-06, "loss": 0.015035778284072876, "step": 102050 }, { "epoch": 0.9605176470588235, "grad_norm": 0.4885399940395521, "learning_rate": 2.041181446691248e-06, "loss": 0.013443252444267273, "step": 102055 }, { "epoch": 0.960564705882353, "grad_norm": 0.41077946248816305, "learning_rate": 2.0411314460434047e-06, "loss": 0.01583479642868042, "step": 102060 }, { "epoch": 0.9606117647058824, "grad_norm": 0.5508342175011213, "learning_rate": 2.041081449069818e-06, "loss": 0.013631895184516907, "step": 102065 }, { "epoch": 0.9606588235294118, "grad_norm": 0.7839439988185707, "learning_rate": 2.0410314557700405e-06, "loss": 0.021167805790901183, "step": 102070 }, { "epoch": 0.9607058823529412, "grad_norm": 0.47713520103833873, "learning_rate": 2.04098146614362e-06, "loss": 0.01363697201013565, "step": 102075 }, { "epoch": 0.9607529411764706, "grad_norm": 0.3471097763513068, "learning_rate": 2.0409314801901077e-06, "loss": 0.011448437720537186, "step": 102080 }, { "epoch": 0.9608, "grad_norm": 0.40531395952241267, "learning_rate": 2.040881497909054e-06, "loss": 0.011200923472642899, "step": 102085 }, { "epoch": 0.9608470588235294, "grad_norm": 0.4402679628133114, "learning_rate": 2.040831519300009e-06, "loss": 0.014505678415298462, "step": 102090 }, { "epoch": 0.9608941176470588, "grad_norm": 0.3057923672442544, "learning_rate": 2.040781544362523e-06, "loss": 0.011649475246667863, "step": 102095 }, { "epoch": 0.9609411764705882, "grad_norm": 0.33110186896603727, "learning_rate": 2.040731573096147e-06, "loss": 0.010909603536128997, "step": 102100 }, { "epoch": 0.9609882352941177, "grad_norm": 0.43092284224538824, "learning_rate": 2.0406816055004307e-06, "loss": 0.01558169275522232, "step": 102105 }, { "epoch": 0.9610352941176471, "grad_norm": 0.6870330637146603, "learning_rate": 2.040631641574925e-06, "loss": 0.014725816249847413, "step": 102110 }, { "epoch": 0.9610823529411765, "grad_norm": 0.4193398749076254, "learning_rate": 2.0405816813191814e-06, "loss": 0.014677737653255463, "step": 102115 }, { "epoch": 0.9611294117647059, "grad_norm": 0.5399148574302373, "learning_rate": 2.04053172473275e-06, "loss": 0.014963231980800629, "step": 102120 }, { "epoch": 0.9611764705882353, "grad_norm": 0.39247074569451695, "learning_rate": 2.040481771815182e-06, "loss": 0.01748829483985901, "step": 102125 }, { "epoch": 0.9612235294117647, "grad_norm": 0.4489985500230607, "learning_rate": 2.0404318225660277e-06, "loss": 0.01273987591266632, "step": 102130 }, { "epoch": 0.9612705882352941, "grad_norm": 0.4188533504079707, "learning_rate": 2.0403818769848387e-06, "loss": 0.0118067666888237, "step": 102135 }, { "epoch": 0.9613176470588235, "grad_norm": 0.42130833693763076, "learning_rate": 2.0403319350711657e-06, "loss": 0.010225576162338258, "step": 102140 }, { "epoch": 0.9613647058823529, "grad_norm": 1.2676404039407199, "learning_rate": 2.040281996824561e-06, "loss": 0.01333140730857849, "step": 102145 }, { "epoch": 0.9614117647058823, "grad_norm": 0.3476489464867303, "learning_rate": 2.040232062244574e-06, "loss": 0.014979997277259826, "step": 102150 }, { "epoch": 0.9614588235294118, "grad_norm": 0.40796245017095567, "learning_rate": 2.0401821313307575e-06, "loss": 0.009108495712280274, "step": 102155 }, { "epoch": 0.9615058823529412, "grad_norm": 1.478474167096255, "learning_rate": 2.0401322040826625e-06, "loss": 0.012554574012756347, "step": 102160 }, { "epoch": 0.9615529411764706, "grad_norm": 0.4451245020967228, "learning_rate": 2.0400822804998403e-06, "loss": 0.010058210045099259, "step": 102165 }, { "epoch": 0.9616, "grad_norm": 0.4385506830375095, "learning_rate": 2.0400323605818426e-06, "loss": 0.014002549648284911, "step": 102170 }, { "epoch": 0.9616470588235294, "grad_norm": 0.41696767246087885, "learning_rate": 2.039982444328221e-06, "loss": 0.013546329736709595, "step": 102175 }, { "epoch": 0.9616941176470588, "grad_norm": 0.5065476941108423, "learning_rate": 2.0399325317385273e-06, "loss": 0.01412457674741745, "step": 102180 }, { "epoch": 0.9617411764705882, "grad_norm": 0.4939713579983415, "learning_rate": 2.039882622812313e-06, "loss": 0.012572506070137024, "step": 102185 }, { "epoch": 0.9617882352941176, "grad_norm": 0.5847624926351481, "learning_rate": 2.03983271754913e-06, "loss": 0.011878884583711623, "step": 102190 }, { "epoch": 0.961835294117647, "grad_norm": 0.28802098331830317, "learning_rate": 2.039782815948531e-06, "loss": 0.010390824824571609, "step": 102195 }, { "epoch": 0.9618823529411765, "grad_norm": 0.47398311492010103, "learning_rate": 2.0397329180100673e-06, "loss": 0.015368583798408508, "step": 102200 }, { "epoch": 0.9619294117647059, "grad_norm": 0.4419003170489978, "learning_rate": 2.0396830237332913e-06, "loss": 0.013334806263446807, "step": 102205 }, { "epoch": 0.9619764705882353, "grad_norm": 0.6937424849776394, "learning_rate": 2.0396331331177547e-06, "loss": 0.014202424883842468, "step": 102210 }, { "epoch": 0.9620235294117647, "grad_norm": 0.45917099720950877, "learning_rate": 2.03958324616301e-06, "loss": 0.01578567028045654, "step": 102215 }, { "epoch": 0.9620705882352941, "grad_norm": 1.0259676503727377, "learning_rate": 2.03953336286861e-06, "loss": 0.013596823811531067, "step": 102220 }, { "epoch": 0.9621176470588235, "grad_norm": 0.5599782450605961, "learning_rate": 2.039483483234107e-06, "loss": 0.016489675641059874, "step": 102225 }, { "epoch": 0.9621647058823529, "grad_norm": 0.45106785879425393, "learning_rate": 2.0394336072590523e-06, "loss": 0.012690967321395874, "step": 102230 }, { "epoch": 0.9622117647058823, "grad_norm": 0.37313027997051484, "learning_rate": 2.0393837349429996e-06, "loss": 0.01577990800142288, "step": 102235 }, { "epoch": 0.9622588235294117, "grad_norm": 1.1397839947691981, "learning_rate": 2.039333866285502e-06, "loss": 0.013460865616798401, "step": 102240 }, { "epoch": 0.9623058823529411, "grad_norm": 0.5561156442621753, "learning_rate": 2.0392840012861108e-06, "loss": 0.014521728456020355, "step": 102245 }, { "epoch": 0.9623529411764706, "grad_norm": 0.4173041964079813, "learning_rate": 2.03923413994438e-06, "loss": 0.016232165694236755, "step": 102250 }, { "epoch": 0.9624, "grad_norm": 0.45744139635160247, "learning_rate": 2.0391842822598615e-06, "loss": 0.014816226065158844, "step": 102255 }, { "epoch": 0.9624470588235294, "grad_norm": 0.3193260690413304, "learning_rate": 2.0391344282321095e-06, "loss": 0.01108694076538086, "step": 102260 }, { "epoch": 0.9624941176470588, "grad_norm": 0.4191839123393142, "learning_rate": 2.039084577860675e-06, "loss": 0.013825255632400512, "step": 102265 }, { "epoch": 0.9625411764705882, "grad_norm": 0.2817848469558605, "learning_rate": 2.0390347311451134e-06, "loss": 0.01437746435403824, "step": 102270 }, { "epoch": 0.9625882352941176, "grad_norm": 0.7344513871798546, "learning_rate": 2.038984888084976e-06, "loss": 0.01320311576128006, "step": 102275 }, { "epoch": 0.962635294117647, "grad_norm": 0.42415834546888237, "learning_rate": 2.038935048679818e-06, "loss": 0.012925681471824647, "step": 102280 }, { "epoch": 0.9626823529411764, "grad_norm": 0.4174735859637035, "learning_rate": 2.0388852129291907e-06, "loss": 0.019139762222766876, "step": 102285 }, { "epoch": 0.9627294117647058, "grad_norm": 0.5221330833386925, "learning_rate": 2.0388353808326485e-06, "loss": 0.014260342717170716, "step": 102290 }, { "epoch": 0.9627764705882353, "grad_norm": 0.5072071768353466, "learning_rate": 2.0387855523897444e-06, "loss": 0.012764987349510194, "step": 102295 }, { "epoch": 0.9628235294117647, "grad_norm": 0.4125152744105674, "learning_rate": 2.0387357276000332e-06, "loss": 0.0114603191614151, "step": 102300 }, { "epoch": 0.9628705882352941, "grad_norm": 0.37161694060633343, "learning_rate": 2.038685906463067e-06, "loss": 0.011130467802286149, "step": 102305 }, { "epoch": 0.9629176470588235, "grad_norm": 0.4095009941112527, "learning_rate": 2.0386360889784e-06, "loss": 0.01492893397808075, "step": 102310 }, { "epoch": 0.962964705882353, "grad_norm": 0.4680575079167114, "learning_rate": 2.038586275145586e-06, "loss": 0.015155607461929321, "step": 102315 }, { "epoch": 0.9630117647058823, "grad_norm": 0.5215709829460216, "learning_rate": 2.03853646496418e-06, "loss": 0.016828036308288573, "step": 102320 }, { "epoch": 0.9630588235294117, "grad_norm": 0.6790838826726842, "learning_rate": 2.038486658433734e-06, "loss": 0.014267972111701966, "step": 102325 }, { "epoch": 0.9631058823529411, "grad_norm": 0.7395260172338032, "learning_rate": 2.0384368555538026e-06, "loss": 0.014958743751049042, "step": 102330 }, { "epoch": 0.9631529411764705, "grad_norm": 0.47264554652722407, "learning_rate": 2.0383870563239413e-06, "loss": 0.013083232939243317, "step": 102335 }, { "epoch": 0.9632, "grad_norm": 0.4725707794255405, "learning_rate": 2.0383372607437022e-06, "loss": 0.011402697116136552, "step": 102340 }, { "epoch": 0.9632470588235295, "grad_norm": 0.2230771645408757, "learning_rate": 2.038287468812641e-06, "loss": 0.012890812754631043, "step": 102345 }, { "epoch": 0.9632941176470589, "grad_norm": 0.3721673716642574, "learning_rate": 2.0382376805303118e-06, "loss": 0.012911254167556762, "step": 102350 }, { "epoch": 0.9633411764705883, "grad_norm": 0.3968288389492953, "learning_rate": 2.038187895896268e-06, "loss": 0.01371823251247406, "step": 102355 }, { "epoch": 0.9633882352941177, "grad_norm": 0.6980182228738062, "learning_rate": 2.0381381149100653e-06, "loss": 0.014386674761772156, "step": 102360 }, { "epoch": 0.9634352941176471, "grad_norm": 0.710519414600035, "learning_rate": 2.0380883375712576e-06, "loss": 0.016037961840629576, "step": 102365 }, { "epoch": 0.9634823529411765, "grad_norm": 0.6671782394706963, "learning_rate": 2.0380385638794002e-06, "loss": 0.01907716542482376, "step": 102370 }, { "epoch": 0.9635294117647059, "grad_norm": 0.4152056553363724, "learning_rate": 2.0379887938340468e-06, "loss": 0.012452230602502824, "step": 102375 }, { "epoch": 0.9635764705882353, "grad_norm": 0.428339929385071, "learning_rate": 2.0379390274347526e-06, "loss": 0.0142135888338089, "step": 102380 }, { "epoch": 0.9636235294117647, "grad_norm": 0.4036057250573207, "learning_rate": 2.037889264681072e-06, "loss": 0.01399724930524826, "step": 102385 }, { "epoch": 0.9636705882352942, "grad_norm": 0.35822489140490166, "learning_rate": 2.0378395055725606e-06, "loss": 0.013069948554039002, "step": 102390 }, { "epoch": 0.9637176470588236, "grad_norm": 0.5582281795824702, "learning_rate": 2.0377897501087735e-06, "loss": 0.013869436085224151, "step": 102395 }, { "epoch": 0.963764705882353, "grad_norm": 0.5256769774398375, "learning_rate": 2.0377399982892656e-06, "loss": 0.013551701605319978, "step": 102400 }, { "epoch": 0.9638117647058824, "grad_norm": 0.5009291079719027, "learning_rate": 2.037690250113592e-06, "loss": 0.013929447531700135, "step": 102405 }, { "epoch": 0.9638588235294118, "grad_norm": 0.6519451228559857, "learning_rate": 2.0376405055813078e-06, "loss": 0.01515621244907379, "step": 102410 }, { "epoch": 0.9639058823529412, "grad_norm": 0.5871217711959065, "learning_rate": 2.0375907646919676e-06, "loss": 0.0150704488158226, "step": 102415 }, { "epoch": 0.9639529411764706, "grad_norm": 0.3268863365018477, "learning_rate": 2.0375410274451283e-06, "loss": 0.013728678226470947, "step": 102420 }, { "epoch": 0.964, "grad_norm": 0.33933273575353334, "learning_rate": 2.0374912938403444e-06, "loss": 0.014160466194152833, "step": 102425 }, { "epoch": 0.9640470588235294, "grad_norm": 0.4706214114200114, "learning_rate": 2.0374415638771724e-06, "loss": 0.0191205695271492, "step": 102430 }, { "epoch": 0.9640941176470588, "grad_norm": 0.5997772256767214, "learning_rate": 2.037391837555166e-06, "loss": 0.015034723281860351, "step": 102435 }, { "epoch": 0.9641411764705883, "grad_norm": 0.5099803859112686, "learning_rate": 2.0373421148738827e-06, "loss": 0.015385009348392487, "step": 102440 }, { "epoch": 0.9641882352941177, "grad_norm": 0.45227942492974066, "learning_rate": 2.0372923958328773e-06, "loss": 0.013427145779132843, "step": 102445 }, { "epoch": 0.9642352941176471, "grad_norm": 0.6309061564130155, "learning_rate": 2.037242680431706e-06, "loss": 0.013454076647758485, "step": 102450 }, { "epoch": 0.9642823529411765, "grad_norm": 0.7262583941235029, "learning_rate": 2.0371929686699248e-06, "loss": 0.01375427544116974, "step": 102455 }, { "epoch": 0.9643294117647059, "grad_norm": 0.5743122995575974, "learning_rate": 2.0371432605470894e-06, "loss": 0.01525925099849701, "step": 102460 }, { "epoch": 0.9643764705882353, "grad_norm": 0.2787954392369521, "learning_rate": 2.0370935560627565e-06, "loss": 0.010869131982326507, "step": 102465 }, { "epoch": 0.9644235294117647, "grad_norm": 0.5683854007931076, "learning_rate": 2.0370438552164805e-06, "loss": 0.01377759724855423, "step": 102470 }, { "epoch": 0.9644705882352941, "grad_norm": 0.393036599131871, "learning_rate": 2.0369941580078197e-06, "loss": 0.019220852851867677, "step": 102475 }, { "epoch": 0.9645176470588235, "grad_norm": 0.3777373005816014, "learning_rate": 2.0369444644363303e-06, "loss": 0.014886701107025146, "step": 102480 }, { "epoch": 0.964564705882353, "grad_norm": 0.43476036902772924, "learning_rate": 2.036894774501567e-06, "loss": 0.012304817885160446, "step": 102485 }, { "epoch": 0.9646117647058824, "grad_norm": 0.4891117222258828, "learning_rate": 2.036845088203087e-06, "loss": 0.013387402892112732, "step": 102490 }, { "epoch": 0.9646588235294118, "grad_norm": 0.6685864454166841, "learning_rate": 2.0367954055404476e-06, "loss": 0.014302180707454681, "step": 102495 }, { "epoch": 0.9647058823529412, "grad_norm": 0.39244704865671853, "learning_rate": 2.036745726513204e-06, "loss": 0.012001225352287292, "step": 102500 }, { "epoch": 0.9647529411764706, "grad_norm": 0.5525819858449778, "learning_rate": 2.0366960511209145e-06, "loss": 0.01679651141166687, "step": 102505 }, { "epoch": 0.9648, "grad_norm": 0.4401531881854875, "learning_rate": 2.0366463793631344e-06, "loss": 0.011800996959209442, "step": 102510 }, { "epoch": 0.9648470588235294, "grad_norm": 0.49378267499827566, "learning_rate": 2.0365967112394216e-06, "loss": 0.011145205050706864, "step": 102515 }, { "epoch": 0.9648941176470588, "grad_norm": 0.6448769197186597, "learning_rate": 2.036547046749332e-06, "loss": 0.017439839243888856, "step": 102520 }, { "epoch": 0.9649411764705882, "grad_norm": 0.44482448291057747, "learning_rate": 2.0364973858924234e-06, "loss": 0.014277334511280059, "step": 102525 }, { "epoch": 0.9649882352941177, "grad_norm": 0.5180853159771326, "learning_rate": 2.0364477286682527e-06, "loss": 0.015002824366092682, "step": 102530 }, { "epoch": 0.9650352941176471, "grad_norm": 0.384071652661779, "learning_rate": 2.036398075076377e-06, "loss": 0.013463294506072998, "step": 102535 }, { "epoch": 0.9650823529411765, "grad_norm": 0.9514578876236738, "learning_rate": 2.0363484251163528e-06, "loss": 0.016671308875083925, "step": 102540 }, { "epoch": 0.9651294117647059, "grad_norm": 0.38448059392397915, "learning_rate": 2.036298778787738e-06, "loss": 0.01148681566119194, "step": 102545 }, { "epoch": 0.9651764705882353, "grad_norm": 0.3846345319485631, "learning_rate": 2.03624913609009e-06, "loss": 0.015076848864555358, "step": 102550 }, { "epoch": 0.9652235294117647, "grad_norm": 0.38092453055451375, "learning_rate": 2.0361994970229663e-06, "loss": 0.011670148372650147, "step": 102555 }, { "epoch": 0.9652705882352941, "grad_norm": 0.47628232810781623, "learning_rate": 2.0361498615859242e-06, "loss": 0.012320291996002198, "step": 102560 }, { "epoch": 0.9653176470588235, "grad_norm": 0.7356694035900003, "learning_rate": 2.0361002297785213e-06, "loss": 0.014719611406326294, "step": 102565 }, { "epoch": 0.9653647058823529, "grad_norm": 0.5405839011840403, "learning_rate": 2.036050601600315e-06, "loss": 0.01477014720439911, "step": 102570 }, { "epoch": 0.9654117647058823, "grad_norm": 0.6490633528761556, "learning_rate": 2.0360009770508636e-06, "loss": 0.012166884541511536, "step": 102575 }, { "epoch": 0.9654588235294118, "grad_norm": 0.39650962849637666, "learning_rate": 2.035951356129724e-06, "loss": 0.01367933750152588, "step": 102580 }, { "epoch": 0.9655058823529412, "grad_norm": 0.3595782849186718, "learning_rate": 2.0359017388364552e-06, "loss": 0.011380957067012787, "step": 102585 }, { "epoch": 0.9655529411764706, "grad_norm": 0.40916864043267137, "learning_rate": 2.0358521251706146e-06, "loss": 0.011854714900255203, "step": 102590 }, { "epoch": 0.9656, "grad_norm": 0.37158966583782355, "learning_rate": 2.03580251513176e-06, "loss": 0.013131439685821533, "step": 102595 }, { "epoch": 0.9656470588235294, "grad_norm": 0.9963521403060134, "learning_rate": 2.0357529087194506e-06, "loss": 0.013178287446498871, "step": 102600 }, { "epoch": 0.9656941176470588, "grad_norm": 0.5338112000184119, "learning_rate": 2.0357033059332426e-06, "loss": 0.012900830805301666, "step": 102605 }, { "epoch": 0.9657411764705882, "grad_norm": 0.38384202449361354, "learning_rate": 2.035653706772696e-06, "loss": 0.010663321614265442, "step": 102610 }, { "epoch": 0.9657882352941176, "grad_norm": 0.5028633930822712, "learning_rate": 2.0356041112373685e-06, "loss": 0.014162424206733703, "step": 102615 }, { "epoch": 0.965835294117647, "grad_norm": 0.4868336716769055, "learning_rate": 2.0355545193268183e-06, "loss": 0.012160560488700867, "step": 102620 }, { "epoch": 0.9658823529411765, "grad_norm": 0.5100008059677932, "learning_rate": 2.0355049310406042e-06, "loss": 0.014213240146636963, "step": 102625 }, { "epoch": 0.9659294117647059, "grad_norm": 0.5979272406462176, "learning_rate": 2.035455346378285e-06, "loss": 0.01101209968328476, "step": 102630 }, { "epoch": 0.9659764705882353, "grad_norm": 0.4027248888304438, "learning_rate": 2.0354057653394185e-06, "loss": 0.014800222218036651, "step": 102635 }, { "epoch": 0.9660235294117647, "grad_norm": 0.4815848523327198, "learning_rate": 2.035356187923564e-06, "loss": 0.011813030391931535, "step": 102640 }, { "epoch": 0.9660705882352941, "grad_norm": 0.5121789747844148, "learning_rate": 2.0353066141302803e-06, "loss": 0.012145213782787323, "step": 102645 }, { "epoch": 0.9661176470588235, "grad_norm": 0.6541731825183666, "learning_rate": 2.0352570439591263e-06, "loss": 0.014950057864189148, "step": 102650 }, { "epoch": 0.9661647058823529, "grad_norm": 0.4324152649736979, "learning_rate": 2.035207477409661e-06, "loss": 0.022751030325889588, "step": 102655 }, { "epoch": 0.9662117647058823, "grad_norm": 0.34815120069320893, "learning_rate": 2.035157914481443e-06, "loss": 0.018140310049057008, "step": 102660 }, { "epoch": 0.9662588235294117, "grad_norm": 0.5779865550732256, "learning_rate": 2.0351083551740315e-06, "loss": 0.014122575521469116, "step": 102665 }, { "epoch": 0.9663058823529411, "grad_norm": 0.3288663447469741, "learning_rate": 2.0350587994869854e-06, "loss": 0.011369136720895767, "step": 102670 }, { "epoch": 0.9663529411764706, "grad_norm": 0.48986748623679377, "learning_rate": 2.035009247419865e-06, "loss": 0.010806478559970856, "step": 102675 }, { "epoch": 0.9664, "grad_norm": 0.39950754177619774, "learning_rate": 2.0349596989722284e-06, "loss": 0.014224544167518616, "step": 102680 }, { "epoch": 0.9664470588235294, "grad_norm": 0.7030801551014835, "learning_rate": 2.0349101541436356e-06, "loss": 0.015399360656738281, "step": 102685 }, { "epoch": 0.9664941176470588, "grad_norm": 0.4610021809159189, "learning_rate": 2.0348606129336463e-06, "loss": 0.012721575796604156, "step": 102690 }, { "epoch": 0.9665411764705882, "grad_norm": 1.1607118545832307, "learning_rate": 2.034811075341819e-06, "loss": 0.01313878744840622, "step": 102695 }, { "epoch": 0.9665882352941176, "grad_norm": 0.40393119342153466, "learning_rate": 2.0347615413677145e-06, "loss": 0.01329548954963684, "step": 102700 }, { "epoch": 0.966635294117647, "grad_norm": 0.5618471282059063, "learning_rate": 2.034712011010892e-06, "loss": 0.014516738057136536, "step": 102705 }, { "epoch": 0.9666823529411764, "grad_norm": 0.6071842143246075, "learning_rate": 2.034662484270911e-06, "loss": 0.013259948790073394, "step": 102710 }, { "epoch": 0.9667294117647058, "grad_norm": 0.41792433721967065, "learning_rate": 2.034612961147331e-06, "loss": 0.015700799226760865, "step": 102715 }, { "epoch": 0.9667764705882353, "grad_norm": 0.36128938781957776, "learning_rate": 2.0345634416397134e-06, "loss": 0.012283213436603546, "step": 102720 }, { "epoch": 0.9668235294117647, "grad_norm": 0.2951638022047888, "learning_rate": 2.034513925747617e-06, "loss": 0.01323789805173874, "step": 102725 }, { "epoch": 0.9668705882352941, "grad_norm": 0.3552391180839266, "learning_rate": 2.034464413470602e-06, "loss": 0.014500978589057922, "step": 102730 }, { "epoch": 0.9669176470588235, "grad_norm": 0.4118597170871406, "learning_rate": 2.0344149048082285e-06, "loss": 0.016201388835906983, "step": 102735 }, { "epoch": 0.966964705882353, "grad_norm": 0.6726667871693304, "learning_rate": 2.034365399760057e-06, "loss": 0.016017170250415803, "step": 102740 }, { "epoch": 0.9670117647058823, "grad_norm": 0.34863475246545245, "learning_rate": 2.0343158983256478e-06, "loss": 0.01595780998468399, "step": 102745 }, { "epoch": 0.9670588235294117, "grad_norm": 0.4945611778969103, "learning_rate": 2.0342664005045613e-06, "loss": 0.013183987140655518, "step": 102750 }, { "epoch": 0.9671058823529411, "grad_norm": 0.46881631148268965, "learning_rate": 2.034216906296358e-06, "loss": 0.015614604949951172, "step": 102755 }, { "epoch": 0.9671529411764705, "grad_norm": 0.47442336235283616, "learning_rate": 2.034167415700597e-06, "loss": 0.013975176215171813, "step": 102760 }, { "epoch": 0.9672, "grad_norm": 0.4060839548688806, "learning_rate": 2.034117928716841e-06, "loss": 0.015045389533042908, "step": 102765 }, { "epoch": 0.9672470588235295, "grad_norm": 0.36222436786825213, "learning_rate": 2.034068445344649e-06, "loss": 0.014457981288433074, "step": 102770 }, { "epoch": 0.9672941176470589, "grad_norm": 0.3312400953842951, "learning_rate": 2.034018965583583e-06, "loss": 0.011429867148399353, "step": 102775 }, { "epoch": 0.9673411764705883, "grad_norm": 0.622274094771423, "learning_rate": 2.0339694894332032e-06, "loss": 0.013366425037384033, "step": 102780 }, { "epoch": 0.9673882352941177, "grad_norm": 0.6249119742523611, "learning_rate": 2.0339200168930703e-06, "loss": 0.014292795956134797, "step": 102785 }, { "epoch": 0.9674352941176471, "grad_norm": 0.36349038216510376, "learning_rate": 2.0338705479627454e-06, "loss": 0.015210950374603271, "step": 102790 }, { "epoch": 0.9674823529411765, "grad_norm": 0.31458427773049225, "learning_rate": 2.0338210826417896e-06, "loss": 0.013972482085227967, "step": 102795 }, { "epoch": 0.9675294117647059, "grad_norm": 0.5322431275302781, "learning_rate": 2.0337716209297646e-06, "loss": 0.016614820063114166, "step": 102800 }, { "epoch": 0.9675764705882353, "grad_norm": 0.26380104232674695, "learning_rate": 2.0337221628262305e-06, "loss": 0.011689133942127228, "step": 102805 }, { "epoch": 0.9676235294117647, "grad_norm": 0.3720567447777234, "learning_rate": 2.0336727083307492e-06, "loss": 0.018008926510810853, "step": 102810 }, { "epoch": 0.9676705882352942, "grad_norm": 0.4269278466880534, "learning_rate": 2.033623257442882e-06, "loss": 0.018250468373298644, "step": 102815 }, { "epoch": 0.9677176470588236, "grad_norm": 0.4706716626345534, "learning_rate": 2.0335738101621903e-06, "loss": 0.013604824244976044, "step": 102820 }, { "epoch": 0.967764705882353, "grad_norm": 0.46487978913824135, "learning_rate": 2.033524366488235e-06, "loss": 0.012344843149185181, "step": 102825 }, { "epoch": 0.9678117647058824, "grad_norm": 0.4760474693844999, "learning_rate": 2.0334749264205785e-06, "loss": 0.014858844876289367, "step": 102830 }, { "epoch": 0.9678588235294118, "grad_norm": 0.46109353581800855, "learning_rate": 2.0334254899587823e-06, "loss": 0.010812471061944962, "step": 102835 }, { "epoch": 0.9679058823529412, "grad_norm": 0.6084965031872803, "learning_rate": 2.0333760571024073e-06, "loss": 0.01181691735982895, "step": 102840 }, { "epoch": 0.9679529411764706, "grad_norm": 0.34627463848252776, "learning_rate": 2.033326627851016e-06, "loss": 0.013459068536758424, "step": 102845 }, { "epoch": 0.968, "grad_norm": 0.3940121479270774, "learning_rate": 2.0332772022041707e-06, "loss": 0.011837249994277954, "step": 102850 }, { "epoch": 0.9680470588235294, "grad_norm": 0.5638692754119694, "learning_rate": 2.033227780161432e-06, "loss": 0.01638970375061035, "step": 102855 }, { "epoch": 0.9680941176470588, "grad_norm": 0.32002594289595493, "learning_rate": 2.0331783617223633e-06, "loss": 0.010574093461036682, "step": 102860 }, { "epoch": 0.9681411764705883, "grad_norm": 0.4915378162978181, "learning_rate": 2.0331289468865253e-06, "loss": 0.015388846397399902, "step": 102865 }, { "epoch": 0.9681882352941177, "grad_norm": 0.5160463949247545, "learning_rate": 2.0330795356534813e-06, "loss": 0.012082985043525696, "step": 102870 }, { "epoch": 0.9682352941176471, "grad_norm": 0.7164508438366131, "learning_rate": 2.033030128022793e-06, "loss": 0.010470837354660034, "step": 102875 }, { "epoch": 0.9682823529411765, "grad_norm": 0.3787062107652346, "learning_rate": 2.0329807239940227e-06, "loss": 0.011696840077638626, "step": 102880 }, { "epoch": 0.9683294117647059, "grad_norm": 0.4895508017063421, "learning_rate": 2.0329313235667335e-06, "loss": 0.014726148545742035, "step": 102885 }, { "epoch": 0.9683764705882353, "grad_norm": 0.39030227929580447, "learning_rate": 2.0328819267404867e-06, "loss": 0.01167823225259781, "step": 102890 }, { "epoch": 0.9684235294117647, "grad_norm": 0.4233002595392079, "learning_rate": 2.0328325335148453e-06, "loss": 0.0112343929708004, "step": 102895 }, { "epoch": 0.9684705882352941, "grad_norm": 0.48834702245972966, "learning_rate": 2.032783143889372e-06, "loss": 0.0150656595826149, "step": 102900 }, { "epoch": 0.9685176470588235, "grad_norm": 0.4110297657279473, "learning_rate": 2.0327337578636293e-06, "loss": 0.013010157644748688, "step": 102905 }, { "epoch": 0.968564705882353, "grad_norm": 0.43485479445112346, "learning_rate": 2.0326843754371806e-06, "loss": 0.014279451966285706, "step": 102910 }, { "epoch": 0.9686117647058824, "grad_norm": 0.6511503207246007, "learning_rate": 2.0326349966095877e-06, "loss": 0.020082105696201325, "step": 102915 }, { "epoch": 0.9686588235294118, "grad_norm": 0.41178947280434697, "learning_rate": 2.0325856213804144e-06, "loss": 0.013123875856399536, "step": 102920 }, { "epoch": 0.9687058823529412, "grad_norm": 0.34938917090295346, "learning_rate": 2.0325362497492228e-06, "loss": 0.017094016075134277, "step": 102925 }, { "epoch": 0.9687529411764706, "grad_norm": 0.42977122696513503, "learning_rate": 2.032486881715577e-06, "loss": 0.015292462706565858, "step": 102930 }, { "epoch": 0.9688, "grad_norm": 0.3791412699811081, "learning_rate": 2.0324375172790388e-06, "loss": 0.012811680138111115, "step": 102935 }, { "epoch": 0.9688470588235294, "grad_norm": 0.6212766320404995, "learning_rate": 2.0323881564391726e-06, "loss": 0.018941468000411986, "step": 102940 }, { "epoch": 0.9688941176470588, "grad_norm": 0.30865716516335673, "learning_rate": 2.032338799195541e-06, "loss": 0.013345709443092347, "step": 102945 }, { "epoch": 0.9689411764705882, "grad_norm": 0.443423138782231, "learning_rate": 2.032289445547708e-06, "loss": 0.013722749054431915, "step": 102950 }, { "epoch": 0.9689882352941176, "grad_norm": 0.4437186825309787, "learning_rate": 2.032240095495236e-06, "loss": 0.01411150097846985, "step": 102955 }, { "epoch": 0.9690352941176471, "grad_norm": 0.5813344742669332, "learning_rate": 2.032190749037689e-06, "loss": 0.021292755007743837, "step": 102960 }, { "epoch": 0.9690823529411765, "grad_norm": 0.33047766747500273, "learning_rate": 2.032141406174631e-06, "loss": 0.012690219283103942, "step": 102965 }, { "epoch": 0.9691294117647059, "grad_norm": 0.42691239170081874, "learning_rate": 2.032092066905625e-06, "loss": 0.011230076104402542, "step": 102970 }, { "epoch": 0.9691764705882353, "grad_norm": 0.3718291318636552, "learning_rate": 2.0320427312302353e-06, "loss": 0.014516255259513855, "step": 102975 }, { "epoch": 0.9692235294117647, "grad_norm": 0.4823345147046764, "learning_rate": 2.0319933991480246e-06, "loss": 0.012398500740528107, "step": 102980 }, { "epoch": 0.9692705882352941, "grad_norm": 0.635268586645689, "learning_rate": 2.0319440706585583e-06, "loss": 0.012339873611927033, "step": 102985 }, { "epoch": 0.9693176470588235, "grad_norm": 0.4596726568351296, "learning_rate": 2.0318947457613995e-06, "loss": 0.013355594873428345, "step": 102990 }, { "epoch": 0.9693647058823529, "grad_norm": 0.4069250571145022, "learning_rate": 2.0318454244561118e-06, "loss": 0.011267478764057159, "step": 102995 }, { "epoch": 0.9694117647058823, "grad_norm": 0.3103048124543492, "learning_rate": 2.03179610674226e-06, "loss": 0.012374057620763778, "step": 103000 }, { "epoch": 0.9694588235294118, "grad_norm": 0.6110274746447235, "learning_rate": 2.0317467926194075e-06, "loss": 0.015709690749645233, "step": 103005 }, { "epoch": 0.9695058823529412, "grad_norm": 0.6157382763255371, "learning_rate": 2.03169748208712e-06, "loss": 0.01435535103082657, "step": 103010 }, { "epoch": 0.9695529411764706, "grad_norm": 0.3592849008007134, "learning_rate": 2.03164817514496e-06, "loss": 0.011831944435834884, "step": 103015 }, { "epoch": 0.9696, "grad_norm": 0.36879889423710127, "learning_rate": 2.031598871792493e-06, "loss": 0.01151544377207756, "step": 103020 }, { "epoch": 0.9696470588235294, "grad_norm": 0.5368956763135367, "learning_rate": 2.031549572029283e-06, "loss": 0.013018441200256348, "step": 103025 }, { "epoch": 0.9696941176470588, "grad_norm": 0.5004711707740801, "learning_rate": 2.031500275854895e-06, "loss": 0.014417102932929993, "step": 103030 }, { "epoch": 0.9697411764705882, "grad_norm": 0.6766792931337016, "learning_rate": 2.031450983268893e-06, "loss": 0.012381218373775482, "step": 103035 }, { "epoch": 0.9697882352941176, "grad_norm": 0.3934877424924517, "learning_rate": 2.031401694270842e-06, "loss": 0.011279035359621048, "step": 103040 }, { "epoch": 0.969835294117647, "grad_norm": 0.4439441819199725, "learning_rate": 2.0313524088603067e-06, "loss": 0.010585915297269821, "step": 103045 }, { "epoch": 0.9698823529411764, "grad_norm": 0.5963233477920703, "learning_rate": 2.0313031270368522e-06, "loss": 0.01181960254907608, "step": 103050 }, { "epoch": 0.9699294117647059, "grad_norm": 0.6874985109668388, "learning_rate": 2.0312538488000428e-06, "loss": 0.013929462432861328, "step": 103055 }, { "epoch": 0.9699764705882353, "grad_norm": 0.4260759672349504, "learning_rate": 2.031204574149444e-06, "loss": 0.013093367218971252, "step": 103060 }, { "epoch": 0.9700235294117647, "grad_norm": 0.4225846884363444, "learning_rate": 2.031155303084621e-06, "loss": 0.010350612550973892, "step": 103065 }, { "epoch": 0.9700705882352941, "grad_norm": 0.4751501432878191, "learning_rate": 2.031106035605138e-06, "loss": 0.012003527581691742, "step": 103070 }, { "epoch": 0.9701176470588235, "grad_norm": 0.4880228540760979, "learning_rate": 2.031056771710561e-06, "loss": 0.023630674183368682, "step": 103075 }, { "epoch": 0.9701647058823529, "grad_norm": 0.669347657369049, "learning_rate": 2.031007511400455e-06, "loss": 0.015627709031105042, "step": 103080 }, { "epoch": 0.9702117647058823, "grad_norm": 0.37228173499283373, "learning_rate": 2.0309582546743857e-06, "loss": 0.012068292498588562, "step": 103085 }, { "epoch": 0.9702588235294117, "grad_norm": 0.6655309955853269, "learning_rate": 2.030909001531918e-06, "loss": 0.017848311364650725, "step": 103090 }, { "epoch": 0.9703058823529411, "grad_norm": 0.380973414882326, "learning_rate": 2.030859751972617e-06, "loss": 0.016470128297805788, "step": 103095 }, { "epoch": 0.9703529411764706, "grad_norm": 0.2933459886746327, "learning_rate": 2.0308105059960497e-06, "loss": 0.015998657047748565, "step": 103100 }, { "epoch": 0.9704, "grad_norm": 0.39398980590986266, "learning_rate": 2.0307612636017808e-06, "loss": 0.011496402323246002, "step": 103105 }, { "epoch": 0.9704470588235294, "grad_norm": 0.359399732559024, "learning_rate": 2.0307120247893757e-06, "loss": 0.011693640798330306, "step": 103110 }, { "epoch": 0.9704941176470588, "grad_norm": 0.5107721986057236, "learning_rate": 2.0306627895584004e-06, "loss": 0.012851271033287048, "step": 103115 }, { "epoch": 0.9705411764705882, "grad_norm": 0.6183145215501094, "learning_rate": 2.0306135579084213e-06, "loss": 0.016697919368743895, "step": 103120 }, { "epoch": 0.9705882352941176, "grad_norm": 0.2552384373920847, "learning_rate": 2.030564329839004e-06, "loss": 0.011739178001880646, "step": 103125 }, { "epoch": 0.970635294117647, "grad_norm": 0.6328293056248299, "learning_rate": 2.030515105349714e-06, "loss": 0.015521451830863953, "step": 103130 }, { "epoch": 0.9706823529411764, "grad_norm": 0.5757951790298027, "learning_rate": 2.030465884440118e-06, "loss": 0.016298775374889374, "step": 103135 }, { "epoch": 0.9707294117647058, "grad_norm": 0.5859723947603246, "learning_rate": 2.0304166671097827e-06, "loss": 0.013597843050956727, "step": 103140 }, { "epoch": 0.9707764705882352, "grad_norm": 0.5944294127516916, "learning_rate": 2.030367453358273e-06, "loss": 0.016906552016735077, "step": 103145 }, { "epoch": 0.9708235294117648, "grad_norm": 0.46013069879353763, "learning_rate": 2.0303182431851555e-06, "loss": 0.012883496284484864, "step": 103150 }, { "epoch": 0.9708705882352942, "grad_norm": 0.406810172791146, "learning_rate": 2.0302690365899976e-06, "loss": 0.012459459900856017, "step": 103155 }, { "epoch": 0.9709176470588236, "grad_norm": 0.4456158363526663, "learning_rate": 2.0302198335723646e-06, "loss": 0.01820135712623596, "step": 103160 }, { "epoch": 0.970964705882353, "grad_norm": 0.3975725977322646, "learning_rate": 2.0301706341318238e-06, "loss": 0.010819333791732787, "step": 103165 }, { "epoch": 0.9710117647058824, "grad_norm": 0.49285343763457723, "learning_rate": 2.030121438267941e-06, "loss": 0.02565755546092987, "step": 103170 }, { "epoch": 0.9710588235294118, "grad_norm": 0.4777536866898848, "learning_rate": 2.0300722459802834e-06, "loss": 0.0145140141248703, "step": 103175 }, { "epoch": 0.9711058823529412, "grad_norm": 0.3598981052091991, "learning_rate": 2.030023057268418e-06, "loss": 0.012378492206335068, "step": 103180 }, { "epoch": 0.9711529411764706, "grad_norm": 0.43229950124113686, "learning_rate": 2.029973872131911e-06, "loss": 0.012747874855995179, "step": 103185 }, { "epoch": 0.9712, "grad_norm": 0.6561821732911745, "learning_rate": 2.029924690570329e-06, "loss": 0.016303357481956483, "step": 103190 }, { "epoch": 0.9712470588235295, "grad_norm": 0.3588023569053847, "learning_rate": 2.0298755125832406e-06, "loss": 0.012499960511922837, "step": 103195 }, { "epoch": 0.9712941176470589, "grad_norm": 0.2467244783093048, "learning_rate": 2.0298263381702113e-06, "loss": 0.01310502290725708, "step": 103200 }, { "epoch": 0.9713411764705883, "grad_norm": 0.3459663393060444, "learning_rate": 2.029777167330808e-06, "loss": 0.013828559219837189, "step": 103205 }, { "epoch": 0.9713882352941177, "grad_norm": 0.3808583154326606, "learning_rate": 2.0297280000645996e-06, "loss": 0.009570035338401794, "step": 103210 }, { "epoch": 0.9714352941176471, "grad_norm": 0.4112257195421183, "learning_rate": 2.0296788363711517e-06, "loss": 0.010255617648363113, "step": 103215 }, { "epoch": 0.9714823529411765, "grad_norm": 0.40988364427169705, "learning_rate": 2.0296296762500324e-06, "loss": 0.011167035251855851, "step": 103220 }, { "epoch": 0.9715294117647059, "grad_norm": 0.6602198517720493, "learning_rate": 2.0295805197008086e-06, "loss": 0.01742306351661682, "step": 103225 }, { "epoch": 0.9715764705882353, "grad_norm": 0.39666975337802424, "learning_rate": 2.0295313667230484e-06, "loss": 0.01636854112148285, "step": 103230 }, { "epoch": 0.9716235294117647, "grad_norm": 0.295058978672465, "learning_rate": 2.0294822173163185e-06, "loss": 0.012163604795932769, "step": 103235 }, { "epoch": 0.9716705882352941, "grad_norm": 0.5305413616953423, "learning_rate": 2.029433071480188e-06, "loss": 0.020258964598178865, "step": 103240 }, { "epoch": 0.9717176470588236, "grad_norm": 0.6647414279049794, "learning_rate": 2.029383929214223e-06, "loss": 0.01720424145460129, "step": 103245 }, { "epoch": 0.971764705882353, "grad_norm": 0.49871867573213136, "learning_rate": 2.0293347905179924e-06, "loss": 0.013951075077056885, "step": 103250 }, { "epoch": 0.9718117647058824, "grad_norm": 0.547462499138914, "learning_rate": 2.0292856553910628e-06, "loss": 0.019722814857959747, "step": 103255 }, { "epoch": 0.9718588235294118, "grad_norm": 0.5284719418266803, "learning_rate": 2.0292365238330036e-06, "loss": 0.017994812130928038, "step": 103260 }, { "epoch": 0.9719058823529412, "grad_norm": 0.39367480020031526, "learning_rate": 2.0291873958433815e-06, "loss": 0.012427937984466553, "step": 103265 }, { "epoch": 0.9719529411764706, "grad_norm": 0.5394001443381583, "learning_rate": 2.0291382714217654e-06, "loss": 0.01550317108631134, "step": 103270 }, { "epoch": 0.972, "grad_norm": 0.3920656882221511, "learning_rate": 2.0290891505677234e-06, "loss": 0.011329559236764907, "step": 103275 }, { "epoch": 0.9720470588235294, "grad_norm": 0.32966028491124794, "learning_rate": 2.0290400332808226e-06, "loss": 0.013737913966178895, "step": 103280 }, { "epoch": 0.9720941176470588, "grad_norm": 0.5783485886255618, "learning_rate": 2.028990919560633e-06, "loss": 0.01390579342842102, "step": 103285 }, { "epoch": 0.9721411764705883, "grad_norm": 0.4584957153053189, "learning_rate": 2.0289418094067216e-06, "loss": 0.015057212114334107, "step": 103290 }, { "epoch": 0.9721882352941177, "grad_norm": 0.30377445349068677, "learning_rate": 2.0288927028186575e-06, "loss": 0.01443840265274048, "step": 103295 }, { "epoch": 0.9722352941176471, "grad_norm": 0.3060197087329069, "learning_rate": 2.028843599796009e-06, "loss": 0.012361326813697815, "step": 103300 }, { "epoch": 0.9722823529411765, "grad_norm": 0.45324139040478273, "learning_rate": 2.0287945003383447e-06, "loss": 0.011705666780471802, "step": 103305 }, { "epoch": 0.9723294117647059, "grad_norm": 0.43098946884904626, "learning_rate": 2.0287454044452332e-06, "loss": 0.01100994348526001, "step": 103310 }, { "epoch": 0.9723764705882353, "grad_norm": 0.5785785628993094, "learning_rate": 2.0286963121162433e-06, "loss": 0.012036752700805665, "step": 103315 }, { "epoch": 0.9724235294117647, "grad_norm": 0.6910615760637124, "learning_rate": 2.0286472233509438e-06, "loss": 0.01404857486486435, "step": 103320 }, { "epoch": 0.9724705882352941, "grad_norm": 0.48526255538006563, "learning_rate": 2.0285981381489035e-06, "loss": 0.01707507371902466, "step": 103325 }, { "epoch": 0.9725176470588235, "grad_norm": 0.3393554020625899, "learning_rate": 2.028549056509691e-06, "loss": 0.012887699902057648, "step": 103330 }, { "epoch": 0.9725647058823529, "grad_norm": 0.5723270009929352, "learning_rate": 2.0284999784328756e-06, "loss": 0.0164615660905838, "step": 103335 }, { "epoch": 0.9726117647058824, "grad_norm": 0.32719809958445456, "learning_rate": 2.028450903918027e-06, "loss": 0.013634000718593598, "step": 103340 }, { "epoch": 0.9726588235294118, "grad_norm": 0.4251938117345334, "learning_rate": 2.0284018329647136e-06, "loss": 0.012259590625762939, "step": 103345 }, { "epoch": 0.9727058823529412, "grad_norm": 0.5312673651511532, "learning_rate": 2.028352765572505e-06, "loss": 0.010538515448570252, "step": 103350 }, { "epoch": 0.9727529411764706, "grad_norm": 0.27639483436008694, "learning_rate": 2.0283037017409695e-06, "loss": 0.01145552545785904, "step": 103355 }, { "epoch": 0.9728, "grad_norm": 0.3741161439291194, "learning_rate": 2.0282546414696783e-06, "loss": 0.011714057624340057, "step": 103360 }, { "epoch": 0.9728470588235294, "grad_norm": 0.39860380499346604, "learning_rate": 2.028205584758199e-06, "loss": 0.01578606516122818, "step": 103365 }, { "epoch": 0.9728941176470588, "grad_norm": 0.6430965509061907, "learning_rate": 2.0281565316061028e-06, "loss": 0.014125356078147888, "step": 103370 }, { "epoch": 0.9729411764705882, "grad_norm": 0.5687075224751615, "learning_rate": 2.028107482012958e-06, "loss": 0.015859915316104888, "step": 103375 }, { "epoch": 0.9729882352941176, "grad_norm": 0.4936570594921762, "learning_rate": 2.0280584359783347e-06, "loss": 0.014095798134803772, "step": 103380 }, { "epoch": 0.9730352941176471, "grad_norm": 0.39145547720137475, "learning_rate": 2.0280093935018026e-06, "loss": 0.013317397236824036, "step": 103385 }, { "epoch": 0.9730823529411765, "grad_norm": 0.5552834864998348, "learning_rate": 2.0279603545829317e-06, "loss": 0.012965145707130431, "step": 103390 }, { "epoch": 0.9731294117647059, "grad_norm": 0.4503204885081808, "learning_rate": 2.0279113192212918e-06, "loss": 0.013279440999031066, "step": 103395 }, { "epoch": 0.9731764705882353, "grad_norm": 0.48092968279031834, "learning_rate": 2.027862287416453e-06, "loss": 0.016795407235622405, "step": 103400 }, { "epoch": 0.9732235294117647, "grad_norm": 0.5796584272963976, "learning_rate": 2.0278132591679846e-06, "loss": 0.015363454818725586, "step": 103405 }, { "epoch": 0.9732705882352941, "grad_norm": 0.466343148766287, "learning_rate": 2.0277642344754573e-06, "loss": 0.011598263680934907, "step": 103410 }, { "epoch": 0.9733176470588235, "grad_norm": 0.4441909133208512, "learning_rate": 2.027715213338442e-06, "loss": 0.016946816444396974, "step": 103415 }, { "epoch": 0.9733647058823529, "grad_norm": 0.34067465663592494, "learning_rate": 2.0276661957565075e-06, "loss": 0.011446206271648407, "step": 103420 }, { "epoch": 0.9734117647058823, "grad_norm": 0.4342616776957087, "learning_rate": 2.027617181729225e-06, "loss": 0.010229770839214326, "step": 103425 }, { "epoch": 0.9734588235294117, "grad_norm": 0.632804128934067, "learning_rate": 2.027568171256165e-06, "loss": 0.01293083131313324, "step": 103430 }, { "epoch": 0.9735058823529412, "grad_norm": 0.3778636488277118, "learning_rate": 2.027519164336897e-06, "loss": 0.011532159149646759, "step": 103435 }, { "epoch": 0.9735529411764706, "grad_norm": 0.4925947538828965, "learning_rate": 2.0274701609709927e-06, "loss": 0.014653380215167999, "step": 103440 }, { "epoch": 0.9736, "grad_norm": 0.7395431330634726, "learning_rate": 2.027421161158022e-06, "loss": 0.015724754333496092, "step": 103445 }, { "epoch": 0.9736470588235294, "grad_norm": 0.5533659345110894, "learning_rate": 2.027372164897556e-06, "loss": 0.014641234278678894, "step": 103450 }, { "epoch": 0.9736941176470588, "grad_norm": 0.45965852798342843, "learning_rate": 2.027323172189165e-06, "loss": 0.012769454717636108, "step": 103455 }, { "epoch": 0.9737411764705882, "grad_norm": 0.40335483690530866, "learning_rate": 2.02727418303242e-06, "loss": 0.014769002795219421, "step": 103460 }, { "epoch": 0.9737882352941176, "grad_norm": 0.4558080231370261, "learning_rate": 2.0272251974268924e-06, "loss": 0.012076565623283386, "step": 103465 }, { "epoch": 0.973835294117647, "grad_norm": 0.5780083192927216, "learning_rate": 2.0271762153721525e-06, "loss": 0.014835193753242493, "step": 103470 }, { "epoch": 0.9738823529411764, "grad_norm": 0.5346861589786951, "learning_rate": 2.0271272368677714e-06, "loss": 0.012560711801052093, "step": 103475 }, { "epoch": 0.9739294117647059, "grad_norm": 0.4902204418578222, "learning_rate": 2.027078261913321e-06, "loss": 0.01641722321510315, "step": 103480 }, { "epoch": 0.9739764705882353, "grad_norm": 0.47549210887683335, "learning_rate": 2.027029290508372e-06, "loss": 0.01508401483297348, "step": 103485 }, { "epoch": 0.9740235294117647, "grad_norm": 3.1927203316998933, "learning_rate": 2.0269803226524947e-06, "loss": 0.012865380942821502, "step": 103490 }, { "epoch": 0.9740705882352941, "grad_norm": 0.4014597710564068, "learning_rate": 2.026931358345262e-06, "loss": 0.014275822043418884, "step": 103495 }, { "epoch": 0.9741176470588235, "grad_norm": 0.39927942653214693, "learning_rate": 2.0268823975862446e-06, "loss": 0.015517456829547882, "step": 103500 }, { "epoch": 0.9741647058823529, "grad_norm": 0.8918321939462306, "learning_rate": 2.026833440375014e-06, "loss": 0.015748126804828642, "step": 103505 }, { "epoch": 0.9742117647058823, "grad_norm": 0.42237629579032543, "learning_rate": 2.026784486711142e-06, "loss": 0.01624339520931244, "step": 103510 }, { "epoch": 0.9742588235294117, "grad_norm": 0.5502493791036412, "learning_rate": 2.0267355365941993e-06, "loss": 0.014386886358261108, "step": 103515 }, { "epoch": 0.9743058823529411, "grad_norm": 0.4648863048546694, "learning_rate": 2.026686590023759e-06, "loss": 0.016280540823936464, "step": 103520 }, { "epoch": 0.9743529411764705, "grad_norm": 0.6085252192594474, "learning_rate": 2.0266376469993915e-06, "loss": 0.016760441660881042, "step": 103525 }, { "epoch": 0.9744, "grad_norm": 0.4418296144276535, "learning_rate": 2.02658870752067e-06, "loss": 0.013827666640281677, "step": 103530 }, { "epoch": 0.9744470588235294, "grad_norm": 0.38290335094586314, "learning_rate": 2.0265397715871657e-06, "loss": 0.011736062169075013, "step": 103535 }, { "epoch": 0.9744941176470588, "grad_norm": 0.32041302699200547, "learning_rate": 2.02649083919845e-06, "loss": 0.011346482485532761, "step": 103540 }, { "epoch": 0.9745411764705882, "grad_norm": 0.4707404298563636, "learning_rate": 2.026441910354096e-06, "loss": 0.013914132118225097, "step": 103545 }, { "epoch": 0.9745882352941176, "grad_norm": 0.6609162348170493, "learning_rate": 2.0263929850536753e-06, "loss": 0.012525971233844756, "step": 103550 }, { "epoch": 0.974635294117647, "grad_norm": 0.2811153550208425, "learning_rate": 2.026344063296761e-06, "loss": 0.013825301826000214, "step": 103555 }, { "epoch": 0.9746823529411764, "grad_norm": 0.4583628295572011, "learning_rate": 2.0262951450829237e-06, "loss": 0.013020551204681397, "step": 103560 }, { "epoch": 0.9747294117647058, "grad_norm": 0.46500861055250126, "learning_rate": 2.026246230411737e-06, "loss": 0.0115911103785038, "step": 103565 }, { "epoch": 0.9747764705882352, "grad_norm": 0.6604948319175158, "learning_rate": 2.026197319282773e-06, "loss": 0.010823079943656921, "step": 103570 }, { "epoch": 0.9748235294117648, "grad_norm": 0.6538626264729388, "learning_rate": 2.026148411695604e-06, "loss": 0.01311398446559906, "step": 103575 }, { "epoch": 0.9748705882352942, "grad_norm": 0.41440963554870935, "learning_rate": 2.026099507649803e-06, "loss": 0.014198988676071167, "step": 103580 }, { "epoch": 0.9749176470588236, "grad_norm": 0.5254607112860363, "learning_rate": 2.026050607144942e-06, "loss": 0.013992483913898467, "step": 103585 }, { "epoch": 0.974964705882353, "grad_norm": 0.5368289061915134, "learning_rate": 2.0260017101805944e-06, "loss": 0.010717163980007171, "step": 103590 }, { "epoch": 0.9750117647058824, "grad_norm": 0.43278918266030186, "learning_rate": 2.0259528167563328e-06, "loss": 0.01477942317724228, "step": 103595 }, { "epoch": 0.9750588235294118, "grad_norm": 0.6785617815959087, "learning_rate": 2.02590392687173e-06, "loss": 0.014659449458122253, "step": 103600 }, { "epoch": 0.9751058823529412, "grad_norm": 0.5632722641714686, "learning_rate": 2.0258550405263585e-06, "loss": 0.012993185222148896, "step": 103605 }, { "epoch": 0.9751529411764706, "grad_norm": 0.4905612631108865, "learning_rate": 2.025806157719792e-06, "loss": 0.016117681562900544, "step": 103610 }, { "epoch": 0.9752, "grad_norm": 0.3795766342534511, "learning_rate": 2.0257572784516032e-06, "loss": 0.013887424767017365, "step": 103615 }, { "epoch": 0.9752470588235295, "grad_norm": 0.5539997856276975, "learning_rate": 2.025708402721365e-06, "loss": 0.015527942776679992, "step": 103620 }, { "epoch": 0.9752941176470589, "grad_norm": 0.2926885705592178, "learning_rate": 2.0256595305286514e-06, "loss": 0.014464868605136872, "step": 103625 }, { "epoch": 0.9753411764705883, "grad_norm": 0.5790560515568087, "learning_rate": 2.025610661873035e-06, "loss": 0.01887533813714981, "step": 103630 }, { "epoch": 0.9753882352941177, "grad_norm": 0.382621672120247, "learning_rate": 2.0255617967540893e-06, "loss": 0.013358168303966522, "step": 103635 }, { "epoch": 0.9754352941176471, "grad_norm": 0.4066896345724677, "learning_rate": 2.0255129351713877e-06, "loss": 0.011983750760555268, "step": 103640 }, { "epoch": 0.9754823529411765, "grad_norm": 0.5727777154913444, "learning_rate": 2.0254640771245036e-06, "loss": 0.014165815711021424, "step": 103645 }, { "epoch": 0.9755294117647059, "grad_norm": 0.5196515149107886, "learning_rate": 2.0254152226130114e-06, "loss": 0.015093408524990082, "step": 103650 }, { "epoch": 0.9755764705882353, "grad_norm": 0.43528039695295406, "learning_rate": 2.0253663716364836e-06, "loss": 0.011827165633440018, "step": 103655 }, { "epoch": 0.9756235294117647, "grad_norm": 0.49340114205884505, "learning_rate": 2.025317524194495e-06, "loss": 0.016497299075126648, "step": 103660 }, { "epoch": 0.9756705882352941, "grad_norm": 0.4351591458360173, "learning_rate": 2.025268680286618e-06, "loss": 0.013605713844299316, "step": 103665 }, { "epoch": 0.9757176470588236, "grad_norm": 0.31089974417641913, "learning_rate": 2.0252198399124283e-06, "loss": 0.012556791305541992, "step": 103670 }, { "epoch": 0.975764705882353, "grad_norm": 0.5144893434509026, "learning_rate": 2.025171003071498e-06, "loss": 0.013316632807254791, "step": 103675 }, { "epoch": 0.9758117647058824, "grad_norm": 0.47420050038560196, "learning_rate": 2.0251221697634026e-06, "loss": 0.013058573007583618, "step": 103680 }, { "epoch": 0.9758588235294118, "grad_norm": 0.3872001820032837, "learning_rate": 2.0250733399877152e-06, "loss": 0.01731821000576019, "step": 103685 }, { "epoch": 0.9759058823529412, "grad_norm": 0.6270353786897683, "learning_rate": 2.0250245137440104e-06, "loss": 0.012566044926643372, "step": 103690 }, { "epoch": 0.9759529411764706, "grad_norm": 0.5540703728812773, "learning_rate": 2.024975691031862e-06, "loss": 0.015232926607131958, "step": 103695 }, { "epoch": 0.976, "grad_norm": 0.44654956268810253, "learning_rate": 2.024926871850845e-06, "loss": 0.012821760773658753, "step": 103700 }, { "epoch": 0.9760470588235294, "grad_norm": 0.2998328242601911, "learning_rate": 2.0248780562005337e-06, "loss": 0.015523475408554078, "step": 103705 }, { "epoch": 0.9760941176470588, "grad_norm": 0.3254331815693442, "learning_rate": 2.0248292440805014e-06, "loss": 0.011765050143003464, "step": 103710 }, { "epoch": 0.9761411764705883, "grad_norm": 0.4580550675096334, "learning_rate": 2.024780435490324e-06, "loss": 0.008184537291526794, "step": 103715 }, { "epoch": 0.9761882352941177, "grad_norm": 0.5087218228905119, "learning_rate": 2.0247316304295753e-06, "loss": 0.01076764464378357, "step": 103720 }, { "epoch": 0.9762352941176471, "grad_norm": 0.43268695651101363, "learning_rate": 2.02468282889783e-06, "loss": 0.00936509221792221, "step": 103725 }, { "epoch": 0.9762823529411765, "grad_norm": 0.5807808205302006, "learning_rate": 2.0246340308946638e-06, "loss": 0.014299911260604859, "step": 103730 }, { "epoch": 0.9763294117647059, "grad_norm": 0.5057674326307168, "learning_rate": 2.02458523641965e-06, "loss": 0.02257487326860428, "step": 103735 }, { "epoch": 0.9763764705882353, "grad_norm": 0.4743485576181407, "learning_rate": 2.0245364454723642e-06, "loss": 0.014143145084381104, "step": 103740 }, { "epoch": 0.9764235294117647, "grad_norm": 0.5339225902346408, "learning_rate": 2.0244876580523815e-06, "loss": 0.01562950909137726, "step": 103745 }, { "epoch": 0.9764705882352941, "grad_norm": 0.3892139529604247, "learning_rate": 2.0244388741592767e-06, "loss": 0.014978629350662232, "step": 103750 }, { "epoch": 0.9765176470588235, "grad_norm": 0.4360212113819758, "learning_rate": 2.0243900937926246e-06, "loss": 0.013472941517829896, "step": 103755 }, { "epoch": 0.9765647058823529, "grad_norm": 0.4782842823060495, "learning_rate": 2.0243413169520013e-06, "loss": 0.013246816396713258, "step": 103760 }, { "epoch": 0.9766117647058824, "grad_norm": 0.43515387094182073, "learning_rate": 2.024292543636981e-06, "loss": 0.009005922079086303, "step": 103765 }, { "epoch": 0.9766588235294118, "grad_norm": 0.5682926634394855, "learning_rate": 2.024243773847139e-06, "loss": 0.012647661566734313, "step": 103770 }, { "epoch": 0.9767058823529412, "grad_norm": 0.47146905387861665, "learning_rate": 2.024195007582052e-06, "loss": 0.012955041229724884, "step": 103775 }, { "epoch": 0.9767529411764706, "grad_norm": 0.4335831438378598, "learning_rate": 2.024146244841294e-06, "loss": 0.012189503014087676, "step": 103780 }, { "epoch": 0.9768, "grad_norm": 0.41711679704377125, "learning_rate": 2.0240974856244415e-06, "loss": 0.013911068439483643, "step": 103785 }, { "epoch": 0.9768470588235294, "grad_norm": 0.28648252691105275, "learning_rate": 2.024048729931069e-06, "loss": 0.012003066390752793, "step": 103790 }, { "epoch": 0.9768941176470588, "grad_norm": 0.347898671623103, "learning_rate": 2.023999977760753e-06, "loss": 0.014141346514225005, "step": 103795 }, { "epoch": 0.9769411764705882, "grad_norm": 0.4619981612788677, "learning_rate": 2.0239512291130696e-06, "loss": 0.01253843903541565, "step": 103800 }, { "epoch": 0.9769882352941176, "grad_norm": 0.33769768746147744, "learning_rate": 2.023902483987593e-06, "loss": 0.011605539917945861, "step": 103805 }, { "epoch": 0.9770352941176471, "grad_norm": 0.5889231057289535, "learning_rate": 2.0238537423839007e-06, "loss": 0.01237356811761856, "step": 103810 }, { "epoch": 0.9770823529411765, "grad_norm": 0.3452278814560992, "learning_rate": 2.023805004301568e-06, "loss": 0.020405119657516478, "step": 103815 }, { "epoch": 0.9771294117647059, "grad_norm": 0.3794032201232809, "learning_rate": 2.0237562697401705e-06, "loss": 0.013021083176136016, "step": 103820 }, { "epoch": 0.9771764705882353, "grad_norm": 0.3686646088433012, "learning_rate": 2.0237075386992852e-06, "loss": 0.013229654729366302, "step": 103825 }, { "epoch": 0.9772235294117647, "grad_norm": 0.5815648397390968, "learning_rate": 2.023658811178488e-06, "loss": 0.016108816862106322, "step": 103830 }, { "epoch": 0.9772705882352941, "grad_norm": 0.600412174144682, "learning_rate": 2.0236100871773546e-06, "loss": 0.01511632204055786, "step": 103835 }, { "epoch": 0.9773176470588235, "grad_norm": 0.4248424901835495, "learning_rate": 2.0235613666954617e-06, "loss": 0.018660560250282288, "step": 103840 }, { "epoch": 0.9773647058823529, "grad_norm": 0.32482073674442685, "learning_rate": 2.0235126497323856e-06, "loss": 0.012346011400222779, "step": 103845 }, { "epoch": 0.9774117647058823, "grad_norm": 0.3592231961414707, "learning_rate": 2.0234639362877026e-06, "loss": 0.012812858819961548, "step": 103850 }, { "epoch": 0.9774588235294117, "grad_norm": 0.4829410850868326, "learning_rate": 2.0234152263609897e-06, "loss": 0.014813171327114105, "step": 103855 }, { "epoch": 0.9775058823529412, "grad_norm": 0.5406739477047994, "learning_rate": 2.0233665199518225e-06, "loss": 0.014823791384696961, "step": 103860 }, { "epoch": 0.9775529411764706, "grad_norm": 0.8859427080355818, "learning_rate": 2.023317817059779e-06, "loss": 0.013466379046440125, "step": 103865 }, { "epoch": 0.9776, "grad_norm": 0.5835905879329716, "learning_rate": 2.023269117684435e-06, "loss": 0.015082493424415588, "step": 103870 }, { "epoch": 0.9776470588235294, "grad_norm": 0.7613202020084775, "learning_rate": 2.0232204218253678e-06, "loss": 0.019826361536979677, "step": 103875 }, { "epoch": 0.9776941176470588, "grad_norm": 0.3854482332279254, "learning_rate": 2.023171729482154e-06, "loss": 0.011466385424137115, "step": 103880 }, { "epoch": 0.9777411764705882, "grad_norm": 0.461138414757521, "learning_rate": 2.02312304065437e-06, "loss": 0.009885871410369873, "step": 103885 }, { "epoch": 0.9777882352941176, "grad_norm": 0.35700849162284126, "learning_rate": 2.023074355341594e-06, "loss": 0.012015487253665923, "step": 103890 }, { "epoch": 0.977835294117647, "grad_norm": 0.5092537534941937, "learning_rate": 2.0230256735434023e-06, "loss": 0.013878291845321656, "step": 103895 }, { "epoch": 0.9778823529411764, "grad_norm": 0.33172026983642744, "learning_rate": 2.022976995259372e-06, "loss": 0.013578169047832489, "step": 103900 }, { "epoch": 0.9779294117647059, "grad_norm": 0.507781435864504, "learning_rate": 2.0229283204890806e-06, "loss": 0.01303328424692154, "step": 103905 }, { "epoch": 0.9779764705882353, "grad_norm": 0.9253503334034352, "learning_rate": 2.0228796492321056e-06, "loss": 0.011960914731025696, "step": 103910 }, { "epoch": 0.9780235294117647, "grad_norm": 0.5349275288128448, "learning_rate": 2.022830981488024e-06, "loss": 0.01452205777168274, "step": 103915 }, { "epoch": 0.9780705882352941, "grad_norm": 0.5477811058433386, "learning_rate": 2.022782317256414e-06, "loss": 0.012048888206481933, "step": 103920 }, { "epoch": 0.9781176470588235, "grad_norm": 0.2753777082985456, "learning_rate": 2.0227336565368514e-06, "loss": 0.011112545430660248, "step": 103925 }, { "epoch": 0.9781647058823529, "grad_norm": 0.7820894289789831, "learning_rate": 2.0226849993289156e-06, "loss": 0.015435780584812164, "step": 103930 }, { "epoch": 0.9782117647058823, "grad_norm": 0.6442979392535482, "learning_rate": 2.022636345632183e-06, "loss": 0.013918113708496094, "step": 103935 }, { "epoch": 0.9782588235294117, "grad_norm": 0.467080966757202, "learning_rate": 2.022587695446232e-06, "loss": 0.01148698925971985, "step": 103940 }, { "epoch": 0.9783058823529411, "grad_norm": 0.6577882797147865, "learning_rate": 2.0225390487706406e-06, "loss": 0.018194107711315154, "step": 103945 }, { "epoch": 0.9783529411764705, "grad_norm": 0.35678164853496147, "learning_rate": 2.022490405604986e-06, "loss": 0.01043880581855774, "step": 103950 }, { "epoch": 0.9784, "grad_norm": 0.40704508476768225, "learning_rate": 2.022441765948847e-06, "loss": 0.01276358962059021, "step": 103955 }, { "epoch": 0.9784470588235294, "grad_norm": 0.3586475117027287, "learning_rate": 2.0223931298018004e-06, "loss": 0.013183271884918213, "step": 103960 }, { "epoch": 0.9784941176470588, "grad_norm": 0.46800343055044896, "learning_rate": 2.0223444971634253e-06, "loss": 0.011754782497882843, "step": 103965 }, { "epoch": 0.9785411764705882, "grad_norm": 0.5996344313647322, "learning_rate": 2.0222958680332993e-06, "loss": 0.010202032327651978, "step": 103970 }, { "epoch": 0.9785882352941176, "grad_norm": 0.6012198852075498, "learning_rate": 2.022247242411001e-06, "loss": 0.012618857622146606, "step": 103975 }, { "epoch": 0.978635294117647, "grad_norm": 0.7018301222116778, "learning_rate": 2.0221986202961087e-06, "loss": 0.011972134560346603, "step": 103980 }, { "epoch": 0.9786823529411764, "grad_norm": 0.5796199571586345, "learning_rate": 2.022150001688201e-06, "loss": 0.01610337048768997, "step": 103985 }, { "epoch": 0.9787294117647058, "grad_norm": 0.4352183268241056, "learning_rate": 2.022101386586855e-06, "loss": 0.014266102015972138, "step": 103990 }, { "epoch": 0.9787764705882352, "grad_norm": 0.856677044729567, "learning_rate": 2.0220527749916504e-06, "loss": 0.015817904472351076, "step": 103995 }, { "epoch": 0.9788235294117648, "grad_norm": 0.4475379632349137, "learning_rate": 2.022004166902166e-06, "loss": 0.011241781711578368, "step": 104000 }, { "epoch": 0.9788705882352942, "grad_norm": 0.4341336383902808, "learning_rate": 2.0219555623179794e-06, "loss": 0.01556999385356903, "step": 104005 }, { "epoch": 0.9789176470588236, "grad_norm": 0.3311472221125839, "learning_rate": 2.0219069612386704e-06, "loss": 0.011649991571903228, "step": 104010 }, { "epoch": 0.978964705882353, "grad_norm": 0.4275072424570592, "learning_rate": 2.0218583636638167e-06, "loss": 0.009262195229530335, "step": 104015 }, { "epoch": 0.9790117647058824, "grad_norm": 1.518906927959307, "learning_rate": 2.0218097695929985e-06, "loss": 0.020012879371643068, "step": 104020 }, { "epoch": 0.9790588235294118, "grad_norm": 0.5420416170442243, "learning_rate": 2.0217611790257936e-06, "loss": 0.015454946458339692, "step": 104025 }, { "epoch": 0.9791058823529412, "grad_norm": 0.41643782753420655, "learning_rate": 2.0217125919617814e-06, "loss": 0.01672475039958954, "step": 104030 }, { "epoch": 0.9791529411764706, "grad_norm": 0.5577305376518098, "learning_rate": 2.0216640084005406e-06, "loss": 0.01484152376651764, "step": 104035 }, { "epoch": 0.9792, "grad_norm": 0.3519114803352702, "learning_rate": 2.0216154283416515e-06, "loss": 0.017985311150550843, "step": 104040 }, { "epoch": 0.9792470588235294, "grad_norm": 0.4480836979757851, "learning_rate": 2.021566851784692e-06, "loss": 0.011916731297969819, "step": 104045 }, { "epoch": 0.9792941176470589, "grad_norm": 0.47992940787451993, "learning_rate": 2.021518278729242e-06, "loss": 0.012594318389892578, "step": 104050 }, { "epoch": 0.9793411764705883, "grad_norm": 0.42238155568298136, "learning_rate": 2.0214697091748807e-06, "loss": 0.014063867926597595, "step": 104055 }, { "epoch": 0.9793882352941177, "grad_norm": 0.3128190058905722, "learning_rate": 2.021421143121188e-06, "loss": 0.010990676283836365, "step": 104060 }, { "epoch": 0.9794352941176471, "grad_norm": 0.3773650658530581, "learning_rate": 2.0213725805677428e-06, "loss": 0.012913741171360016, "step": 104065 }, { "epoch": 0.9794823529411765, "grad_norm": 0.6641021711228814, "learning_rate": 2.0213240215141247e-06, "loss": 0.012020173668861388, "step": 104070 }, { "epoch": 0.9795294117647059, "grad_norm": 0.4014783184822663, "learning_rate": 2.0212754659599135e-06, "loss": 0.013001744449138642, "step": 104075 }, { "epoch": 0.9795764705882353, "grad_norm": 0.42074531898741346, "learning_rate": 2.0212269139046896e-06, "loss": 0.012222302705049514, "step": 104080 }, { "epoch": 0.9796235294117647, "grad_norm": 0.6527650459864782, "learning_rate": 2.0211783653480314e-06, "loss": 0.01497105062007904, "step": 104085 }, { "epoch": 0.9796705882352941, "grad_norm": 0.503760277311219, "learning_rate": 2.02112982028952e-06, "loss": 0.01802108287811279, "step": 104090 }, { "epoch": 0.9797176470588236, "grad_norm": 0.6887553030598317, "learning_rate": 2.021081278728734e-06, "loss": 0.011015196144580842, "step": 104095 }, { "epoch": 0.979764705882353, "grad_norm": 0.38599509447713143, "learning_rate": 2.021032740665255e-06, "loss": 0.015840144455432893, "step": 104100 }, { "epoch": 0.9798117647058824, "grad_norm": 0.4871765394687311, "learning_rate": 2.020984206098662e-06, "loss": 0.017290040850639343, "step": 104105 }, { "epoch": 0.9798588235294118, "grad_norm": 0.35481031832623955, "learning_rate": 2.020935675028535e-06, "loss": 0.00968136191368103, "step": 104110 }, { "epoch": 0.9799058823529412, "grad_norm": 0.49759640317796566, "learning_rate": 2.0208871474544553e-06, "loss": 0.015860298275947572, "step": 104115 }, { "epoch": 0.9799529411764706, "grad_norm": 0.627302125185596, "learning_rate": 2.020838623376002e-06, "loss": 0.013790535926818847, "step": 104120 }, { "epoch": 0.98, "grad_norm": 0.5884325352527201, "learning_rate": 2.0207901027927564e-06, "loss": 0.01186865270137787, "step": 104125 }, { "epoch": 0.9800470588235294, "grad_norm": 0.5315505828037107, "learning_rate": 2.020741585704298e-06, "loss": 0.015647149085998534, "step": 104130 }, { "epoch": 0.9800941176470588, "grad_norm": 1.0190130989415174, "learning_rate": 2.020693072110208e-06, "loss": 0.012120968103408814, "step": 104135 }, { "epoch": 0.9801411764705882, "grad_norm": 0.5541605756673706, "learning_rate": 2.0206445620100664e-06, "loss": 0.014303372800350189, "step": 104140 }, { "epoch": 0.9801882352941177, "grad_norm": 0.4936764386416859, "learning_rate": 2.0205960554034545e-06, "loss": 0.01733911633491516, "step": 104145 }, { "epoch": 0.9802352941176471, "grad_norm": 0.5632282970163639, "learning_rate": 2.020547552289952e-06, "loss": 0.01384861022233963, "step": 104150 }, { "epoch": 0.9802823529411765, "grad_norm": 0.5030438154788529, "learning_rate": 2.020499052669141e-06, "loss": 0.014370600879192352, "step": 104155 }, { "epoch": 0.9803294117647059, "grad_norm": 0.5562425475982745, "learning_rate": 2.020450556540601e-06, "loss": 0.01200222373008728, "step": 104160 }, { "epoch": 0.9803764705882353, "grad_norm": 0.47528069921702293, "learning_rate": 2.020402063903914e-06, "loss": 0.012335912883281707, "step": 104165 }, { "epoch": 0.9804235294117647, "grad_norm": 0.46665565243657686, "learning_rate": 2.0203535747586604e-06, "loss": 0.010566715896129609, "step": 104170 }, { "epoch": 0.9804705882352941, "grad_norm": 0.5386977959860733, "learning_rate": 2.0203050891044215e-06, "loss": 0.013748547434806824, "step": 104175 }, { "epoch": 0.9805176470588235, "grad_norm": 0.39594566830108846, "learning_rate": 2.0202566069407786e-06, "loss": 0.010384216904640198, "step": 104180 }, { "epoch": 0.9805647058823529, "grad_norm": 0.42218106845721565, "learning_rate": 2.0202081282673117e-06, "loss": 0.014245745539665223, "step": 104185 }, { "epoch": 0.9806117647058824, "grad_norm": 0.6898644718318183, "learning_rate": 2.0201596530836037e-06, "loss": 0.015974491834640503, "step": 104190 }, { "epoch": 0.9806588235294118, "grad_norm": 0.40583709854057737, "learning_rate": 2.020111181389235e-06, "loss": 0.015532276034355164, "step": 104195 }, { "epoch": 0.9807058823529412, "grad_norm": 0.6894410662145218, "learning_rate": 2.0200627131837876e-06, "loss": 0.013606750965118408, "step": 104200 }, { "epoch": 0.9807529411764706, "grad_norm": 0.5581780750298989, "learning_rate": 2.020014248466842e-06, "loss": 0.011947061121463775, "step": 104205 }, { "epoch": 0.9808, "grad_norm": 0.46013551361656524, "learning_rate": 2.01996578723798e-06, "loss": 0.016850164532661437, "step": 104210 }, { "epoch": 0.9808470588235294, "grad_norm": 0.4427541767049514, "learning_rate": 2.0199173294967844e-06, "loss": 0.012930913269519806, "step": 104215 }, { "epoch": 0.9808941176470588, "grad_norm": 0.4617870118415617, "learning_rate": 2.0198688752428357e-06, "loss": 0.015243555605411529, "step": 104220 }, { "epoch": 0.9809411764705882, "grad_norm": 0.4560192555957306, "learning_rate": 2.019820424475716e-06, "loss": 0.01679205596446991, "step": 104225 }, { "epoch": 0.9809882352941176, "grad_norm": 0.4105459300788934, "learning_rate": 2.0197719771950065e-06, "loss": 0.009562676399946212, "step": 104230 }, { "epoch": 0.981035294117647, "grad_norm": 0.4350012263236012, "learning_rate": 2.019723533400291e-06, "loss": 0.009633159637451172, "step": 104235 }, { "epoch": 0.9810823529411765, "grad_norm": 0.640069429076254, "learning_rate": 2.019675093091149e-06, "loss": 0.013880658149719238, "step": 104240 }, { "epoch": 0.9811294117647059, "grad_norm": 0.4585823168415557, "learning_rate": 2.0196266562671638e-06, "loss": 0.009909008443355561, "step": 104245 }, { "epoch": 0.9811764705882353, "grad_norm": 0.35282404867341427, "learning_rate": 2.019578222927918e-06, "loss": 0.009417085349559784, "step": 104250 }, { "epoch": 0.9812235294117647, "grad_norm": 0.4781987297746632, "learning_rate": 2.019529793072993e-06, "loss": 0.02100030183792114, "step": 104255 }, { "epoch": 0.9812705882352941, "grad_norm": 0.5070080166250783, "learning_rate": 2.0194813667019708e-06, "loss": 0.017320360243320464, "step": 104260 }, { "epoch": 0.9813176470588235, "grad_norm": 0.6420300092188347, "learning_rate": 2.0194329438144347e-06, "loss": 0.011765654385089874, "step": 104265 }, { "epoch": 0.9813647058823529, "grad_norm": 0.41086430454297657, "learning_rate": 2.0193845244099663e-06, "loss": 0.014347806572914124, "step": 104270 }, { "epoch": 0.9814117647058823, "grad_norm": 0.45240086800201385, "learning_rate": 2.019336108488148e-06, "loss": 0.014957618713378907, "step": 104275 }, { "epoch": 0.9814588235294117, "grad_norm": 0.6134677278040459, "learning_rate": 2.019287696048563e-06, "loss": 0.01446930319070816, "step": 104280 }, { "epoch": 0.9815058823529412, "grad_norm": 0.38898584628292654, "learning_rate": 2.0192392870907934e-06, "loss": 0.009765434265136718, "step": 104285 }, { "epoch": 0.9815529411764706, "grad_norm": 0.3101262024817095, "learning_rate": 2.019190881614422e-06, "loss": 0.007704511284828186, "step": 104290 }, { "epoch": 0.9816, "grad_norm": 0.3700066888437469, "learning_rate": 2.019142479619032e-06, "loss": 0.013153699040412904, "step": 104295 }, { "epoch": 0.9816470588235294, "grad_norm": 0.4919239130243889, "learning_rate": 2.019094081104205e-06, "loss": 0.012879322469234466, "step": 104300 }, { "epoch": 0.9816941176470588, "grad_norm": 0.5097913164219057, "learning_rate": 2.0190456860695244e-06, "loss": 0.011419047415256501, "step": 104305 }, { "epoch": 0.9817411764705882, "grad_norm": 0.6915308975933228, "learning_rate": 2.018997294514574e-06, "loss": 0.01704072058200836, "step": 104310 }, { "epoch": 0.9817882352941176, "grad_norm": 0.5073106027160517, "learning_rate": 2.0189489064389363e-06, "loss": 0.012475623190402985, "step": 104315 }, { "epoch": 0.981835294117647, "grad_norm": 0.6922683491946509, "learning_rate": 2.0189005218421934e-06, "loss": 0.012807631492614746, "step": 104320 }, { "epoch": 0.9818823529411764, "grad_norm": 0.4447722527118807, "learning_rate": 2.0188521407239297e-06, "loss": 0.012196804583072662, "step": 104325 }, { "epoch": 0.9819294117647058, "grad_norm": 0.35230382478845185, "learning_rate": 2.0188037630837286e-06, "loss": 0.011968784034252167, "step": 104330 }, { "epoch": 0.9819764705882353, "grad_norm": 0.28218979941693234, "learning_rate": 2.0187553889211725e-06, "loss": 0.012333337962627412, "step": 104335 }, { "epoch": 0.9820235294117647, "grad_norm": 0.5972157633126796, "learning_rate": 2.018707018235845e-06, "loss": 0.013702699542045593, "step": 104340 }, { "epoch": 0.9820705882352941, "grad_norm": 0.5330140963295608, "learning_rate": 2.0186586510273294e-06, "loss": 0.013976803421974182, "step": 104345 }, { "epoch": 0.9821176470588235, "grad_norm": 0.3753030346509195, "learning_rate": 2.0186102872952094e-06, "loss": 0.012077479809522628, "step": 104350 }, { "epoch": 0.9821647058823529, "grad_norm": 0.4219467222833651, "learning_rate": 2.018561927039069e-06, "loss": 0.01616395264863968, "step": 104355 }, { "epoch": 0.9822117647058823, "grad_norm": 0.42528723462436296, "learning_rate": 2.0185135702584917e-06, "loss": 0.014751949906349182, "step": 104360 }, { "epoch": 0.9822588235294117, "grad_norm": 0.4100435386488709, "learning_rate": 2.018465216953061e-06, "loss": 0.012190508842468261, "step": 104365 }, { "epoch": 0.9823058823529411, "grad_norm": 0.457801544284282, "learning_rate": 2.01841686712236e-06, "loss": 0.010114794969558716, "step": 104370 }, { "epoch": 0.9823529411764705, "grad_norm": 0.7841232650512051, "learning_rate": 2.0183685207659738e-06, "loss": 0.016589753329753876, "step": 104375 }, { "epoch": 0.9824, "grad_norm": 0.6363426556011675, "learning_rate": 2.018320177883485e-06, "loss": 0.015083083510398864, "step": 104380 }, { "epoch": 0.9824470588235295, "grad_norm": 0.5052599621266357, "learning_rate": 2.0182718384744794e-06, "loss": 0.016503044962882997, "step": 104385 }, { "epoch": 0.9824941176470589, "grad_norm": 0.2677202439853392, "learning_rate": 2.0182235025385392e-06, "loss": 0.01607639193534851, "step": 104390 }, { "epoch": 0.9825411764705883, "grad_norm": 4.551093842668325, "learning_rate": 2.01817517007525e-06, "loss": 0.010127638280391694, "step": 104395 }, { "epoch": 0.9825882352941177, "grad_norm": 0.5175565853116305, "learning_rate": 2.0181268410841947e-06, "loss": 0.01635337471961975, "step": 104400 }, { "epoch": 0.982635294117647, "grad_norm": 0.31724111110103426, "learning_rate": 2.0180785155649586e-06, "loss": 0.011042836308479308, "step": 104405 }, { "epoch": 0.9826823529411765, "grad_norm": 0.5970571173926218, "learning_rate": 2.0180301935171256e-06, "loss": 0.015558740496635437, "step": 104410 }, { "epoch": 0.9827294117647059, "grad_norm": 0.5159302399667514, "learning_rate": 2.0179818749402804e-06, "loss": 0.011839349567890168, "step": 104415 }, { "epoch": 0.9827764705882353, "grad_norm": 0.47892789035398675, "learning_rate": 2.017933559834007e-06, "loss": 0.01603911817073822, "step": 104420 }, { "epoch": 0.9828235294117647, "grad_norm": 0.5581118855026453, "learning_rate": 2.0178852481978902e-06, "loss": 0.016305844485759734, "step": 104425 }, { "epoch": 0.9828705882352942, "grad_norm": 0.5302836091025243, "learning_rate": 2.0178369400315144e-06, "loss": 0.012676472961902618, "step": 104430 }, { "epoch": 0.9829176470588236, "grad_norm": 0.5415135644874777, "learning_rate": 2.0177886353344653e-06, "loss": 0.010854928940534591, "step": 104435 }, { "epoch": 0.982964705882353, "grad_norm": 0.4736150285699433, "learning_rate": 2.0177403341063265e-06, "loss": 0.011769239604473115, "step": 104440 }, { "epoch": 0.9830117647058824, "grad_norm": 0.589019198500474, "learning_rate": 2.017692036346683e-06, "loss": 0.01266079992055893, "step": 104445 }, { "epoch": 0.9830588235294118, "grad_norm": 0.4099983526931324, "learning_rate": 2.01764374205512e-06, "loss": 0.011473065614700318, "step": 104450 }, { "epoch": 0.9831058823529412, "grad_norm": 0.3278378191247786, "learning_rate": 2.0175954512312223e-06, "loss": 0.011293719708919524, "step": 104455 }, { "epoch": 0.9831529411764706, "grad_norm": 0.4503579583486158, "learning_rate": 2.0175471638745757e-06, "loss": 0.01360836923122406, "step": 104460 }, { "epoch": 0.9832, "grad_norm": 0.5166286168979106, "learning_rate": 2.0174988799847643e-06, "loss": 0.01700841188430786, "step": 104465 }, { "epoch": 0.9832470588235294, "grad_norm": 0.3335259118403535, "learning_rate": 2.017450599561373e-06, "loss": 0.017808663845062255, "step": 104470 }, { "epoch": 0.9832941176470589, "grad_norm": 0.5158196918275143, "learning_rate": 2.0174023226039884e-06, "loss": 0.01752674877643585, "step": 104475 }, { "epoch": 0.9833411764705883, "grad_norm": 0.3565016831049609, "learning_rate": 2.017354049112195e-06, "loss": 0.011848632991313935, "step": 104480 }, { "epoch": 0.9833882352941177, "grad_norm": 0.5055586712655962, "learning_rate": 2.0173057790855783e-06, "loss": 0.010330826789140702, "step": 104485 }, { "epoch": 0.9834352941176471, "grad_norm": 0.4155676783761716, "learning_rate": 2.017257512523723e-06, "loss": 0.014336311817169189, "step": 104490 }, { "epoch": 0.9834823529411765, "grad_norm": 0.42307862941724766, "learning_rate": 2.0172092494262163e-06, "loss": 0.009788511693477631, "step": 104495 }, { "epoch": 0.9835294117647059, "grad_norm": 0.3252421997776267, "learning_rate": 2.017160989792642e-06, "loss": 0.013236835598945618, "step": 104500 }, { "epoch": 0.9835764705882353, "grad_norm": 0.6952517479672697, "learning_rate": 2.0171127336225875e-06, "loss": 0.015165606141090393, "step": 104505 }, { "epoch": 0.9836235294117647, "grad_norm": 0.4864190206339181, "learning_rate": 2.017064480915637e-06, "loss": 0.014417435228824615, "step": 104510 }, { "epoch": 0.9836705882352941, "grad_norm": 0.5104690743225795, "learning_rate": 2.0170162316713774e-06, "loss": 0.012177219986915589, "step": 104515 }, { "epoch": 0.9837176470588235, "grad_norm": 0.573279816797631, "learning_rate": 2.016967985889394e-06, "loss": 0.013685430586338043, "step": 104520 }, { "epoch": 0.983764705882353, "grad_norm": 0.46089592999026335, "learning_rate": 2.016919743569273e-06, "loss": 0.015488716959953307, "step": 104525 }, { "epoch": 0.9838117647058824, "grad_norm": 0.4464294331383491, "learning_rate": 2.0168715047105997e-06, "loss": 0.012094859778881074, "step": 104530 }, { "epoch": 0.9838588235294118, "grad_norm": 0.3936203570398889, "learning_rate": 2.0168232693129612e-06, "loss": 0.01311841607093811, "step": 104535 }, { "epoch": 0.9839058823529412, "grad_norm": 0.3904282466855937, "learning_rate": 2.0167750373759436e-06, "loss": 0.010209708660840987, "step": 104540 }, { "epoch": 0.9839529411764706, "grad_norm": 0.29786790567353527, "learning_rate": 2.016726808899132e-06, "loss": 0.010496076941490174, "step": 104545 }, { "epoch": 0.984, "grad_norm": 0.5025450563511353, "learning_rate": 2.016678583882114e-06, "loss": 0.016712412238121033, "step": 104550 }, { "epoch": 0.9840470588235294, "grad_norm": 0.4332938658111186, "learning_rate": 2.016630362324475e-06, "loss": 0.01115506887435913, "step": 104555 }, { "epoch": 0.9840941176470588, "grad_norm": 0.5306871870216218, "learning_rate": 2.016582144225802e-06, "loss": 0.014522185921669007, "step": 104560 }, { "epoch": 0.9841411764705882, "grad_norm": 0.516553048543941, "learning_rate": 2.016533929585681e-06, "loss": 0.012831652164459228, "step": 104565 }, { "epoch": 0.9841882352941177, "grad_norm": 0.345508041240156, "learning_rate": 2.0164857184036995e-06, "loss": 0.012176020443439484, "step": 104570 }, { "epoch": 0.9842352941176471, "grad_norm": 1.2006163798222529, "learning_rate": 2.016437510679443e-06, "loss": 0.01801235377788544, "step": 104575 }, { "epoch": 0.9842823529411765, "grad_norm": 0.3674632436569644, "learning_rate": 2.016389306412499e-06, "loss": 0.01476338505744934, "step": 104580 }, { "epoch": 0.9843294117647059, "grad_norm": 0.595568452690844, "learning_rate": 2.016341105602454e-06, "loss": 0.011586153507232666, "step": 104585 }, { "epoch": 0.9843764705882353, "grad_norm": 0.27813600936651117, "learning_rate": 2.016292908248895e-06, "loss": 0.010094988346099853, "step": 104590 }, { "epoch": 0.9844235294117647, "grad_norm": 0.5072378440504598, "learning_rate": 2.016244714351408e-06, "loss": 0.015010738372802734, "step": 104595 }, { "epoch": 0.9844705882352941, "grad_norm": 0.7046220884068182, "learning_rate": 2.016196523909581e-06, "loss": 0.011575470119714737, "step": 104600 }, { "epoch": 0.9845176470588235, "grad_norm": 0.42961974382565304, "learning_rate": 2.016148336923001e-06, "loss": 0.010005302727222443, "step": 104605 }, { "epoch": 0.9845647058823529, "grad_norm": 0.558155170381205, "learning_rate": 2.0161001533912543e-06, "loss": 0.014458820223808289, "step": 104610 }, { "epoch": 0.9846117647058823, "grad_norm": 0.5187937936346815, "learning_rate": 2.0160519733139293e-06, "loss": 0.013123777508735657, "step": 104615 }, { "epoch": 0.9846588235294118, "grad_norm": 0.5311965602251347, "learning_rate": 2.0160037966906123e-06, "loss": 0.012634700536727906, "step": 104620 }, { "epoch": 0.9847058823529412, "grad_norm": 0.3394897705287996, "learning_rate": 2.0159556235208917e-06, "loss": 0.01445443630218506, "step": 104625 }, { "epoch": 0.9847529411764706, "grad_norm": 0.5296159341453309, "learning_rate": 2.0159074538043534e-06, "loss": 0.012277046591043473, "step": 104630 }, { "epoch": 0.9848, "grad_norm": 0.3629013115796939, "learning_rate": 2.0158592875405856e-06, "loss": 0.011067049205303192, "step": 104635 }, { "epoch": 0.9848470588235294, "grad_norm": 0.43725496119159724, "learning_rate": 2.015811124729176e-06, "loss": 0.012656491994857789, "step": 104640 }, { "epoch": 0.9848941176470588, "grad_norm": 0.5882301210689574, "learning_rate": 2.015762965369712e-06, "loss": 0.014522364735603333, "step": 104645 }, { "epoch": 0.9849411764705882, "grad_norm": 0.35699027281641366, "learning_rate": 2.015714809461782e-06, "loss": 0.010272711515426636, "step": 104650 }, { "epoch": 0.9849882352941176, "grad_norm": 0.39888241172470457, "learning_rate": 2.015666657004972e-06, "loss": 0.01059347838163376, "step": 104655 }, { "epoch": 0.985035294117647, "grad_norm": 0.31250724671096103, "learning_rate": 2.0156185079988718e-06, "loss": 0.015790438652038573, "step": 104660 }, { "epoch": 0.9850823529411765, "grad_norm": 0.42981326308402584, "learning_rate": 2.0155703624430673e-06, "loss": 0.01139460951089859, "step": 104665 }, { "epoch": 0.9851294117647059, "grad_norm": 0.5825474879317127, "learning_rate": 2.0155222203371485e-06, "loss": 0.015127763152122498, "step": 104670 }, { "epoch": 0.9851764705882353, "grad_norm": 0.5604813173897742, "learning_rate": 2.015474081680702e-06, "loss": 0.014183731377124786, "step": 104675 }, { "epoch": 0.9852235294117647, "grad_norm": 0.5557592107446963, "learning_rate": 2.0154259464733158e-06, "loss": 0.013499131798744202, "step": 104680 }, { "epoch": 0.9852705882352941, "grad_norm": 0.46963258749800063, "learning_rate": 2.0153778147145794e-06, "loss": 0.011589030921459197, "step": 104685 }, { "epoch": 0.9853176470588235, "grad_norm": 0.4391905764268432, "learning_rate": 2.0153296864040795e-06, "loss": 0.012630981206893922, "step": 104690 }, { "epoch": 0.9853647058823529, "grad_norm": 0.4773259322413928, "learning_rate": 2.0152815615414056e-06, "loss": 0.012203612923622131, "step": 104695 }, { "epoch": 0.9854117647058823, "grad_norm": 0.27389938974730027, "learning_rate": 2.015233440126145e-06, "loss": 0.011758599430322647, "step": 104700 }, { "epoch": 0.9854588235294117, "grad_norm": 0.4547095876551155, "learning_rate": 2.015185322157887e-06, "loss": 0.01408795714378357, "step": 104705 }, { "epoch": 0.9855058823529412, "grad_norm": 0.5902493915694371, "learning_rate": 2.01513720763622e-06, "loss": 0.012693187594413758, "step": 104710 }, { "epoch": 0.9855529411764706, "grad_norm": 0.40135500878623637, "learning_rate": 2.015089096560732e-06, "loss": 0.01543910801410675, "step": 104715 }, { "epoch": 0.9856, "grad_norm": 0.42320195889611795, "learning_rate": 2.0150409889310114e-06, "loss": 0.01691947430372238, "step": 104720 }, { "epoch": 0.9856470588235294, "grad_norm": 0.3664682790830631, "learning_rate": 2.0149928847466483e-06, "loss": 0.012160720676183701, "step": 104725 }, { "epoch": 0.9856941176470588, "grad_norm": 0.5796369150015557, "learning_rate": 2.0149447840072305e-06, "loss": 0.013968372344970703, "step": 104730 }, { "epoch": 0.9857411764705882, "grad_norm": 0.4395144468018838, "learning_rate": 2.0148966867123467e-06, "loss": 0.014435672760009765, "step": 104735 }, { "epoch": 0.9857882352941176, "grad_norm": 0.39381434320350045, "learning_rate": 2.0148485928615857e-06, "loss": 0.013571582734584808, "step": 104740 }, { "epoch": 0.985835294117647, "grad_norm": 0.47259226474234123, "learning_rate": 2.0148005024545375e-06, "loss": 0.013956254720687867, "step": 104745 }, { "epoch": 0.9858823529411764, "grad_norm": 0.2741010176781797, "learning_rate": 2.0147524154907904e-06, "loss": 0.011297520995140076, "step": 104750 }, { "epoch": 0.9859294117647058, "grad_norm": 0.2110331142436689, "learning_rate": 2.0147043319699337e-06, "loss": 0.009534107148647308, "step": 104755 }, { "epoch": 0.9859764705882353, "grad_norm": 0.4961321293926776, "learning_rate": 2.014656251891556e-06, "loss": 0.01578163504600525, "step": 104760 }, { "epoch": 0.9860235294117647, "grad_norm": 0.6223720919729381, "learning_rate": 2.0146081752552473e-06, "loss": 0.013377824425697326, "step": 104765 }, { "epoch": 0.9860705882352941, "grad_norm": 0.38061765163956235, "learning_rate": 2.014560102060597e-06, "loss": 0.009037753939628601, "step": 104770 }, { "epoch": 0.9861176470588235, "grad_norm": 0.4069903038068393, "learning_rate": 2.0145120323071934e-06, "loss": 0.009745196253061295, "step": 104775 }, { "epoch": 0.9861647058823529, "grad_norm": 0.5141946486694512, "learning_rate": 2.0144639659946276e-06, "loss": 0.011340108513832093, "step": 104780 }, { "epoch": 0.9862117647058823, "grad_norm": 0.4079977528376551, "learning_rate": 2.0144159031224877e-06, "loss": 0.013547416031360626, "step": 104785 }, { "epoch": 0.9862588235294117, "grad_norm": 0.5578495647346524, "learning_rate": 2.0143678436903637e-06, "loss": 0.011669791489839553, "step": 104790 }, { "epoch": 0.9863058823529411, "grad_norm": 0.7186514423711216, "learning_rate": 2.0143197876978457e-06, "loss": 0.01250753104686737, "step": 104795 }, { "epoch": 0.9863529411764705, "grad_norm": 0.48678551561689504, "learning_rate": 2.0142717351445233e-06, "loss": 0.013668349385261536, "step": 104800 }, { "epoch": 0.9864, "grad_norm": 0.45728699651452376, "learning_rate": 2.014223686029986e-06, "loss": 0.016039341688156128, "step": 104805 }, { "epoch": 0.9864470588235295, "grad_norm": 0.4101600788759329, "learning_rate": 2.0141756403538237e-06, "loss": 0.017523202300071716, "step": 104810 }, { "epoch": 0.9864941176470589, "grad_norm": 0.6246422782007451, "learning_rate": 2.0141275981156266e-06, "loss": 0.017876330018043517, "step": 104815 }, { "epoch": 0.9865411764705883, "grad_norm": 0.4780531974961049, "learning_rate": 2.0140795593149844e-06, "loss": 0.013623657822608947, "step": 104820 }, { "epoch": 0.9865882352941177, "grad_norm": 0.42940295416822094, "learning_rate": 2.0140315239514876e-06, "loss": 0.012578451633453369, "step": 104825 }, { "epoch": 0.986635294117647, "grad_norm": 0.3195158502934597, "learning_rate": 2.0139834920247257e-06, "loss": 0.01089337319135666, "step": 104830 }, { "epoch": 0.9866823529411765, "grad_norm": 0.31854378226632163, "learning_rate": 2.0139354635342896e-06, "loss": 0.013446171581745148, "step": 104835 }, { "epoch": 0.9867294117647059, "grad_norm": 0.5084518082169394, "learning_rate": 2.013887438479769e-06, "loss": 0.013296255469322204, "step": 104840 }, { "epoch": 0.9867764705882353, "grad_norm": 0.6116425868567626, "learning_rate": 2.013839416860755e-06, "loss": 0.01323682814836502, "step": 104845 }, { "epoch": 0.9868235294117647, "grad_norm": 0.5808019745882884, "learning_rate": 2.013791398676837e-06, "loss": 0.015207350254058838, "step": 104850 }, { "epoch": 0.9868705882352942, "grad_norm": 0.7432739078274394, "learning_rate": 2.0137433839276063e-06, "loss": 0.013668642938137054, "step": 104855 }, { "epoch": 0.9869176470588236, "grad_norm": 0.44884219365774336, "learning_rate": 2.013695372612653e-06, "loss": 0.009679721295833587, "step": 104860 }, { "epoch": 0.986964705882353, "grad_norm": 0.744257676516741, "learning_rate": 2.0136473647315675e-06, "loss": 0.015909469127655028, "step": 104865 }, { "epoch": 0.9870117647058824, "grad_norm": 0.3580928582141662, "learning_rate": 2.013599360283942e-06, "loss": 0.01263374388217926, "step": 104870 }, { "epoch": 0.9870588235294118, "grad_norm": 0.6059125021261348, "learning_rate": 2.0135513592693653e-06, "loss": 0.015432673692703246, "step": 104875 }, { "epoch": 0.9871058823529412, "grad_norm": 0.4721915582149851, "learning_rate": 2.0135033616874296e-06, "loss": 0.01636572480201721, "step": 104880 }, { "epoch": 0.9871529411764706, "grad_norm": 0.4236200625157159, "learning_rate": 2.0134553675377247e-06, "loss": 0.012130215764045715, "step": 104885 }, { "epoch": 0.9872, "grad_norm": 0.6840888519301581, "learning_rate": 2.0134073768198425e-06, "loss": 0.015199406445026398, "step": 104890 }, { "epoch": 0.9872470588235294, "grad_norm": 0.3223428475729423, "learning_rate": 2.0133593895333733e-06, "loss": 0.012313026934862137, "step": 104895 }, { "epoch": 0.9872941176470589, "grad_norm": 0.47761758285716355, "learning_rate": 2.013311405677909e-06, "loss": 0.012779958546161652, "step": 104900 }, { "epoch": 0.9873411764705883, "grad_norm": 0.4724976769867811, "learning_rate": 2.01326342525304e-06, "loss": 0.011390757560729981, "step": 104905 }, { "epoch": 0.9873882352941177, "grad_norm": 0.4219960719762913, "learning_rate": 2.0132154482583584e-06, "loss": 0.016847896575927734, "step": 104910 }, { "epoch": 0.9874352941176471, "grad_norm": 0.4061835887314243, "learning_rate": 2.0131674746934545e-06, "loss": 0.012827256321907043, "step": 104915 }, { "epoch": 0.9874823529411765, "grad_norm": 0.9290674995792229, "learning_rate": 2.01311950455792e-06, "loss": 0.01685362458229065, "step": 104920 }, { "epoch": 0.9875294117647059, "grad_norm": 0.3975509831820532, "learning_rate": 2.0130715378513467e-06, "loss": 0.011814428120851516, "step": 104925 }, { "epoch": 0.9875764705882353, "grad_norm": 0.4134522585428057, "learning_rate": 2.0130235745733263e-06, "loss": 0.01148507446050644, "step": 104930 }, { "epoch": 0.9876235294117647, "grad_norm": 0.5202470218345115, "learning_rate": 2.01297561472345e-06, "loss": 0.014395987987518311, "step": 104935 }, { "epoch": 0.9876705882352941, "grad_norm": 0.48325768178053813, "learning_rate": 2.0129276583013087e-06, "loss": 0.015106037259101868, "step": 104940 }, { "epoch": 0.9877176470588235, "grad_norm": 0.35269457259189596, "learning_rate": 2.012879705306495e-06, "loss": 0.01202123761177063, "step": 104945 }, { "epoch": 0.987764705882353, "grad_norm": 0.5010133612083342, "learning_rate": 2.012831755738601e-06, "loss": 0.013302940130233764, "step": 104950 }, { "epoch": 0.9878117647058824, "grad_norm": 0.7059547959799694, "learning_rate": 2.0127838095972176e-06, "loss": 0.01643287092447281, "step": 104955 }, { "epoch": 0.9878588235294118, "grad_norm": 0.4076413221869414, "learning_rate": 2.012735866881937e-06, "loss": 0.010335075855255126, "step": 104960 }, { "epoch": 0.9879058823529412, "grad_norm": 0.42628312608595265, "learning_rate": 2.0126879275923515e-06, "loss": 0.00956672877073288, "step": 104965 }, { "epoch": 0.9879529411764706, "grad_norm": 0.5477736356716579, "learning_rate": 2.0126399917280535e-06, "loss": 0.012580709159374237, "step": 104970 }, { "epoch": 0.988, "grad_norm": 0.567206401667473, "learning_rate": 2.0125920592886346e-06, "loss": 0.01567177623510361, "step": 104975 }, { "epoch": 0.9880470588235294, "grad_norm": 0.356118291198104, "learning_rate": 2.012544130273687e-06, "loss": 0.010900057852268219, "step": 104980 }, { "epoch": 0.9880941176470588, "grad_norm": 0.38893844882702655, "learning_rate": 2.0124962046828026e-06, "loss": 0.010064782202243805, "step": 104985 }, { "epoch": 0.9881411764705882, "grad_norm": 0.4960757814907879, "learning_rate": 2.012448282515574e-06, "loss": 0.0125652015209198, "step": 104990 }, { "epoch": 0.9881882352941177, "grad_norm": 0.5351479921512173, "learning_rate": 2.0124003637715942e-06, "loss": 0.014377965033054352, "step": 104995 }, { "epoch": 0.9882352941176471, "grad_norm": 0.45865131985886853, "learning_rate": 2.012352448450455e-06, "loss": 0.011673671752214431, "step": 105000 }, { "epoch": 0.9882823529411765, "grad_norm": 0.5246884153149259, "learning_rate": 2.0123045365517494e-06, "loss": 0.012369129806756973, "step": 105005 }, { "epoch": 0.9883294117647059, "grad_norm": 0.8288678101665449, "learning_rate": 2.0122566280750696e-06, "loss": 0.018829885125160217, "step": 105010 }, { "epoch": 0.9883764705882353, "grad_norm": 0.35189739535479314, "learning_rate": 2.0122087230200077e-06, "loss": 0.012090368568897248, "step": 105015 }, { "epoch": 0.9884235294117647, "grad_norm": 0.5016277478015313, "learning_rate": 2.0121608213861577e-06, "loss": 0.012032131850719451, "step": 105020 }, { "epoch": 0.9884705882352941, "grad_norm": 0.6282292624398784, "learning_rate": 2.0121129231731117e-06, "loss": 0.011078354716300965, "step": 105025 }, { "epoch": 0.9885176470588235, "grad_norm": 0.4117116423066276, "learning_rate": 2.0120650283804626e-06, "loss": 0.01112973690032959, "step": 105030 }, { "epoch": 0.9885647058823529, "grad_norm": 0.4412709595279354, "learning_rate": 2.012017137007804e-06, "loss": 0.011703135073184967, "step": 105035 }, { "epoch": 0.9886117647058823, "grad_norm": 0.5627555182479232, "learning_rate": 2.0119692490547273e-06, "loss": 0.016048818826675415, "step": 105040 }, { "epoch": 0.9886588235294118, "grad_norm": 0.38685953933837847, "learning_rate": 2.011921364520827e-06, "loss": 0.014704136550426484, "step": 105045 }, { "epoch": 0.9887058823529412, "grad_norm": 0.3939268012789844, "learning_rate": 2.011873483405696e-06, "loss": 0.010365854203701019, "step": 105050 }, { "epoch": 0.9887529411764706, "grad_norm": 0.5217142561989428, "learning_rate": 2.011825605708927e-06, "loss": 0.017147767543792724, "step": 105055 }, { "epoch": 0.9888, "grad_norm": 0.4069964568207074, "learning_rate": 2.011777731430114e-06, "loss": 0.017472396790981292, "step": 105060 }, { "epoch": 0.9888470588235294, "grad_norm": 0.5232894107251187, "learning_rate": 2.01172986056885e-06, "loss": 0.019072341918945312, "step": 105065 }, { "epoch": 0.9888941176470588, "grad_norm": 0.5716975107567969, "learning_rate": 2.011681993124728e-06, "loss": 0.013087660074234009, "step": 105070 }, { "epoch": 0.9889411764705882, "grad_norm": 0.3946279569627138, "learning_rate": 2.0116341290973423e-06, "loss": 0.011771403998136521, "step": 105075 }, { "epoch": 0.9889882352941176, "grad_norm": 0.37012269416327365, "learning_rate": 2.0115862684862855e-06, "loss": 0.015256541967391967, "step": 105080 }, { "epoch": 0.989035294117647, "grad_norm": 0.4295123235809128, "learning_rate": 2.0115384112911517e-06, "loss": 0.01417582929134369, "step": 105085 }, { "epoch": 0.9890823529411765, "grad_norm": 0.446218444821396, "learning_rate": 2.0114905575115347e-06, "loss": 0.014979937672615051, "step": 105090 }, { "epoch": 0.9891294117647059, "grad_norm": 0.2455323122015339, "learning_rate": 2.011442707147028e-06, "loss": 0.01147204488515854, "step": 105095 }, { "epoch": 0.9891764705882353, "grad_norm": 0.490034586395081, "learning_rate": 2.011394860197226e-06, "loss": 0.012856315076351165, "step": 105100 }, { "epoch": 0.9892235294117647, "grad_norm": 0.49197215653607446, "learning_rate": 2.011347016661722e-06, "loss": 0.012133727967739105, "step": 105105 }, { "epoch": 0.9892705882352941, "grad_norm": 0.5574466082639794, "learning_rate": 2.01129917654011e-06, "loss": 0.013774201273918152, "step": 105110 }, { "epoch": 0.9893176470588235, "grad_norm": 0.613147129923621, "learning_rate": 2.011251339831984e-06, "loss": 0.011766527593135834, "step": 105115 }, { "epoch": 0.9893647058823529, "grad_norm": 0.44934026103259134, "learning_rate": 2.0112035065369385e-06, "loss": 0.01421428918838501, "step": 105120 }, { "epoch": 0.9894117647058823, "grad_norm": 0.40916781456319595, "learning_rate": 2.011155676654567e-06, "loss": 0.013998159766197204, "step": 105125 }, { "epoch": 0.9894588235294117, "grad_norm": 0.4171652266004332, "learning_rate": 2.0111078501844643e-06, "loss": 0.011904288083314896, "step": 105130 }, { "epoch": 0.9895058823529411, "grad_norm": 0.47798115065693303, "learning_rate": 2.011060027126224e-06, "loss": 0.012392181158065795, "step": 105135 }, { "epoch": 0.9895529411764706, "grad_norm": 0.5323317809648236, "learning_rate": 2.011012207479441e-06, "loss": 0.01166006624698639, "step": 105140 }, { "epoch": 0.9896, "grad_norm": 0.4452937415477242, "learning_rate": 2.010964391243711e-06, "loss": 0.012343303114175797, "step": 105145 }, { "epoch": 0.9896470588235294, "grad_norm": 0.5280039142521957, "learning_rate": 2.0109165784186256e-06, "loss": 0.01782446801662445, "step": 105150 }, { "epoch": 0.9896941176470588, "grad_norm": 0.8464520996917116, "learning_rate": 2.0108687690037814e-06, "loss": 0.013726292550563813, "step": 105155 }, { "epoch": 0.9897411764705882, "grad_norm": 0.5712398704394694, "learning_rate": 2.0108209629987724e-06, "loss": 0.012560033798217773, "step": 105160 }, { "epoch": 0.9897882352941176, "grad_norm": 0.5385499217158587, "learning_rate": 2.010773160403194e-06, "loss": 0.01205836609005928, "step": 105165 }, { "epoch": 0.989835294117647, "grad_norm": 0.4905492885917941, "learning_rate": 2.0107253612166398e-06, "loss": 0.011538040637969971, "step": 105170 }, { "epoch": 0.9898823529411764, "grad_norm": 0.2957780697202806, "learning_rate": 2.0106775654387054e-06, "loss": 0.01165432557463646, "step": 105175 }, { "epoch": 0.9899294117647058, "grad_norm": 0.6278442797625278, "learning_rate": 2.010629773068985e-06, "loss": 0.012190169841051101, "step": 105180 }, { "epoch": 0.9899764705882353, "grad_norm": 0.44091563925984656, "learning_rate": 2.010581984107075e-06, "loss": 0.020826703310012816, "step": 105185 }, { "epoch": 0.9900235294117647, "grad_norm": 0.66011887612407, "learning_rate": 2.0105341985525694e-06, "loss": 0.01462528109550476, "step": 105190 }, { "epoch": 0.9900705882352941, "grad_norm": 0.2659951872945782, "learning_rate": 2.010486416405063e-06, "loss": 0.013636729121208191, "step": 105195 }, { "epoch": 0.9901176470588235, "grad_norm": 0.3971514264059058, "learning_rate": 2.0104386376641515e-06, "loss": 0.012001907825469971, "step": 105200 }, { "epoch": 0.990164705882353, "grad_norm": 0.49590503041773754, "learning_rate": 2.01039086232943e-06, "loss": 0.016996920108795166, "step": 105205 }, { "epoch": 0.9902117647058823, "grad_norm": 0.535361236847954, "learning_rate": 2.0103430904004936e-06, "loss": 0.013227291405200958, "step": 105210 }, { "epoch": 0.9902588235294117, "grad_norm": 0.526730452484602, "learning_rate": 2.010295321876938e-06, "loss": 0.012735565006732941, "step": 105215 }, { "epoch": 0.9903058823529411, "grad_norm": 0.5376543187991701, "learning_rate": 2.0102475567583587e-06, "loss": 0.01164073646068573, "step": 105220 }, { "epoch": 0.9903529411764705, "grad_norm": 0.293481029374603, "learning_rate": 2.010199795044351e-06, "loss": 0.009800928831100463, "step": 105225 }, { "epoch": 0.9904, "grad_norm": 0.3705742716458205, "learning_rate": 2.0101520367345105e-06, "loss": 0.013424350321292878, "step": 105230 }, { "epoch": 0.9904470588235295, "grad_norm": 0.5430689839408009, "learning_rate": 2.0101042818284327e-06, "loss": 0.015353353321552276, "step": 105235 }, { "epoch": 0.9904941176470589, "grad_norm": 0.6396236069970792, "learning_rate": 2.010056530325714e-06, "loss": 0.013306920230388642, "step": 105240 }, { "epoch": 0.9905411764705883, "grad_norm": 0.3982859132417783, "learning_rate": 2.010008782225949e-06, "loss": 0.011012870818376541, "step": 105245 }, { "epoch": 0.9905882352941177, "grad_norm": 0.5030910169383744, "learning_rate": 2.0099610375287343e-06, "loss": 0.012749041616916656, "step": 105250 }, { "epoch": 0.9906352941176471, "grad_norm": 0.2398664292168954, "learning_rate": 2.009913296233666e-06, "loss": 0.015123915672302247, "step": 105255 }, { "epoch": 0.9906823529411765, "grad_norm": 0.3774231734427318, "learning_rate": 2.0098655583403393e-06, "loss": 0.012974628806114196, "step": 105260 }, { "epoch": 0.9907294117647059, "grad_norm": 0.5540757743949, "learning_rate": 2.009817823848351e-06, "loss": 0.011985665559768677, "step": 105265 }, { "epoch": 0.9907764705882353, "grad_norm": 0.3965724805878229, "learning_rate": 2.0097700927572963e-06, "loss": 0.01560128927230835, "step": 105270 }, { "epoch": 0.9908235294117647, "grad_norm": 0.4416867138936635, "learning_rate": 2.0097223650667726e-06, "loss": 0.027248847484588622, "step": 105275 }, { "epoch": 0.9908705882352942, "grad_norm": 0.3023007057159056, "learning_rate": 2.009674640776375e-06, "loss": 0.011548824608325958, "step": 105280 }, { "epoch": 0.9909176470588236, "grad_norm": 0.5738342871781605, "learning_rate": 2.0096269198857006e-06, "loss": 0.017442482709884643, "step": 105285 }, { "epoch": 0.990964705882353, "grad_norm": 0.5800867234215316, "learning_rate": 2.009579202394346e-06, "loss": 0.012088167667388915, "step": 105290 }, { "epoch": 0.9910117647058824, "grad_norm": 0.323398561494134, "learning_rate": 2.0095314883019065e-06, "loss": 0.011844833940267563, "step": 105295 }, { "epoch": 0.9910588235294118, "grad_norm": 0.520374573510304, "learning_rate": 2.0094837776079794e-06, "loss": 0.014401060342788697, "step": 105300 }, { "epoch": 0.9911058823529412, "grad_norm": 0.38518932313956783, "learning_rate": 2.0094360703121612e-06, "loss": 0.012086665630340577, "step": 105305 }, { "epoch": 0.9911529411764706, "grad_norm": 0.3334433105192284, "learning_rate": 2.0093883664140485e-06, "loss": 0.012438236176967621, "step": 105310 }, { "epoch": 0.9912, "grad_norm": 0.6383389593973763, "learning_rate": 2.009340665913238e-06, "loss": 0.01697591841220856, "step": 105315 }, { "epoch": 0.9912470588235294, "grad_norm": 0.6369048112572008, "learning_rate": 2.009292968809326e-06, "loss": 0.0185156911611557, "step": 105320 }, { "epoch": 0.9912941176470588, "grad_norm": 0.5075615199592871, "learning_rate": 2.00924527510191e-06, "loss": 0.014450643956661225, "step": 105325 }, { "epoch": 0.9913411764705883, "grad_norm": 0.43067552021464933, "learning_rate": 2.009197584790587e-06, "loss": 0.011843246966600418, "step": 105330 }, { "epoch": 0.9913882352941177, "grad_norm": 0.5533862154933998, "learning_rate": 2.0091498978749537e-06, "loss": 0.012669771909713745, "step": 105335 }, { "epoch": 0.9914352941176471, "grad_norm": 0.34339992050657786, "learning_rate": 2.009102214354607e-06, "loss": 0.013474373519420624, "step": 105340 }, { "epoch": 0.9914823529411765, "grad_norm": 0.4344649681612908, "learning_rate": 2.009054534229144e-06, "loss": 0.011082985997200012, "step": 105345 }, { "epoch": 0.9915294117647059, "grad_norm": 0.9565482311787692, "learning_rate": 2.009006857498163e-06, "loss": 0.01579917222261429, "step": 105350 }, { "epoch": 0.9915764705882353, "grad_norm": 0.4821335224939444, "learning_rate": 2.008959184161259e-06, "loss": 0.011444851756095886, "step": 105355 }, { "epoch": 0.9916235294117647, "grad_norm": 0.6443028951736625, "learning_rate": 2.0089115142180314e-06, "loss": 0.01477188766002655, "step": 105360 }, { "epoch": 0.9916705882352941, "grad_norm": 0.5001828345367985, "learning_rate": 2.0088638476680763e-06, "loss": 0.013088501989841461, "step": 105365 }, { "epoch": 0.9917176470588235, "grad_norm": 0.5327477298561497, "learning_rate": 2.008816184510992e-06, "loss": 0.017636337876319887, "step": 105370 }, { "epoch": 0.991764705882353, "grad_norm": 0.45864085434149165, "learning_rate": 2.0087685247463753e-06, "loss": 0.009720128029584885, "step": 105375 }, { "epoch": 0.9918117647058824, "grad_norm": 0.5761275953569254, "learning_rate": 2.0087208683738247e-06, "loss": 0.01039668619632721, "step": 105380 }, { "epoch": 0.9918588235294118, "grad_norm": 0.43714714102096197, "learning_rate": 2.008673215392937e-06, "loss": 0.014752620458602905, "step": 105385 }, { "epoch": 0.9919058823529412, "grad_norm": 0.37022070619831626, "learning_rate": 2.0086255658033104e-06, "loss": 0.013710981607437134, "step": 105390 }, { "epoch": 0.9919529411764706, "grad_norm": 0.7309089068531085, "learning_rate": 2.0085779196045424e-06, "loss": 0.016908109188079834, "step": 105395 }, { "epoch": 0.992, "grad_norm": 0.38726173046096873, "learning_rate": 2.0085302767962305e-06, "loss": 0.013603213429450988, "step": 105400 }, { "epoch": 0.9920470588235294, "grad_norm": 0.46981732774909113, "learning_rate": 2.0084826373779736e-06, "loss": 0.013816720247268677, "step": 105405 }, { "epoch": 0.9920941176470588, "grad_norm": 0.4258861369519994, "learning_rate": 2.008435001349369e-06, "loss": 0.01037687435746193, "step": 105410 }, { "epoch": 0.9921411764705882, "grad_norm": 0.5700589430489087, "learning_rate": 2.008387368710015e-06, "loss": 0.015536509454250336, "step": 105415 }, { "epoch": 0.9921882352941176, "grad_norm": 0.233899752005844, "learning_rate": 2.008339739459509e-06, "loss": 0.016602307558059692, "step": 105420 }, { "epoch": 0.9922352941176471, "grad_norm": 0.47148432451339956, "learning_rate": 2.0082921135974504e-06, "loss": 0.012264910340309142, "step": 105425 }, { "epoch": 0.9922823529411765, "grad_norm": 0.37040529248538817, "learning_rate": 2.0082444911234367e-06, "loss": 0.011221843957901, "step": 105430 }, { "epoch": 0.9923294117647059, "grad_norm": 0.7774897891527334, "learning_rate": 2.0081968720370664e-06, "loss": 0.017835763096809388, "step": 105435 }, { "epoch": 0.9923764705882353, "grad_norm": 0.30909294185534375, "learning_rate": 2.008149256337938e-06, "loss": 0.01133432388305664, "step": 105440 }, { "epoch": 0.9924235294117647, "grad_norm": 0.6237585460151911, "learning_rate": 2.0081016440256493e-06, "loss": 0.016651976108551025, "step": 105445 }, { "epoch": 0.9924705882352941, "grad_norm": 0.6193678413677437, "learning_rate": 2.0080540350997997e-06, "loss": 0.012958815693855286, "step": 105450 }, { "epoch": 0.9925176470588235, "grad_norm": 0.4959299904208443, "learning_rate": 2.008006429559987e-06, "loss": 0.012891653180122375, "step": 105455 }, { "epoch": 0.9925647058823529, "grad_norm": 0.5688482882823441, "learning_rate": 2.007958827405811e-06, "loss": 0.01405709534883499, "step": 105460 }, { "epoch": 0.9926117647058823, "grad_norm": 0.7132625676564305, "learning_rate": 2.007911228636869e-06, "loss": 0.011704627424478531, "step": 105465 }, { "epoch": 0.9926588235294118, "grad_norm": 0.32864652387853427, "learning_rate": 2.0078636332527605e-06, "loss": 0.010844504833221436, "step": 105470 }, { "epoch": 0.9927058823529412, "grad_norm": 0.46712165976460374, "learning_rate": 2.0078160412530847e-06, "loss": 0.011279627680778503, "step": 105475 }, { "epoch": 0.9927529411764706, "grad_norm": 0.7101281225679453, "learning_rate": 2.0077684526374397e-06, "loss": 0.02027570307254791, "step": 105480 }, { "epoch": 0.9928, "grad_norm": 0.31114669496882574, "learning_rate": 2.0077208674054245e-06, "loss": 0.0182982474565506, "step": 105485 }, { "epoch": 0.9928470588235294, "grad_norm": 0.425528833581031, "learning_rate": 2.007673285556639e-06, "loss": 0.012307057529687882, "step": 105490 }, { "epoch": 0.9928941176470588, "grad_norm": 0.36147421707027777, "learning_rate": 2.0076257070906823e-06, "loss": 0.014020058512687682, "step": 105495 }, { "epoch": 0.9929411764705882, "grad_norm": 0.8646738962306341, "learning_rate": 2.007578132007153e-06, "loss": 0.012850596010684967, "step": 105500 }, { "epoch": 0.9929882352941176, "grad_norm": 0.7528140608533789, "learning_rate": 2.00753056030565e-06, "loss": 0.009400077909231187, "step": 105505 }, { "epoch": 0.993035294117647, "grad_norm": 0.4303260558660617, "learning_rate": 2.007482991985773e-06, "loss": 0.011309491842985154, "step": 105510 }, { "epoch": 0.9930823529411764, "grad_norm": 0.7220752131324887, "learning_rate": 2.007435427047122e-06, "loss": 0.014766669273376465, "step": 105515 }, { "epoch": 0.9931294117647059, "grad_norm": 0.5892696051887466, "learning_rate": 2.0073878654892957e-06, "loss": 0.0115917406976223, "step": 105520 }, { "epoch": 0.9931764705882353, "grad_norm": 0.4943479658715732, "learning_rate": 2.007340307311894e-06, "loss": 0.01117093414068222, "step": 105525 }, { "epoch": 0.9932235294117647, "grad_norm": 0.43076532835872267, "learning_rate": 2.007292752514516e-06, "loss": 0.01895040273666382, "step": 105530 }, { "epoch": 0.9932705882352941, "grad_norm": 0.45756943874688966, "learning_rate": 2.007245201096762e-06, "loss": 0.016081027686595917, "step": 105535 }, { "epoch": 0.9933176470588235, "grad_norm": 0.38702368310248353, "learning_rate": 2.0071976530582317e-06, "loss": 0.011340780556201935, "step": 105540 }, { "epoch": 0.9933647058823529, "grad_norm": 0.45895207441436053, "learning_rate": 2.007150108398524e-06, "loss": 0.01334151327610016, "step": 105545 }, { "epoch": 0.9934117647058823, "grad_norm": 0.4853164214439299, "learning_rate": 2.0071025671172394e-06, "loss": 0.016467219591140746, "step": 105550 }, { "epoch": 0.9934588235294117, "grad_norm": 0.48055720022930715, "learning_rate": 2.007055029213978e-06, "loss": 0.009722907841205598, "step": 105555 }, { "epoch": 0.9935058823529411, "grad_norm": 0.4103446028908643, "learning_rate": 2.007007494688339e-06, "loss": 0.01154838427901268, "step": 105560 }, { "epoch": 0.9935529411764706, "grad_norm": 0.2628140800414002, "learning_rate": 2.006959963539923e-06, "loss": 0.011591468751430512, "step": 105565 }, { "epoch": 0.9936, "grad_norm": 0.6345404734858615, "learning_rate": 2.0069124357683306e-06, "loss": 0.013524645566940307, "step": 105570 }, { "epoch": 0.9936470588235294, "grad_norm": 0.35496464673100103, "learning_rate": 2.0068649113731613e-06, "loss": 0.012571227550506592, "step": 105575 }, { "epoch": 0.9936941176470588, "grad_norm": 0.43977372621666333, "learning_rate": 2.006817390354015e-06, "loss": 0.012237223982810973, "step": 105580 }, { "epoch": 0.9937411764705882, "grad_norm": 0.7295130774415519, "learning_rate": 2.006769872710493e-06, "loss": 0.017214393615722655, "step": 105585 }, { "epoch": 0.9937882352941176, "grad_norm": 0.489295074161676, "learning_rate": 2.006722358442195e-06, "loss": 0.012689313292503357, "step": 105590 }, { "epoch": 0.993835294117647, "grad_norm": 0.2483520008054118, "learning_rate": 2.006674847548722e-06, "loss": 0.011458638310432433, "step": 105595 }, { "epoch": 0.9938823529411764, "grad_norm": 0.8872237110582352, "learning_rate": 2.0066273400296733e-06, "loss": 0.01615799367427826, "step": 105600 }, { "epoch": 0.9939294117647058, "grad_norm": 0.3601216007258531, "learning_rate": 2.006579835884651e-06, "loss": 0.011939189583063125, "step": 105605 }, { "epoch": 0.9939764705882352, "grad_norm": 0.4874259660991697, "learning_rate": 2.006532335113255e-06, "loss": 0.01306101530790329, "step": 105610 }, { "epoch": 0.9940235294117648, "grad_norm": 0.429462418884334, "learning_rate": 2.0064848377150857e-06, "loss": 0.011917696893215179, "step": 105615 }, { "epoch": 0.9940705882352942, "grad_norm": 0.7634365492402035, "learning_rate": 2.0064373436897444e-06, "loss": 0.01244674101471901, "step": 105620 }, { "epoch": 0.9941176470588236, "grad_norm": 0.42912047571463313, "learning_rate": 2.006389853036832e-06, "loss": 0.01196587085723877, "step": 105625 }, { "epoch": 0.994164705882353, "grad_norm": 0.59578937712337, "learning_rate": 2.0063423657559485e-06, "loss": 0.012880465388298035, "step": 105630 }, { "epoch": 0.9942117647058824, "grad_norm": 0.5109311634487664, "learning_rate": 2.0062948818466964e-06, "loss": 0.016208389401435853, "step": 105635 }, { "epoch": 0.9942588235294118, "grad_norm": 0.4157093348373105, "learning_rate": 2.006247401308675e-06, "loss": 0.014552932977676392, "step": 105640 }, { "epoch": 0.9943058823529412, "grad_norm": 0.4451908707231337, "learning_rate": 2.0061999241414872e-06, "loss": 0.011286021769046783, "step": 105645 }, { "epoch": 0.9943529411764706, "grad_norm": 0.41405631576369084, "learning_rate": 2.0061524503447327e-06, "loss": 0.013357079029083252, "step": 105650 }, { "epoch": 0.9944, "grad_norm": 0.34892070755312704, "learning_rate": 2.006104979918013e-06, "loss": 0.0127469003200531, "step": 105655 }, { "epoch": 0.9944470588235295, "grad_norm": 0.7023569583385301, "learning_rate": 2.0060575128609304e-06, "loss": 0.014583709836006164, "step": 105660 }, { "epoch": 0.9944941176470589, "grad_norm": 0.7606746635840792, "learning_rate": 2.006010049173085e-06, "loss": 0.016396340727806092, "step": 105665 }, { "epoch": 0.9945411764705883, "grad_norm": 0.4207576671266851, "learning_rate": 2.0059625888540796e-06, "loss": 0.012760394811630249, "step": 105670 }, { "epoch": 0.9945882352941177, "grad_norm": 0.5169812562459188, "learning_rate": 2.0059151319035145e-06, "loss": 0.012188461422920228, "step": 105675 }, { "epoch": 0.9946352941176471, "grad_norm": 0.5198222107104876, "learning_rate": 2.005867678320991e-06, "loss": 0.014213880896568299, "step": 105680 }, { "epoch": 0.9946823529411765, "grad_norm": 0.7360028717486942, "learning_rate": 2.0058202281061125e-06, "loss": 0.020999701321125032, "step": 105685 }, { "epoch": 0.9947294117647059, "grad_norm": 0.40484326427836814, "learning_rate": 2.005772781258479e-06, "loss": 0.00804576426744461, "step": 105690 }, { "epoch": 0.9947764705882353, "grad_norm": 0.3849217117191652, "learning_rate": 2.005725337777693e-06, "loss": 0.012976188957691193, "step": 105695 }, { "epoch": 0.9948235294117647, "grad_norm": 0.34608501969960725, "learning_rate": 2.0056778976633563e-06, "loss": 0.011384108662605285, "step": 105700 }, { "epoch": 0.9948705882352941, "grad_norm": 0.553607491837698, "learning_rate": 2.0056304609150703e-06, "loss": 0.013420520722866059, "step": 105705 }, { "epoch": 0.9949176470588236, "grad_norm": 0.4869626740209097, "learning_rate": 2.005583027532438e-06, "loss": 0.015537141263484955, "step": 105710 }, { "epoch": 0.994964705882353, "grad_norm": 0.40364836959135036, "learning_rate": 2.0055355975150604e-06, "loss": 0.012761196494102478, "step": 105715 }, { "epoch": 0.9950117647058824, "grad_norm": 0.47411184351514, "learning_rate": 2.0054881708625398e-06, "loss": 0.011018854379653931, "step": 105720 }, { "epoch": 0.9950588235294118, "grad_norm": 0.3686490066825006, "learning_rate": 2.0054407475744785e-06, "loss": 0.012857908010482788, "step": 105725 }, { "epoch": 0.9951058823529412, "grad_norm": 0.3643932997075854, "learning_rate": 2.0053933276504793e-06, "loss": 0.013327434659004211, "step": 105730 }, { "epoch": 0.9951529411764706, "grad_norm": 0.8285111677910015, "learning_rate": 2.0053459110901437e-06, "loss": 0.013708779215812683, "step": 105735 }, { "epoch": 0.9952, "grad_norm": 0.5976003957915698, "learning_rate": 2.0052984978930743e-06, "loss": 0.012527337670326233, "step": 105740 }, { "epoch": 0.9952470588235294, "grad_norm": 0.48688559698789796, "learning_rate": 2.0052510880588736e-06, "loss": 0.012910556793212891, "step": 105745 }, { "epoch": 0.9952941176470588, "grad_norm": 0.40726050816893883, "learning_rate": 2.0052036815871438e-06, "loss": 0.012830027937889099, "step": 105750 }, { "epoch": 0.9953411764705883, "grad_norm": 0.48871006095245745, "learning_rate": 2.0051562784774876e-06, "loss": 0.012440756708383561, "step": 105755 }, { "epoch": 0.9953882352941177, "grad_norm": 0.5256121141504635, "learning_rate": 2.0051088787295083e-06, "loss": 0.01697709858417511, "step": 105760 }, { "epoch": 0.9954352941176471, "grad_norm": 0.4388904896817111, "learning_rate": 2.0050614823428077e-06, "loss": 0.012468241900205613, "step": 105765 }, { "epoch": 0.9954823529411765, "grad_norm": 0.5659654896612978, "learning_rate": 2.0050140893169886e-06, "loss": 0.014117482304573058, "step": 105770 }, { "epoch": 0.9955294117647059, "grad_norm": 0.2844498278571424, "learning_rate": 2.0049666996516545e-06, "loss": 0.011400794982910157, "step": 105775 }, { "epoch": 0.9955764705882353, "grad_norm": 0.3772158797921649, "learning_rate": 2.0049193133464074e-06, "loss": 0.012627819180488586, "step": 105780 }, { "epoch": 0.9956235294117647, "grad_norm": 0.5563460647178035, "learning_rate": 2.004871930400851e-06, "loss": 0.015019567310810089, "step": 105785 }, { "epoch": 0.9956705882352941, "grad_norm": 0.41034114292737883, "learning_rate": 2.004824550814588e-06, "loss": 0.020029842853546143, "step": 105790 }, { "epoch": 0.9957176470588235, "grad_norm": 0.766916697554071, "learning_rate": 2.0047771745872215e-06, "loss": 0.01226361021399498, "step": 105795 }, { "epoch": 0.995764705882353, "grad_norm": 0.5202977060041327, "learning_rate": 2.0047298017183543e-06, "loss": 0.012661102414131164, "step": 105800 }, { "epoch": 0.9958117647058824, "grad_norm": 0.37860368394252186, "learning_rate": 2.0046824322075903e-06, "loss": 0.01035071536898613, "step": 105805 }, { "epoch": 0.9958588235294118, "grad_norm": 0.3578257580415536, "learning_rate": 2.004635066054532e-06, "loss": 0.013531780242919922, "step": 105810 }, { "epoch": 0.9959058823529412, "grad_norm": 0.36198054879110697, "learning_rate": 2.0045877032587836e-06, "loss": 0.009830029308795929, "step": 105815 }, { "epoch": 0.9959529411764706, "grad_norm": 0.5156004867388239, "learning_rate": 2.004540343819948e-06, "loss": 0.012281826883554458, "step": 105820 }, { "epoch": 0.996, "grad_norm": 0.3546131187566936, "learning_rate": 2.004492987737629e-06, "loss": 0.009601674973964691, "step": 105825 }, { "epoch": 0.9960470588235294, "grad_norm": 0.37234948455649713, "learning_rate": 2.0044456350114297e-06, "loss": 0.011264188587665558, "step": 105830 }, { "epoch": 0.9960941176470588, "grad_norm": 0.39917424777884525, "learning_rate": 2.004398285640954e-06, "loss": 0.011928142607212066, "step": 105835 }, { "epoch": 0.9961411764705882, "grad_norm": 0.5769354305177194, "learning_rate": 2.004350939625805e-06, "loss": 0.012737850844860076, "step": 105840 }, { "epoch": 0.9961882352941176, "grad_norm": 0.6183428181293238, "learning_rate": 2.004303596965587e-06, "loss": 0.016288572549819948, "step": 105845 }, { "epoch": 0.9962352941176471, "grad_norm": 0.5229293325528942, "learning_rate": 2.0042562576599044e-06, "loss": 0.016361913084983824, "step": 105850 }, { "epoch": 0.9962823529411765, "grad_norm": 0.5340741281351573, "learning_rate": 2.00420892170836e-06, "loss": 0.012615856528282166, "step": 105855 }, { "epoch": 0.9963294117647059, "grad_norm": 0.5605303409921127, "learning_rate": 2.0041615891105582e-06, "loss": 0.01543067693710327, "step": 105860 }, { "epoch": 0.9963764705882353, "grad_norm": 0.41805546113889785, "learning_rate": 2.0041142598661024e-06, "loss": 0.010603854805231095, "step": 105865 }, { "epoch": 0.9964235294117647, "grad_norm": 0.5915892285078084, "learning_rate": 2.0040669339745976e-06, "loss": 0.01587857902050018, "step": 105870 }, { "epoch": 0.9964705882352941, "grad_norm": 0.40931793377764214, "learning_rate": 2.0040196114356476e-06, "loss": 0.014731435477733612, "step": 105875 }, { "epoch": 0.9965176470588235, "grad_norm": 0.8034657321631137, "learning_rate": 2.0039722922488562e-06, "loss": 0.017265260219573975, "step": 105880 }, { "epoch": 0.9965647058823529, "grad_norm": 0.36509159753204007, "learning_rate": 2.003924976413828e-06, "loss": 0.012985315918922425, "step": 105885 }, { "epoch": 0.9966117647058823, "grad_norm": 0.3161273470877407, "learning_rate": 2.0038776639301675e-06, "loss": 0.01294635534286499, "step": 105890 }, { "epoch": 0.9966588235294118, "grad_norm": 0.4753663516471245, "learning_rate": 2.003830354797479e-06, "loss": 0.01461004912853241, "step": 105895 }, { "epoch": 0.9967058823529412, "grad_norm": 0.5578272484087121, "learning_rate": 2.0037830490153665e-06, "loss": 0.013992299139499665, "step": 105900 }, { "epoch": 0.9967529411764706, "grad_norm": 0.6526973480415956, "learning_rate": 2.003735746583435e-06, "loss": 0.014441008865833282, "step": 105905 }, { "epoch": 0.9968, "grad_norm": 0.4439376654931226, "learning_rate": 2.003688447501289e-06, "loss": 0.010285395383834838, "step": 105910 }, { "epoch": 0.9968470588235294, "grad_norm": 0.4250591656951809, "learning_rate": 2.003641151768533e-06, "loss": 0.014048248529434204, "step": 105915 }, { "epoch": 0.9968941176470588, "grad_norm": 0.4304930741376164, "learning_rate": 2.003593859384772e-06, "loss": 0.014025206863880157, "step": 105920 }, { "epoch": 0.9969411764705882, "grad_norm": 0.3284398061777379, "learning_rate": 2.00354657034961e-06, "loss": 0.014166250824928284, "step": 105925 }, { "epoch": 0.9969882352941176, "grad_norm": 0.34179094438140273, "learning_rate": 2.003499284662653e-06, "loss": 0.01255355030298233, "step": 105930 }, { "epoch": 0.997035294117647, "grad_norm": 0.40988773628466013, "learning_rate": 2.0034520023235053e-06, "loss": 0.011549909412860871, "step": 105935 }, { "epoch": 0.9970823529411764, "grad_norm": 0.42596103401912105, "learning_rate": 2.003404723331772e-06, "loss": 0.01300358921289444, "step": 105940 }, { "epoch": 0.9971294117647059, "grad_norm": 0.44192615337125957, "learning_rate": 2.0033574476870577e-06, "loss": 0.012873673439025879, "step": 105945 }, { "epoch": 0.9971764705882353, "grad_norm": 0.372497116818631, "learning_rate": 2.0033101753889682e-06, "loss": 0.012141145765781403, "step": 105950 }, { "epoch": 0.9972235294117647, "grad_norm": 0.6850861448750301, "learning_rate": 2.0032629064371082e-06, "loss": 0.01365777999162674, "step": 105955 }, { "epoch": 0.9972705882352941, "grad_norm": 0.544552066014147, "learning_rate": 2.0032156408310834e-06, "loss": 0.014344790577888488, "step": 105960 }, { "epoch": 0.9973176470588235, "grad_norm": 0.4867446753063327, "learning_rate": 2.0031683785704987e-06, "loss": 0.008600294589996338, "step": 105965 }, { "epoch": 0.9973647058823529, "grad_norm": 0.2906580093369369, "learning_rate": 2.0031211196549597e-06, "loss": 0.011761468648910523, "step": 105970 }, { "epoch": 0.9974117647058823, "grad_norm": 0.37349942841548817, "learning_rate": 2.0030738640840714e-06, "loss": 0.014331331849098206, "step": 105975 }, { "epoch": 0.9974588235294117, "grad_norm": 0.35596323478923203, "learning_rate": 2.00302661185744e-06, "loss": 0.010049465298652648, "step": 105980 }, { "epoch": 0.9975058823529411, "grad_norm": 0.3900954778969125, "learning_rate": 2.0029793629746707e-06, "loss": 0.015265309810638427, "step": 105985 }, { "epoch": 0.9975529411764706, "grad_norm": 0.5860127871412476, "learning_rate": 2.0029321174353684e-06, "loss": 0.01517365425825119, "step": 105990 }, { "epoch": 0.9976, "grad_norm": 0.6671414813564304, "learning_rate": 2.00288487523914e-06, "loss": 0.013301317393779755, "step": 105995 }, { "epoch": 0.9976470588235294, "grad_norm": 1.0411586444744334, "learning_rate": 2.0028376363855913e-06, "loss": 0.014131098985671997, "step": 106000 }, { "epoch": 0.9976941176470588, "grad_norm": 0.3446281338163378, "learning_rate": 2.002790400874327e-06, "loss": 0.012580202519893646, "step": 106005 }, { "epoch": 0.9977411764705882, "grad_norm": 0.5198586369231452, "learning_rate": 2.0027431687049535e-06, "loss": 0.010719786584377288, "step": 106010 }, { "epoch": 0.9977882352941176, "grad_norm": 0.5947904622585134, "learning_rate": 2.002695939877077e-06, "loss": 0.011149027943611145, "step": 106015 }, { "epoch": 0.997835294117647, "grad_norm": 0.5604810938218698, "learning_rate": 2.0026487143903035e-06, "loss": 0.011763030290603637, "step": 106020 }, { "epoch": 0.9978823529411764, "grad_norm": 0.5435499131722951, "learning_rate": 2.002601492244239e-06, "loss": 0.012749436497688293, "step": 106025 }, { "epoch": 0.9979294117647058, "grad_norm": 0.4628907217504451, "learning_rate": 2.002554273438489e-06, "loss": 0.01415371596813202, "step": 106030 }, { "epoch": 0.9979764705882352, "grad_norm": 0.4145886702157734, "learning_rate": 2.002507057972661e-06, "loss": 0.014466321468353272, "step": 106035 }, { "epoch": 0.9980235294117648, "grad_norm": 0.4908681862829866, "learning_rate": 2.0024598458463606e-06, "loss": 0.014415717124938965, "step": 106040 }, { "epoch": 0.9980705882352942, "grad_norm": 0.5840209640723721, "learning_rate": 2.002412637059194e-06, "loss": 0.010435253381729126, "step": 106045 }, { "epoch": 0.9981176470588236, "grad_norm": 0.703415990598963, "learning_rate": 2.002365431610768e-06, "loss": 0.01205701231956482, "step": 106050 }, { "epoch": 0.998164705882353, "grad_norm": 0.39912183000142654, "learning_rate": 2.0023182295006886e-06, "loss": 0.013841789960861207, "step": 106055 }, { "epoch": 0.9982117647058824, "grad_norm": 0.41621363546039836, "learning_rate": 2.0022710307285623e-06, "loss": 0.0109270378947258, "step": 106060 }, { "epoch": 0.9982588235294118, "grad_norm": 0.35506183959190085, "learning_rate": 2.0022238352939965e-06, "loss": 0.01061541810631752, "step": 106065 }, { "epoch": 0.9983058823529412, "grad_norm": 0.3872628618695336, "learning_rate": 2.0021766431965965e-06, "loss": 0.010643435269594192, "step": 106070 }, { "epoch": 0.9983529411764706, "grad_norm": 0.2708192451034514, "learning_rate": 2.0021294544359714e-06, "loss": 0.01274595856666565, "step": 106075 }, { "epoch": 0.9984, "grad_norm": 0.3027024462200701, "learning_rate": 2.0020822690117253e-06, "loss": 0.011549384146928788, "step": 106080 }, { "epoch": 0.9984470588235295, "grad_norm": 0.41133751516433836, "learning_rate": 2.0020350869234667e-06, "loss": 0.014635519683361053, "step": 106085 }, { "epoch": 0.9984941176470589, "grad_norm": 0.606955488272544, "learning_rate": 2.0019879081708015e-06, "loss": 0.016894155740737916, "step": 106090 }, { "epoch": 0.9985411764705883, "grad_norm": 0.9277272014751392, "learning_rate": 2.001940732753338e-06, "loss": 0.012921491265296936, "step": 106095 }, { "epoch": 0.9985882352941177, "grad_norm": 0.4220393658323177, "learning_rate": 2.0018935606706826e-06, "loss": 0.01540195345878601, "step": 106100 }, { "epoch": 0.9986352941176471, "grad_norm": 0.5842371268530706, "learning_rate": 2.0018463919224425e-06, "loss": 0.014351874589920044, "step": 106105 }, { "epoch": 0.9986823529411765, "grad_norm": 0.27783100570255037, "learning_rate": 2.0017992265082244e-06, "loss": 0.010185258835554123, "step": 106110 }, { "epoch": 0.9987294117647059, "grad_norm": 0.2605907770077525, "learning_rate": 2.0017520644276354e-06, "loss": 0.012289724498987197, "step": 106115 }, { "epoch": 0.9987764705882353, "grad_norm": 0.3942082132135691, "learning_rate": 2.001704905680284e-06, "loss": 0.009442631155252457, "step": 106120 }, { "epoch": 0.9988235294117647, "grad_norm": 0.5514134599417595, "learning_rate": 2.001657750265777e-06, "loss": 0.016533735394477844, "step": 106125 }, { "epoch": 0.9988705882352941, "grad_norm": 0.695471964501139, "learning_rate": 2.0016105981837216e-06, "loss": 0.015377572178840638, "step": 106130 }, { "epoch": 0.9989176470588236, "grad_norm": 0.43938644011392214, "learning_rate": 2.0015634494337252e-06, "loss": 0.01338481605052948, "step": 106135 }, { "epoch": 0.998964705882353, "grad_norm": 0.5068392081462292, "learning_rate": 2.0015163040153958e-06, "loss": 0.015485891699790954, "step": 106140 }, { "epoch": 0.9990117647058824, "grad_norm": 0.46518758372478053, "learning_rate": 2.0014691619283414e-06, "loss": 0.015672126412391664, "step": 106145 }, { "epoch": 0.9990588235294118, "grad_norm": 0.6093077395034814, "learning_rate": 2.0014220231721687e-06, "loss": 0.017394624650478363, "step": 106150 }, { "epoch": 0.9991058823529412, "grad_norm": 0.26567803572651294, "learning_rate": 2.001374887746486e-06, "loss": 0.011783094704151153, "step": 106155 }, { "epoch": 0.9991529411764706, "grad_norm": 0.3969522856908635, "learning_rate": 2.001327755650901e-06, "loss": 0.011945535987615585, "step": 106160 }, { "epoch": 0.9992, "grad_norm": 0.43841269033165225, "learning_rate": 2.0012806268850224e-06, "loss": 0.014318174123764038, "step": 106165 }, { "epoch": 0.9992470588235294, "grad_norm": 0.5190699704262829, "learning_rate": 2.0012335014484572e-06, "loss": 0.014499960839748383, "step": 106170 }, { "epoch": 0.9992941176470588, "grad_norm": 0.5043604252918137, "learning_rate": 2.0011863793408133e-06, "loss": 0.011141801625490189, "step": 106175 }, { "epoch": 0.9993411764705883, "grad_norm": 0.5823633415312958, "learning_rate": 2.0011392605616996e-06, "loss": 0.013904303312301636, "step": 106180 }, { "epoch": 0.9993882352941177, "grad_norm": 0.4710609896244298, "learning_rate": 2.001092145110724e-06, "loss": 0.011886753141880035, "step": 106185 }, { "epoch": 0.9994352941176471, "grad_norm": 0.41886826996998744, "learning_rate": 2.001045032987494e-06, "loss": 0.0147939071059227, "step": 106190 }, { "epoch": 0.9994823529411765, "grad_norm": 0.28456288042918626, "learning_rate": 2.0009979241916193e-06, "loss": 0.010248976200819016, "step": 106195 }, { "epoch": 0.9995294117647059, "grad_norm": 0.40613022410643357, "learning_rate": 2.000950818722707e-06, "loss": 0.009846001863479614, "step": 106200 }, { "epoch": 0.9995764705882353, "grad_norm": 0.49882194888487474, "learning_rate": 2.000903716580366e-06, "loss": 0.01202288717031479, "step": 106205 }, { "epoch": 0.9996235294117647, "grad_norm": 0.37254432948500504, "learning_rate": 2.000856617764205e-06, "loss": 0.011869561672210694, "step": 106210 }, { "epoch": 0.9996705882352941, "grad_norm": 0.5190743187286774, "learning_rate": 2.0008095222738318e-06, "loss": 0.02240423411130905, "step": 106215 }, { "epoch": 0.9997176470588235, "grad_norm": 0.42794912861204637, "learning_rate": 2.000762430108856e-06, "loss": 0.012923920154571533, "step": 106220 }, { "epoch": 0.9997647058823529, "grad_norm": 0.4855368778165714, "learning_rate": 2.0007153412688853e-06, "loss": 0.009874986112117767, "step": 106225 }, { "epoch": 0.9998117647058824, "grad_norm": 0.5885676770821906, "learning_rate": 2.0006682557535297e-06, "loss": 0.013918906450271606, "step": 106230 }, { "epoch": 0.9998588235294118, "grad_norm": 0.38134450331087827, "learning_rate": 2.000621173562397e-06, "loss": 0.010918188840150833, "step": 106235 }, { "epoch": 0.9999058823529412, "grad_norm": 0.5894911604226193, "learning_rate": 2.0005740946950954e-06, "loss": 0.013996723294258117, "step": 106240 }, { "epoch": 0.9999529411764706, "grad_norm": 1.1131129141488023, "learning_rate": 2.0005270191512356e-06, "loss": 0.014800292253494263, "step": 106245 }, { "epoch": 1.0, "grad_norm": 0.41977782158671845, "learning_rate": 2.000479946930426e-06, "loss": 0.01973336935043335, "step": 106250 }, { "epoch": 1.0, "step": 106250, "total_flos": 3635036937584640.0, "train_loss": 0.040250375388965885, "train_runtime": 214343.3461, "train_samples_per_second": 31.725, "train_steps_per_second": 0.496 } ], "logging_steps": 5, "max_steps": 106250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 30000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3635036937584640.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }