{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.48168396714915346, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012042099178728837, "grad_norm": 3.4532573223114014, "learning_rate": 1.9258545979778525e-09, "loss": 1.2769, "step": 5 }, { "epoch": 0.00024084198357457673, "grad_norm": 3.2849907875061035, "learning_rate": 4.333172845450169e-09, "loss": 1.2562, "step": 10 }, { "epoch": 0.00036126297536186506, "grad_norm": 3.0698747634887695, "learning_rate": 6.740491092922484e-09, "loss": 1.2727, "step": 15 }, { "epoch": 0.00048168396714915346, "grad_norm": 3.3526523113250732, "learning_rate": 9.1478093403948e-09, "loss": 1.2471, "step": 20 }, { "epoch": 0.0006021049589364418, "grad_norm": 3.5746824741363525, "learning_rate": 1.1555127587867116e-08, "loss": 1.2575, "step": 25 }, { "epoch": 0.0007225259507237301, "grad_norm": 3.2632179260253906, "learning_rate": 1.3962445835339432e-08, "loss": 1.1786, "step": 30 }, { "epoch": 0.0008429469425110186, "grad_norm": 3.0086328983306885, "learning_rate": 1.636976408281175e-08, "loss": 1.4168, "step": 35 }, { "epoch": 0.0009633679342983069, "grad_norm": 3.5532827377319336, "learning_rate": 1.8777082330284063e-08, "loss": 1.2169, "step": 40 }, { "epoch": 0.0010837889260855952, "grad_norm": 3.0027787685394287, "learning_rate": 2.118440057775638e-08, "loss": 1.2513, "step": 45 }, { "epoch": 0.0012042099178728835, "grad_norm": 3.6059303283691406, "learning_rate": 2.3591718825228694e-08, "loss": 1.2505, "step": 50 }, { "epoch": 0.0013246309096601719, "grad_norm": 2.860373020172119, "learning_rate": 2.599903707270101e-08, "loss": 1.2457, "step": 55 }, { "epoch": 0.0014450519014474602, "grad_norm": 3.1921334266662598, "learning_rate": 2.8406355320173326e-08, "loss": 1.2627, "step": 60 }, { "epoch": 0.0015654728932347486, "grad_norm": 3.080982208251953, "learning_rate": 3.081367356764564e-08, "loss": 1.1995, "step": 65 }, { "epoch": 0.0016858938850220371, "grad_norm": 3.90539288520813, "learning_rate": 3.3220991815117954e-08, "loss": 1.3055, "step": 70 }, { "epoch": 0.0018063148768093255, "grad_norm": 3.3694944381713867, "learning_rate": 3.5628310062590275e-08, "loss": 1.2396, "step": 75 }, { "epoch": 0.0019267358685966139, "grad_norm": 3.218294143676758, "learning_rate": 3.803562831006259e-08, "loss": 1.2292, "step": 80 }, { "epoch": 0.002047156860383902, "grad_norm": 3.063572406768799, "learning_rate": 4.04429465575349e-08, "loss": 1.253, "step": 85 }, { "epoch": 0.0021675778521711903, "grad_norm": 3.4931082725524902, "learning_rate": 4.285026480500722e-08, "loss": 1.2709, "step": 90 }, { "epoch": 0.002287998843958479, "grad_norm": 3.7916605472564697, "learning_rate": 4.525758305247954e-08, "loss": 1.2878, "step": 95 }, { "epoch": 0.002408419835745767, "grad_norm": 2.3481788635253906, "learning_rate": 4.766490129995185e-08, "loss": 1.2535, "step": 100 }, { "epoch": 0.0025288408275330556, "grad_norm": 2.8467800617218018, "learning_rate": 5.0072219547424166e-08, "loss": 1.2573, "step": 105 }, { "epoch": 0.0026492618193203438, "grad_norm": 3.1305465698242188, "learning_rate": 5.2479537794896487e-08, "loss": 1.2112, "step": 110 }, { "epoch": 0.0027696828111076323, "grad_norm": 2.8494081497192383, "learning_rate": 5.48868560423688e-08, "loss": 1.2003, "step": 115 }, { "epoch": 0.0028901038028949205, "grad_norm": 3.01526141166687, "learning_rate": 5.7294174289841115e-08, "loss": 1.2025, "step": 120 }, { "epoch": 0.003010524794682209, "grad_norm": 3.138826370239258, "learning_rate": 5.970149253731343e-08, "loss": 1.2197, "step": 125 }, { "epoch": 0.003130945786469497, "grad_norm": 3.5005788803100586, "learning_rate": 6.210881078478574e-08, "loss": 1.2479, "step": 130 }, { "epoch": 0.0032513667782567857, "grad_norm": 3.51312255859375, "learning_rate": 6.451612903225806e-08, "loss": 1.2142, "step": 135 }, { "epoch": 0.0033717877700440743, "grad_norm": 2.6358306407928467, "learning_rate": 6.692344727973037e-08, "loss": 1.2545, "step": 140 }, { "epoch": 0.0034922087618313624, "grad_norm": 3.6043667793273926, "learning_rate": 6.93307655272027e-08, "loss": 1.2427, "step": 145 }, { "epoch": 0.003612629753618651, "grad_norm": 3.161194324493408, "learning_rate": 7.173808377467501e-08, "loss": 1.2686, "step": 150 }, { "epoch": 0.003733050745405939, "grad_norm": 3.4229471683502197, "learning_rate": 7.414540202214733e-08, "loss": 1.3099, "step": 155 }, { "epoch": 0.0038534717371932277, "grad_norm": 3.3181447982788086, "learning_rate": 7.655272026961963e-08, "loss": 1.2736, "step": 160 }, { "epoch": 0.003973892728980516, "grad_norm": 3.1612415313720703, "learning_rate": 7.896003851709195e-08, "loss": 1.2001, "step": 165 }, { "epoch": 0.004094313720767804, "grad_norm": 2.884935140609741, "learning_rate": 8.136735676456427e-08, "loss": 1.2524, "step": 170 }, { "epoch": 0.0042147347125550925, "grad_norm": 2.7151708602905273, "learning_rate": 8.377467501203658e-08, "loss": 1.2547, "step": 175 }, { "epoch": 0.004335155704342381, "grad_norm": 3.2015910148620605, "learning_rate": 8.618199325950891e-08, "loss": 1.2492, "step": 180 }, { "epoch": 0.00445557669612967, "grad_norm": 3.480416774749756, "learning_rate": 8.858931150698122e-08, "loss": 1.2558, "step": 185 }, { "epoch": 0.004575997687916958, "grad_norm": 3.1630730628967285, "learning_rate": 9.099662975445354e-08, "loss": 1.2078, "step": 190 }, { "epoch": 0.004696418679704246, "grad_norm": 3.1624763011932373, "learning_rate": 9.340394800192584e-08, "loss": 1.2021, "step": 195 }, { "epoch": 0.004816839671491534, "grad_norm": 2.6630239486694336, "learning_rate": 9.581126624939817e-08, "loss": 1.1915, "step": 200 }, { "epoch": 0.004937260663278823, "grad_norm": 2.7018954753875732, "learning_rate": 9.821858449687048e-08, "loss": 1.2762, "step": 205 }, { "epoch": 0.005057681655066111, "grad_norm": 3.0080630779266357, "learning_rate": 1.006259027443428e-07, "loss": 1.2456, "step": 210 }, { "epoch": 0.005178102646853399, "grad_norm": 2.8256232738494873, "learning_rate": 1.0303322099181512e-07, "loss": 1.218, "step": 215 }, { "epoch": 0.0052985236386406875, "grad_norm": 3.1638636589050293, "learning_rate": 1.0544053923928744e-07, "loss": 1.2508, "step": 220 }, { "epoch": 0.0054189446304279765, "grad_norm": 2.64296555519104, "learning_rate": 1.0784785748675975e-07, "loss": 1.2768, "step": 225 }, { "epoch": 0.005539365622215265, "grad_norm": 2.2767012119293213, "learning_rate": 1.1025517573423205e-07, "loss": 1.1927, "step": 230 }, { "epoch": 0.005659786614002553, "grad_norm": 2.666334629058838, "learning_rate": 1.1266249398170438e-07, "loss": 1.1981, "step": 235 }, { "epoch": 0.005780207605789841, "grad_norm": 3.0129833221435547, "learning_rate": 1.1506981222917669e-07, "loss": 1.1702, "step": 240 }, { "epoch": 0.00590062859757713, "grad_norm": 2.534895896911621, "learning_rate": 1.17477130476649e-07, "loss": 1.2273, "step": 245 }, { "epoch": 0.006021049589364418, "grad_norm": 3.0159912109375, "learning_rate": 1.1988444872412132e-07, "loss": 1.1828, "step": 250 }, { "epoch": 0.006141470581151706, "grad_norm": 2.4064695835113525, "learning_rate": 1.2229176697159363e-07, "loss": 1.1927, "step": 255 }, { "epoch": 0.006261891572938994, "grad_norm": 2.506269693374634, "learning_rate": 1.2469908521906595e-07, "loss": 1.2452, "step": 260 }, { "epoch": 0.006382312564726283, "grad_norm": 2.26957631111145, "learning_rate": 1.271064034665383e-07, "loss": 1.2155, "step": 265 }, { "epoch": 0.0065027335565135715, "grad_norm": 2.604719638824463, "learning_rate": 1.295137217140106e-07, "loss": 1.1983, "step": 270 }, { "epoch": 0.00662315454830086, "grad_norm": 2.3419127464294434, "learning_rate": 1.3192103996148292e-07, "loss": 1.2137, "step": 275 }, { "epoch": 0.006743575540088149, "grad_norm": 2.5202889442443848, "learning_rate": 1.343283582089552e-07, "loss": 1.2272, "step": 280 }, { "epoch": 0.006863996531875437, "grad_norm": 2.3413245677948, "learning_rate": 1.3673567645642752e-07, "loss": 1.2049, "step": 285 }, { "epoch": 0.006984417523662725, "grad_norm": 2.6377112865448, "learning_rate": 1.3914299470389983e-07, "loss": 1.2673, "step": 290 }, { "epoch": 0.007104838515450013, "grad_norm": 2.182490825653076, "learning_rate": 1.4155031295137215e-07, "loss": 1.2062, "step": 295 }, { "epoch": 0.007225259507237302, "grad_norm": 2.1873981952667236, "learning_rate": 1.439576311988445e-07, "loss": 1.1677, "step": 300 }, { "epoch": 0.00734568049902459, "grad_norm": 2.436870574951172, "learning_rate": 1.463649494463168e-07, "loss": 1.1813, "step": 305 }, { "epoch": 0.007466101490811878, "grad_norm": 2.0647244453430176, "learning_rate": 1.4877226769378912e-07, "loss": 1.1143, "step": 310 }, { "epoch": 0.007586522482599166, "grad_norm": 2.045997381210327, "learning_rate": 1.5117958594126143e-07, "loss": 1.2223, "step": 315 }, { "epoch": 0.007706943474386455, "grad_norm": 2.0606868267059326, "learning_rate": 1.5358690418873374e-07, "loss": 1.184, "step": 320 }, { "epoch": 0.007827364466173744, "grad_norm": 2.2403106689453125, "learning_rate": 1.5599422243620606e-07, "loss": 1.1599, "step": 325 }, { "epoch": 0.007947785457961033, "grad_norm": 1.9159278869628906, "learning_rate": 1.5840154068367837e-07, "loss": 1.1863, "step": 330 }, { "epoch": 0.00806820644974832, "grad_norm": 2.374685525894165, "learning_rate": 1.608088589311507e-07, "loss": 1.2206, "step": 335 }, { "epoch": 0.008188627441535609, "grad_norm": 2.1852266788482666, "learning_rate": 1.6321617717862303e-07, "loss": 1.165, "step": 340 }, { "epoch": 0.008309048433322896, "grad_norm": 2.137631416320801, "learning_rate": 1.6562349542609534e-07, "loss": 1.177, "step": 345 }, { "epoch": 0.008429469425110185, "grad_norm": 2.00748348236084, "learning_rate": 1.6803081367356763e-07, "loss": 1.0761, "step": 350 }, { "epoch": 0.008549890416897474, "grad_norm": 1.9672129154205322, "learning_rate": 1.7043813192103994e-07, "loss": 1.1491, "step": 355 }, { "epoch": 0.008670311408684761, "grad_norm": 2.3764731884002686, "learning_rate": 1.7284545016851226e-07, "loss": 1.2437, "step": 360 }, { "epoch": 0.00879073240047205, "grad_norm": 2.0609636306762695, "learning_rate": 1.7525276841598457e-07, "loss": 1.1461, "step": 365 }, { "epoch": 0.00891115339225934, "grad_norm": 1.9100985527038574, "learning_rate": 1.7766008666345688e-07, "loss": 1.1708, "step": 370 }, { "epoch": 0.009031574384046627, "grad_norm": 1.9871859550476074, "learning_rate": 1.8006740491092923e-07, "loss": 1.1183, "step": 375 }, { "epoch": 0.009151995375833916, "grad_norm": 2.0929617881774902, "learning_rate": 1.8247472315840154e-07, "loss": 1.2154, "step": 380 }, { "epoch": 0.009272416367621203, "grad_norm": 2.0678470134735107, "learning_rate": 1.8488204140587385e-07, "loss": 1.1872, "step": 385 }, { "epoch": 0.009392837359408492, "grad_norm": 2.0919220447540283, "learning_rate": 1.8728935965334617e-07, "loss": 1.1782, "step": 390 }, { "epoch": 0.009513258351195781, "grad_norm": 1.8138477802276611, "learning_rate": 1.8969667790081848e-07, "loss": 1.1245, "step": 395 }, { "epoch": 0.009633679342983068, "grad_norm": 2.0728824138641357, "learning_rate": 1.921039961482908e-07, "loss": 1.1498, "step": 400 }, { "epoch": 0.009754100334770357, "grad_norm": 2.1343467235565186, "learning_rate": 1.945113143957631e-07, "loss": 1.2057, "step": 405 }, { "epoch": 0.009874521326557646, "grad_norm": 1.8334988355636597, "learning_rate": 1.9691863264323545e-07, "loss": 1.1354, "step": 410 }, { "epoch": 0.009994942318344933, "grad_norm": 1.8179374933242798, "learning_rate": 1.9932595089070774e-07, "loss": 1.172, "step": 415 }, { "epoch": 0.010115363310132222, "grad_norm": 1.8935538530349731, "learning_rate": 2.0173326913818005e-07, "loss": 1.2059, "step": 420 }, { "epoch": 0.010235784301919511, "grad_norm": 2.025412082672119, "learning_rate": 2.0414058738565237e-07, "loss": 1.1687, "step": 425 }, { "epoch": 0.010356205293706799, "grad_norm": 1.9169843196868896, "learning_rate": 2.0654790563312468e-07, "loss": 1.2163, "step": 430 }, { "epoch": 0.010476626285494088, "grad_norm": 1.9890685081481934, "learning_rate": 2.08955223880597e-07, "loss": 1.1798, "step": 435 }, { "epoch": 0.010597047277281375, "grad_norm": 1.7384068965911865, "learning_rate": 2.113625421280693e-07, "loss": 1.103, "step": 440 }, { "epoch": 0.010717468269068664, "grad_norm": 1.8030136823654175, "learning_rate": 2.1376986037554165e-07, "loss": 1.2152, "step": 445 }, { "epoch": 0.010837889260855953, "grad_norm": 1.7656060457229614, "learning_rate": 2.1617717862301396e-07, "loss": 1.1988, "step": 450 }, { "epoch": 0.01095831025264324, "grad_norm": 2.1653716564178467, "learning_rate": 2.1858449687048628e-07, "loss": 1.1923, "step": 455 }, { "epoch": 0.01107873124443053, "grad_norm": 2.042724132537842, "learning_rate": 2.209918151179586e-07, "loss": 1.216, "step": 460 }, { "epoch": 0.011199152236217818, "grad_norm": 1.65592360496521, "learning_rate": 2.233991333654309e-07, "loss": 1.1506, "step": 465 }, { "epoch": 0.011319573228005106, "grad_norm": 1.6511238813400269, "learning_rate": 2.2580645161290322e-07, "loss": 1.1547, "step": 470 }, { "epoch": 0.011439994219792395, "grad_norm": 1.8964343070983887, "learning_rate": 2.2821376986037553e-07, "loss": 1.1387, "step": 475 }, { "epoch": 0.011560415211579682, "grad_norm": 1.615464210510254, "learning_rate": 2.3062108810784782e-07, "loss": 1.1492, "step": 480 }, { "epoch": 0.01168083620336697, "grad_norm": 2.1037256717681885, "learning_rate": 2.3302840635532016e-07, "loss": 1.1664, "step": 485 }, { "epoch": 0.01180125719515426, "grad_norm": 1.4680767059326172, "learning_rate": 2.3543572460279248e-07, "loss": 1.1456, "step": 490 }, { "epoch": 0.011921678186941547, "grad_norm": 1.7871757745742798, "learning_rate": 2.378430428502648e-07, "loss": 1.1777, "step": 495 }, { "epoch": 0.012042099178728836, "grad_norm": 1.7084572315216064, "learning_rate": 2.402503610977371e-07, "loss": 1.1512, "step": 500 }, { "epoch": 0.012162520170516125, "grad_norm": 1.815544605255127, "learning_rate": 2.426576793452094e-07, "loss": 1.153, "step": 505 }, { "epoch": 0.012282941162303412, "grad_norm": 1.6143004894256592, "learning_rate": 2.4506499759268173e-07, "loss": 1.131, "step": 510 }, { "epoch": 0.012403362154090701, "grad_norm": 2.2032082080841064, "learning_rate": 2.4747231584015405e-07, "loss": 1.1916, "step": 515 }, { "epoch": 0.012523783145877989, "grad_norm": 1.9207594394683838, "learning_rate": 2.4987963408762636e-07, "loss": 1.1398, "step": 520 }, { "epoch": 0.012644204137665278, "grad_norm": 1.954811930656433, "learning_rate": 2.522869523350987e-07, "loss": 1.162, "step": 525 }, { "epoch": 0.012764625129452567, "grad_norm": 1.676818609237671, "learning_rate": 2.54694270582571e-07, "loss": 1.1526, "step": 530 }, { "epoch": 0.012885046121239854, "grad_norm": 1.7171730995178223, "learning_rate": 2.5710158883004335e-07, "loss": 1.1389, "step": 535 }, { "epoch": 0.013005467113027143, "grad_norm": 1.7267348766326904, "learning_rate": 2.5950890707751567e-07, "loss": 1.1892, "step": 540 }, { "epoch": 0.013125888104814432, "grad_norm": 1.7431941032409668, "learning_rate": 2.61916225324988e-07, "loss": 1.0527, "step": 545 }, { "epoch": 0.01324630909660172, "grad_norm": 1.5393019914627075, "learning_rate": 2.643235435724603e-07, "loss": 1.1091, "step": 550 }, { "epoch": 0.013366730088389008, "grad_norm": 1.6455602645874023, "learning_rate": 2.667308618199326e-07, "loss": 1.108, "step": 555 }, { "epoch": 0.013487151080176297, "grad_norm": 1.696184754371643, "learning_rate": 2.691381800674049e-07, "loss": 1.1133, "step": 560 }, { "epoch": 0.013607572071963584, "grad_norm": 1.7459622621536255, "learning_rate": 2.7154549831487724e-07, "loss": 1.1074, "step": 565 }, { "epoch": 0.013727993063750873, "grad_norm": 1.5549553632736206, "learning_rate": 2.7395281656234955e-07, "loss": 1.1855, "step": 570 }, { "epoch": 0.01384841405553816, "grad_norm": 1.6191879510879517, "learning_rate": 2.7636013480982187e-07, "loss": 1.075, "step": 575 }, { "epoch": 0.01396883504732545, "grad_norm": 1.6304470300674438, "learning_rate": 2.787674530572942e-07, "loss": 1.1123, "step": 580 }, { "epoch": 0.014089256039112739, "grad_norm": 1.7658319473266602, "learning_rate": 2.8117477130476644e-07, "loss": 1.1768, "step": 585 }, { "epoch": 0.014209677030900026, "grad_norm": 1.5940040349960327, "learning_rate": 2.8358208955223876e-07, "loss": 1.1377, "step": 590 }, { "epoch": 0.014330098022687315, "grad_norm": 1.5074690580368042, "learning_rate": 2.8598940779971107e-07, "loss": 1.1538, "step": 595 }, { "epoch": 0.014450519014474604, "grad_norm": 1.7436341047286987, "learning_rate": 2.883967260471834e-07, "loss": 1.0854, "step": 600 }, { "epoch": 0.014570940006261891, "grad_norm": 1.6860382556915283, "learning_rate": 2.908040442946557e-07, "loss": 1.1191, "step": 605 }, { "epoch": 0.01469136099804918, "grad_norm": 1.799471378326416, "learning_rate": 2.9321136254212807e-07, "loss": 1.1518, "step": 610 }, { "epoch": 0.014811781989836468, "grad_norm": 1.7893131971359253, "learning_rate": 2.956186807896004e-07, "loss": 1.1027, "step": 615 }, { "epoch": 0.014932202981623757, "grad_norm": 1.6366047859191895, "learning_rate": 2.980259990370727e-07, "loss": 1.1171, "step": 620 }, { "epoch": 0.015052623973411046, "grad_norm": 1.6394248008728027, "learning_rate": 3.00433317284545e-07, "loss": 1.101, "step": 625 }, { "epoch": 0.015173044965198333, "grad_norm": 1.658291220664978, "learning_rate": 3.028406355320173e-07, "loss": 1.144, "step": 630 }, { "epoch": 0.015293465956985622, "grad_norm": 1.7101342678070068, "learning_rate": 3.0524795377948964e-07, "loss": 1.0963, "step": 635 }, { "epoch": 0.01541388694877291, "grad_norm": 2.0829336643218994, "learning_rate": 3.0765527202696195e-07, "loss": 1.1316, "step": 640 }, { "epoch": 0.015534307940560198, "grad_norm": 1.643818736076355, "learning_rate": 3.1006259027443426e-07, "loss": 1.1402, "step": 645 }, { "epoch": 0.015654728932347487, "grad_norm": 1.5390375852584839, "learning_rate": 3.124699085219066e-07, "loss": 1.1286, "step": 650 }, { "epoch": 0.015775149924134774, "grad_norm": 1.5295696258544922, "learning_rate": 3.148772267693789e-07, "loss": 1.0913, "step": 655 }, { "epoch": 0.015895570915922065, "grad_norm": 1.7846722602844238, "learning_rate": 3.172845450168512e-07, "loss": 1.1475, "step": 660 }, { "epoch": 0.016015991907709352, "grad_norm": 1.5267643928527832, "learning_rate": 3.196918632643235e-07, "loss": 1.147, "step": 665 }, { "epoch": 0.01613641289949664, "grad_norm": 1.5753979682922363, "learning_rate": 3.2209918151179583e-07, "loss": 1.1521, "step": 670 }, { "epoch": 0.016256833891283927, "grad_norm": 1.597781777381897, "learning_rate": 3.2450649975926815e-07, "loss": 1.1069, "step": 675 }, { "epoch": 0.016377254883071218, "grad_norm": 1.9727338552474976, "learning_rate": 3.2691381800674046e-07, "loss": 1.1125, "step": 680 }, { "epoch": 0.016497675874858505, "grad_norm": 1.6481128931045532, "learning_rate": 3.2932113625421283e-07, "loss": 1.1188, "step": 685 }, { "epoch": 0.016618096866645792, "grad_norm": 1.6717129945755005, "learning_rate": 3.3172845450168514e-07, "loss": 1.1382, "step": 690 }, { "epoch": 0.016738517858433083, "grad_norm": 1.7000445127487183, "learning_rate": 3.3413577274915746e-07, "loss": 1.0947, "step": 695 }, { "epoch": 0.01685893885022037, "grad_norm": 1.8228042125701904, "learning_rate": 3.3654309099662977e-07, "loss": 1.109, "step": 700 }, { "epoch": 0.016979359842007657, "grad_norm": 1.5345516204833984, "learning_rate": 3.389504092441021e-07, "loss": 1.1233, "step": 705 }, { "epoch": 0.017099780833794948, "grad_norm": 1.7012462615966797, "learning_rate": 3.413577274915744e-07, "loss": 1.091, "step": 710 }, { "epoch": 0.017220201825582235, "grad_norm": 1.6733700037002563, "learning_rate": 3.437650457390467e-07, "loss": 1.1274, "step": 715 }, { "epoch": 0.017340622817369523, "grad_norm": 1.6725404262542725, "learning_rate": 3.46172363986519e-07, "loss": 1.1021, "step": 720 }, { "epoch": 0.017461043809156813, "grad_norm": 1.902208924293518, "learning_rate": 3.485796822339913e-07, "loss": 1.1609, "step": 725 }, { "epoch": 0.0175814648009441, "grad_norm": 1.8290274143218994, "learning_rate": 3.509870004814636e-07, "loss": 1.0894, "step": 730 }, { "epoch": 0.017701885792731388, "grad_norm": 1.551406979560852, "learning_rate": 3.533943187289359e-07, "loss": 1.0974, "step": 735 }, { "epoch": 0.01782230678451868, "grad_norm": 1.6504721641540527, "learning_rate": 3.5580163697640823e-07, "loss": 1.0495, "step": 740 }, { "epoch": 0.017942727776305966, "grad_norm": 1.7599427700042725, "learning_rate": 3.5820895522388055e-07, "loss": 1.0976, "step": 745 }, { "epoch": 0.018063148768093253, "grad_norm": 1.696821689605713, "learning_rate": 3.6061627347135286e-07, "loss": 1.1485, "step": 750 }, { "epoch": 0.018183569759880544, "grad_norm": 1.7811964750289917, "learning_rate": 3.6302359171882523e-07, "loss": 1.0991, "step": 755 }, { "epoch": 0.01830399075166783, "grad_norm": 1.7591949701309204, "learning_rate": 3.6543090996629754e-07, "loss": 1.1346, "step": 760 }, { "epoch": 0.01842441174345512, "grad_norm": 1.4767632484436035, "learning_rate": 3.6783822821376986e-07, "loss": 1.0937, "step": 765 }, { "epoch": 0.018544832735242406, "grad_norm": 1.8599060773849487, "learning_rate": 3.7024554646124217e-07, "loss": 1.0742, "step": 770 }, { "epoch": 0.018665253727029697, "grad_norm": 1.5535491704940796, "learning_rate": 3.726528647087145e-07, "loss": 1.1141, "step": 775 }, { "epoch": 0.018785674718816984, "grad_norm": 1.7345106601715088, "learning_rate": 3.750601829561868e-07, "loss": 1.0477, "step": 780 }, { "epoch": 0.01890609571060427, "grad_norm": 1.6146178245544434, "learning_rate": 3.774675012036591e-07, "loss": 1.0765, "step": 785 }, { "epoch": 0.019026516702391562, "grad_norm": 1.61898672580719, "learning_rate": 3.798748194511314e-07, "loss": 1.0599, "step": 790 }, { "epoch": 0.01914693769417885, "grad_norm": 1.8047658205032349, "learning_rate": 3.8228213769860374e-07, "loss": 1.1565, "step": 795 }, { "epoch": 0.019267358685966136, "grad_norm": 1.7502000331878662, "learning_rate": 3.8468945594607605e-07, "loss": 1.1029, "step": 800 }, { "epoch": 0.019387779677753427, "grad_norm": 1.7901040315628052, "learning_rate": 3.8709677419354837e-07, "loss": 1.0321, "step": 805 }, { "epoch": 0.019508200669540714, "grad_norm": 1.710308313369751, "learning_rate": 3.895040924410207e-07, "loss": 1.1007, "step": 810 }, { "epoch": 0.019628621661328, "grad_norm": 1.6469848155975342, "learning_rate": 3.91911410688493e-07, "loss": 1.0919, "step": 815 }, { "epoch": 0.019749042653115292, "grad_norm": 1.9024361371994019, "learning_rate": 3.943187289359653e-07, "loss": 1.1408, "step": 820 }, { "epoch": 0.01986946364490258, "grad_norm": 1.4699581861495972, "learning_rate": 3.967260471834376e-07, "loss": 1.0799, "step": 825 }, { "epoch": 0.019989884636689867, "grad_norm": 1.7625906467437744, "learning_rate": 3.9913336543091e-07, "loss": 1.0956, "step": 830 }, { "epoch": 0.020110305628477158, "grad_norm": 1.45587158203125, "learning_rate": 4.015406836783823e-07, "loss": 1.1026, "step": 835 }, { "epoch": 0.020230726620264445, "grad_norm": 1.5896087884902954, "learning_rate": 4.039480019258546e-07, "loss": 1.1299, "step": 840 }, { "epoch": 0.020351147612051732, "grad_norm": 1.5886921882629395, "learning_rate": 4.0635532017332693e-07, "loss": 1.0504, "step": 845 }, { "epoch": 0.020471568603839023, "grad_norm": 1.5642247200012207, "learning_rate": 4.0876263842079925e-07, "loss": 1.0425, "step": 850 }, { "epoch": 0.02059198959562631, "grad_norm": 1.7490164041519165, "learning_rate": 4.111699566682715e-07, "loss": 1.047, "step": 855 }, { "epoch": 0.020712410587413597, "grad_norm": 1.6565054655075073, "learning_rate": 4.135772749157438e-07, "loss": 1.0785, "step": 860 }, { "epoch": 0.020832831579200885, "grad_norm": 1.748234510421753, "learning_rate": 4.1598459316321614e-07, "loss": 1.09, "step": 865 }, { "epoch": 0.020953252570988175, "grad_norm": 1.5565547943115234, "learning_rate": 4.1839191141068845e-07, "loss": 1.043, "step": 870 }, { "epoch": 0.021073673562775463, "grad_norm": 1.482420802116394, "learning_rate": 4.2079922965816076e-07, "loss": 1.0658, "step": 875 }, { "epoch": 0.02119409455456275, "grad_norm": 1.6103140115737915, "learning_rate": 4.232065479056331e-07, "loss": 1.0916, "step": 880 }, { "epoch": 0.02131451554635004, "grad_norm": 1.6495821475982666, "learning_rate": 4.256138661531054e-07, "loss": 1.0948, "step": 885 }, { "epoch": 0.021434936538137328, "grad_norm": 1.6755701303482056, "learning_rate": 4.280211844005777e-07, "loss": 1.0383, "step": 890 }, { "epoch": 0.021555357529924615, "grad_norm": 1.5431663990020752, "learning_rate": 4.3042850264805e-07, "loss": 1.1197, "step": 895 }, { "epoch": 0.021675778521711906, "grad_norm": 1.7741978168487549, "learning_rate": 4.3283582089552234e-07, "loss": 1.0617, "step": 900 }, { "epoch": 0.021796199513499193, "grad_norm": 1.4438221454620361, "learning_rate": 4.352431391429947e-07, "loss": 1.0777, "step": 905 }, { "epoch": 0.02191662050528648, "grad_norm": 1.625065565109253, "learning_rate": 4.37650457390467e-07, "loss": 1.01, "step": 910 }, { "epoch": 0.02203704149707377, "grad_norm": 1.5660688877105713, "learning_rate": 4.4005777563793933e-07, "loss": 1.0914, "step": 915 }, { "epoch": 0.02215746248886106, "grad_norm": 1.5976072549819946, "learning_rate": 4.4246509388541164e-07, "loss": 1.0577, "step": 920 }, { "epoch": 0.022277883480648346, "grad_norm": 1.7111833095550537, "learning_rate": 4.4487241213288396e-07, "loss": 1.0814, "step": 925 }, { "epoch": 0.022398304472435637, "grad_norm": 1.7777079343795776, "learning_rate": 4.4727973038035627e-07, "loss": 1.0339, "step": 930 }, { "epoch": 0.022518725464222924, "grad_norm": 1.6317858695983887, "learning_rate": 4.496870486278286e-07, "loss": 1.0387, "step": 935 }, { "epoch": 0.02263914645601021, "grad_norm": 1.6453614234924316, "learning_rate": 4.520943668753009e-07, "loss": 1.078, "step": 940 }, { "epoch": 0.0227595674477975, "grad_norm": 1.5312925577163696, "learning_rate": 4.545016851227732e-07, "loss": 1.0616, "step": 945 }, { "epoch": 0.02287998843958479, "grad_norm": 1.4723235368728638, "learning_rate": 4.5690900337024553e-07, "loss": 1.054, "step": 950 }, { "epoch": 0.023000409431372076, "grad_norm": 1.6957050561904907, "learning_rate": 4.5931632161771784e-07, "loss": 1.0612, "step": 955 }, { "epoch": 0.023120830423159364, "grad_norm": 1.5759074687957764, "learning_rate": 4.6172363986519016e-07, "loss": 1.005, "step": 960 }, { "epoch": 0.023241251414946654, "grad_norm": 1.7994741201400757, "learning_rate": 4.6413095811266247e-07, "loss": 1.115, "step": 965 }, { "epoch": 0.02336167240673394, "grad_norm": 1.5642887353897095, "learning_rate": 4.665382763601348e-07, "loss": 1.1111, "step": 970 }, { "epoch": 0.02348209339852123, "grad_norm": 1.8060336112976074, "learning_rate": 4.6894559460760715e-07, "loss": 1.0129, "step": 975 }, { "epoch": 0.02360251439030852, "grad_norm": 1.6655142307281494, "learning_rate": 4.7135291285507947e-07, "loss": 1.0414, "step": 980 }, { "epoch": 0.023722935382095807, "grad_norm": 1.7621194124221802, "learning_rate": 4.737602311025518e-07, "loss": 1.1035, "step": 985 }, { "epoch": 0.023843356373883094, "grad_norm": 1.5201829671859741, "learning_rate": 4.7616754935002404e-07, "loss": 1.0852, "step": 990 }, { "epoch": 0.023963777365670385, "grad_norm": 1.6027559041976929, "learning_rate": 4.785748675974964e-07, "loss": 1.0891, "step": 995 }, { "epoch": 0.024084198357457672, "grad_norm": 1.7190868854522705, "learning_rate": 4.809821858449687e-07, "loss": 1.092, "step": 1000 }, { "epoch": 0.02420461934924496, "grad_norm": 1.8872677087783813, "learning_rate": 4.83389504092441e-07, "loss": 1.0614, "step": 1005 }, { "epoch": 0.02432504034103225, "grad_norm": 1.4623500108718872, "learning_rate": 4.857968223399134e-07, "loss": 1.0281, "step": 1010 }, { "epoch": 0.024445461332819537, "grad_norm": 1.6613128185272217, "learning_rate": 4.882041405873856e-07, "loss": 1.0937, "step": 1015 }, { "epoch": 0.024565882324606825, "grad_norm": 1.533061146736145, "learning_rate": 4.90611458834858e-07, "loss": 1.0469, "step": 1020 }, { "epoch": 0.024686303316394115, "grad_norm": 1.6154255867004395, "learning_rate": 4.930187770823302e-07, "loss": 1.0317, "step": 1025 }, { "epoch": 0.024806724308181403, "grad_norm": 2.1915018558502197, "learning_rate": 4.954260953298026e-07, "loss": 1.0589, "step": 1030 }, { "epoch": 0.02492714529996869, "grad_norm": 1.678594708442688, "learning_rate": 4.978334135772749e-07, "loss": 0.9996, "step": 1035 }, { "epoch": 0.025047566291755977, "grad_norm": 2.124476909637451, "learning_rate": 5.002407318247472e-07, "loss": 1.0824, "step": 1040 }, { "epoch": 0.025167987283543268, "grad_norm": 1.5182719230651855, "learning_rate": 5.026480500722196e-07, "loss": 1.0825, "step": 1045 }, { "epoch": 0.025288408275330555, "grad_norm": 2.0846803188323975, "learning_rate": 5.050553683196919e-07, "loss": 1.037, "step": 1050 }, { "epoch": 0.025408829267117843, "grad_norm": 1.5982701778411865, "learning_rate": 5.074626865671642e-07, "loss": 0.988, "step": 1055 }, { "epoch": 0.025529250258905133, "grad_norm": 1.4213273525238037, "learning_rate": 5.098700048146365e-07, "loss": 1.0748, "step": 1060 }, { "epoch": 0.02564967125069242, "grad_norm": 1.5166436433792114, "learning_rate": 5.122773230621088e-07, "loss": 1.0678, "step": 1065 }, { "epoch": 0.025770092242479708, "grad_norm": 1.5300348997116089, "learning_rate": 5.146846413095811e-07, "loss": 1.0731, "step": 1070 }, { "epoch": 0.025890513234267, "grad_norm": 1.6487699747085571, "learning_rate": 5.170919595570534e-07, "loss": 1.0892, "step": 1075 }, { "epoch": 0.026010934226054286, "grad_norm": 1.641617774963379, "learning_rate": 5.194992778045257e-07, "loss": 1.0668, "step": 1080 }, { "epoch": 0.026131355217841573, "grad_norm": 1.632634162902832, "learning_rate": 5.21906596051998e-07, "loss": 1.1131, "step": 1085 }, { "epoch": 0.026251776209628864, "grad_norm": 1.6787406206130981, "learning_rate": 5.243139142994704e-07, "loss": 1.0292, "step": 1090 }, { "epoch": 0.02637219720141615, "grad_norm": 1.5313917398452759, "learning_rate": 5.267212325469426e-07, "loss": 1.0467, "step": 1095 }, { "epoch": 0.02649261819320344, "grad_norm": 1.5146945714950562, "learning_rate": 5.29128550794415e-07, "loss": 1.097, "step": 1100 }, { "epoch": 0.02661303918499073, "grad_norm": 1.5477287769317627, "learning_rate": 5.315358690418873e-07, "loss": 1.0578, "step": 1105 }, { "epoch": 0.026733460176778016, "grad_norm": 1.5698777437210083, "learning_rate": 5.339431872893596e-07, "loss": 1.0324, "step": 1110 }, { "epoch": 0.026853881168565304, "grad_norm": 1.6078376770019531, "learning_rate": 5.363505055368319e-07, "loss": 1.0931, "step": 1115 }, { "epoch": 0.026974302160352594, "grad_norm": 1.6052172183990479, "learning_rate": 5.387578237843043e-07, "loss": 0.9986, "step": 1120 }, { "epoch": 0.02709472315213988, "grad_norm": 1.492550015449524, "learning_rate": 5.411651420317765e-07, "loss": 1.0325, "step": 1125 }, { "epoch": 0.02721514414392717, "grad_norm": 1.6259959936141968, "learning_rate": 5.435724602792489e-07, "loss": 1.0123, "step": 1130 }, { "epoch": 0.027335565135714456, "grad_norm": 1.3507508039474487, "learning_rate": 5.459797785267211e-07, "loss": 1.0907, "step": 1135 }, { "epoch": 0.027455986127501747, "grad_norm": 1.743194818496704, "learning_rate": 5.483870967741935e-07, "loss": 1.0432, "step": 1140 }, { "epoch": 0.027576407119289034, "grad_norm": 1.5899051427841187, "learning_rate": 5.507944150216658e-07, "loss": 1.0519, "step": 1145 }, { "epoch": 0.02769682811107632, "grad_norm": 1.5947670936584473, "learning_rate": 5.532017332691381e-07, "loss": 1.1119, "step": 1150 }, { "epoch": 0.027817249102863612, "grad_norm": 1.6205607652664185, "learning_rate": 5.556090515166104e-07, "loss": 1.0232, "step": 1155 }, { "epoch": 0.0279376700946509, "grad_norm": 1.6683999300003052, "learning_rate": 5.580163697640829e-07, "loss": 1.0367, "step": 1160 }, { "epoch": 0.028058091086438187, "grad_norm": 1.691907525062561, "learning_rate": 5.604236880115551e-07, "loss": 1.0982, "step": 1165 }, { "epoch": 0.028178512078225477, "grad_norm": 1.7351024150848389, "learning_rate": 5.628310062590275e-07, "loss": 1.0043, "step": 1170 }, { "epoch": 0.028298933070012765, "grad_norm": 1.624670386314392, "learning_rate": 5.652383245064998e-07, "loss": 1.0359, "step": 1175 }, { "epoch": 0.028419354061800052, "grad_norm": 1.611996054649353, "learning_rate": 5.676456427539721e-07, "loss": 1.0703, "step": 1180 }, { "epoch": 0.028539775053587343, "grad_norm": 1.5940327644348145, "learning_rate": 5.700529610014444e-07, "loss": 1.0034, "step": 1185 }, { "epoch": 0.02866019604537463, "grad_norm": 1.5508549213409424, "learning_rate": 5.724602792489168e-07, "loss": 0.9925, "step": 1190 }, { "epoch": 0.028780617037161917, "grad_norm": 1.7031129598617554, "learning_rate": 5.74867597496389e-07, "loss": 1.0722, "step": 1195 }, { "epoch": 0.028901038028949208, "grad_norm": 1.542175531387329, "learning_rate": 5.772749157438613e-07, "loss": 1.0558, "step": 1200 }, { "epoch": 0.029021459020736495, "grad_norm": 1.6221057176589966, "learning_rate": 5.796822339913337e-07, "loss": 0.9962, "step": 1205 }, { "epoch": 0.029141880012523783, "grad_norm": 1.7214235067367554, "learning_rate": 5.820895522388059e-07, "loss": 1.0642, "step": 1210 }, { "epoch": 0.02926230100431107, "grad_norm": 1.7093576192855835, "learning_rate": 5.844968704862783e-07, "loss": 1.0311, "step": 1215 }, { "epoch": 0.02938272199609836, "grad_norm": 1.5216920375823975, "learning_rate": 5.869041887337505e-07, "loss": 1.0486, "step": 1220 }, { "epoch": 0.029503142987885648, "grad_norm": 1.604498028755188, "learning_rate": 5.893115069812229e-07, "loss": 1.0437, "step": 1225 }, { "epoch": 0.029623563979672935, "grad_norm": 1.562648057937622, "learning_rate": 5.917188252286952e-07, "loss": 1.1011, "step": 1230 }, { "epoch": 0.029743984971460226, "grad_norm": 1.6788482666015625, "learning_rate": 5.941261434761675e-07, "loss": 1.0703, "step": 1235 }, { "epoch": 0.029864405963247513, "grad_norm": 1.5976691246032715, "learning_rate": 5.965334617236398e-07, "loss": 1.0202, "step": 1240 }, { "epoch": 0.0299848269550348, "grad_norm": 1.937235713005066, "learning_rate": 5.989407799711122e-07, "loss": 1.0541, "step": 1245 }, { "epoch": 0.03010524794682209, "grad_norm": 1.5281552076339722, "learning_rate": 6.013480982185844e-07, "loss": 1.0317, "step": 1250 }, { "epoch": 0.03022566893860938, "grad_norm": 1.8217495679855347, "learning_rate": 6.037554164660568e-07, "loss": 1.0604, "step": 1255 }, { "epoch": 0.030346089930396666, "grad_norm": 1.394348382949829, "learning_rate": 6.061627347135291e-07, "loss": 1.0598, "step": 1260 }, { "epoch": 0.030466510922183956, "grad_norm": 1.6202285289764404, "learning_rate": 6.085700529610014e-07, "loss": 1.0583, "step": 1265 }, { "epoch": 0.030586931913971244, "grad_norm": 1.7116400003433228, "learning_rate": 6.109773712084737e-07, "loss": 1.0783, "step": 1270 }, { "epoch": 0.03070735290575853, "grad_norm": 1.7912710905075073, "learning_rate": 6.13384689455946e-07, "loss": 1.0799, "step": 1275 }, { "epoch": 0.03082777389754582, "grad_norm": 1.6622925996780396, "learning_rate": 6.157920077034183e-07, "loss": 1.0426, "step": 1280 }, { "epoch": 0.03094819488933311, "grad_norm": 1.9057449102401733, "learning_rate": 6.181993259508907e-07, "loss": 1.064, "step": 1285 }, { "epoch": 0.031068615881120396, "grad_norm": 1.7786000967025757, "learning_rate": 6.206066441983629e-07, "loss": 1.062, "step": 1290 }, { "epoch": 0.031189036872907687, "grad_norm": 1.4491337537765503, "learning_rate": 6.230139624458353e-07, "loss": 1.0659, "step": 1295 }, { "epoch": 0.031309457864694974, "grad_norm": 1.662503957748413, "learning_rate": 6.254212806933076e-07, "loss": 1.0827, "step": 1300 }, { "epoch": 0.03142987885648226, "grad_norm": 1.7556451559066772, "learning_rate": 6.2782859894078e-07, "loss": 1.0, "step": 1305 }, { "epoch": 0.03155029984826955, "grad_norm": 1.7393630743026733, "learning_rate": 6.302359171882523e-07, "loss": 1.046, "step": 1310 }, { "epoch": 0.031670720840056836, "grad_norm": 1.614888310432434, "learning_rate": 6.326432354357247e-07, "loss": 1.04, "step": 1315 }, { "epoch": 0.03179114183184413, "grad_norm": 1.588411808013916, "learning_rate": 6.350505536831969e-07, "loss": 1.1276, "step": 1320 }, { "epoch": 0.03191156282363142, "grad_norm": 1.5537513494491577, "learning_rate": 6.374578719306693e-07, "loss": 1.0437, "step": 1325 }, { "epoch": 0.032031983815418705, "grad_norm": 1.452207326889038, "learning_rate": 6.398651901781416e-07, "loss": 1.0394, "step": 1330 }, { "epoch": 0.03215240480720599, "grad_norm": 1.6652417182922363, "learning_rate": 6.422725084256138e-07, "loss": 1.021, "step": 1335 }, { "epoch": 0.03227282579899328, "grad_norm": 1.5968703031539917, "learning_rate": 6.446798266730862e-07, "loss": 1.0361, "step": 1340 }, { "epoch": 0.03239324679078057, "grad_norm": 1.6294167041778564, "learning_rate": 6.470871449205584e-07, "loss": 1.0288, "step": 1345 }, { "epoch": 0.032513667782567854, "grad_norm": 2.108187675476074, "learning_rate": 6.494944631680308e-07, "loss": 1.1072, "step": 1350 }, { "epoch": 0.03263408877435515, "grad_norm": 1.5419865846633911, "learning_rate": 6.519017814155031e-07, "loss": 0.9997, "step": 1355 }, { "epoch": 0.032754509766142435, "grad_norm": 1.8960027694702148, "learning_rate": 6.543090996629754e-07, "loss": 1.0657, "step": 1360 }, { "epoch": 0.03287493075792972, "grad_norm": 1.8205565214157104, "learning_rate": 6.567164179104477e-07, "loss": 1.0377, "step": 1365 }, { "epoch": 0.03299535174971701, "grad_norm": 1.4616715908050537, "learning_rate": 6.591237361579201e-07, "loss": 1.0416, "step": 1370 }, { "epoch": 0.0331157727415043, "grad_norm": 1.6263893842697144, "learning_rate": 6.615310544053923e-07, "loss": 1.0303, "step": 1375 }, { "epoch": 0.033236193733291584, "grad_norm": 1.5705087184906006, "learning_rate": 6.639383726528647e-07, "loss": 1.0414, "step": 1380 }, { "epoch": 0.03335661472507888, "grad_norm": 1.559043526649475, "learning_rate": 6.66345690900337e-07, "loss": 1.0892, "step": 1385 }, { "epoch": 0.033477035716866166, "grad_norm": 1.5846097469329834, "learning_rate": 6.687530091478093e-07, "loss": 1.0132, "step": 1390 }, { "epoch": 0.03359745670865345, "grad_norm": 1.6560180187225342, "learning_rate": 6.711603273952816e-07, "loss": 1.0384, "step": 1395 }, { "epoch": 0.03371787770044074, "grad_norm": 1.416507601737976, "learning_rate": 6.73567645642754e-07, "loss": 0.9696, "step": 1400 }, { "epoch": 0.03383829869222803, "grad_norm": 1.463869333267212, "learning_rate": 6.759749638902262e-07, "loss": 1.0099, "step": 1405 }, { "epoch": 0.033958719684015315, "grad_norm": 1.4500248432159424, "learning_rate": 6.783822821376986e-07, "loss": 1.0786, "step": 1410 }, { "epoch": 0.03407914067580261, "grad_norm": 1.8260598182678223, "learning_rate": 6.807896003851708e-07, "loss": 1.0813, "step": 1415 }, { "epoch": 0.034199561667589896, "grad_norm": 1.6602531671524048, "learning_rate": 6.831969186326432e-07, "loss": 0.9867, "step": 1420 }, { "epoch": 0.034319982659377184, "grad_norm": 1.6317486763000488, "learning_rate": 6.856042368801155e-07, "loss": 0.9797, "step": 1425 }, { "epoch": 0.03444040365116447, "grad_norm": 1.5111490488052368, "learning_rate": 6.880115551275878e-07, "loss": 1.0456, "step": 1430 }, { "epoch": 0.03456082464295176, "grad_norm": 1.5642411708831787, "learning_rate": 6.904188733750601e-07, "loss": 0.9404, "step": 1435 }, { "epoch": 0.034681245634739045, "grad_norm": 1.9178805351257324, "learning_rate": 6.928261916225325e-07, "loss": 1.0056, "step": 1440 }, { "epoch": 0.03480166662652633, "grad_norm": 1.6757012605667114, "learning_rate": 6.952335098700047e-07, "loss": 1.0829, "step": 1445 }, { "epoch": 0.03492208761831363, "grad_norm": 1.7237210273742676, "learning_rate": 6.976408281174772e-07, "loss": 1.0273, "step": 1450 }, { "epoch": 0.035042508610100914, "grad_norm": 2.0043551921844482, "learning_rate": 7.000481463649495e-07, "loss": 1.0238, "step": 1455 }, { "epoch": 0.0351629296018882, "grad_norm": 1.5664793252944946, "learning_rate": 7.024554646124218e-07, "loss": 0.9788, "step": 1460 }, { "epoch": 0.03528335059367549, "grad_norm": 1.595779299736023, "learning_rate": 7.048627828598941e-07, "loss": 1.0589, "step": 1465 }, { "epoch": 0.035403771585462776, "grad_norm": 1.4906885623931885, "learning_rate": 7.072701011073664e-07, "loss": 1.0575, "step": 1470 }, { "epoch": 0.03552419257725006, "grad_norm": 1.6824171543121338, "learning_rate": 7.096774193548387e-07, "loss": 1.074, "step": 1475 }, { "epoch": 0.03564461356903736, "grad_norm": 1.4851865768432617, "learning_rate": 7.12084737602311e-07, "loss": 1.0499, "step": 1480 }, { "epoch": 0.035765034560824645, "grad_norm": 1.6125143766403198, "learning_rate": 7.144920558497833e-07, "loss": 1.0288, "step": 1485 }, { "epoch": 0.03588545555261193, "grad_norm": 1.6875344514846802, "learning_rate": 7.168993740972556e-07, "loss": 1.0351, "step": 1490 }, { "epoch": 0.03600587654439922, "grad_norm": 1.6106306314468384, "learning_rate": 7.19306692344728e-07, "loss": 1.0325, "step": 1495 }, { "epoch": 0.03612629753618651, "grad_norm": 1.4459638595581055, "learning_rate": 7.217140105922002e-07, "loss": 1.0201, "step": 1500 }, { "epoch": 0.036246718527973794, "grad_norm": 1.626741647720337, "learning_rate": 7.241213288396726e-07, "loss": 1.0355, "step": 1505 }, { "epoch": 0.03636713951976109, "grad_norm": 1.4596201181411743, "learning_rate": 7.265286470871449e-07, "loss": 1.0033, "step": 1510 }, { "epoch": 0.036487560511548375, "grad_norm": 1.3791264295578003, "learning_rate": 7.289359653346172e-07, "loss": 1.0504, "step": 1515 }, { "epoch": 0.03660798150333566, "grad_norm": 1.399931788444519, "learning_rate": 7.313432835820895e-07, "loss": 1.0174, "step": 1520 }, { "epoch": 0.03672840249512295, "grad_norm": 1.4853743314743042, "learning_rate": 7.337506018295619e-07, "loss": 1.0472, "step": 1525 }, { "epoch": 0.03684882348691024, "grad_norm": 1.5996602773666382, "learning_rate": 7.361579200770341e-07, "loss": 1.0267, "step": 1530 }, { "epoch": 0.036969244478697524, "grad_norm": 1.67296302318573, "learning_rate": 7.385652383245065e-07, "loss": 1.0361, "step": 1535 }, { "epoch": 0.03708966547048481, "grad_norm": 1.6642286777496338, "learning_rate": 7.409725565719787e-07, "loss": 1.0457, "step": 1540 }, { "epoch": 0.037210086462272106, "grad_norm": 1.4878441095352173, "learning_rate": 7.433798748194511e-07, "loss": 1.0817, "step": 1545 }, { "epoch": 0.03733050745405939, "grad_norm": 1.816658854484558, "learning_rate": 7.457871930669234e-07, "loss": 1.0329, "step": 1550 }, { "epoch": 0.03745092844584668, "grad_norm": 1.480438470840454, "learning_rate": 7.481945113143957e-07, "loss": 1.0022, "step": 1555 }, { "epoch": 0.03757134943763397, "grad_norm": 1.4642038345336914, "learning_rate": 7.50601829561868e-07, "loss": 1.0261, "step": 1560 }, { "epoch": 0.037691770429421255, "grad_norm": 1.5570502281188965, "learning_rate": 7.530091478093404e-07, "loss": 1.0731, "step": 1565 }, { "epoch": 0.03781219142120854, "grad_norm": 1.5301499366760254, "learning_rate": 7.554164660568126e-07, "loss": 1.0498, "step": 1570 }, { "epoch": 0.037932612412995836, "grad_norm": 1.3333464860916138, "learning_rate": 7.57823784304285e-07, "loss": 0.9946, "step": 1575 }, { "epoch": 0.038053033404783124, "grad_norm": 1.6789193153381348, "learning_rate": 7.602311025517573e-07, "loss": 0.9817, "step": 1580 }, { "epoch": 0.03817345439657041, "grad_norm": 1.4923834800720215, "learning_rate": 7.626384207992296e-07, "loss": 1.0567, "step": 1585 }, { "epoch": 0.0382938753883577, "grad_norm": 1.7668758630752563, "learning_rate": 7.650457390467019e-07, "loss": 0.984, "step": 1590 }, { "epoch": 0.038414296380144985, "grad_norm": 1.4018738269805908, "learning_rate": 7.674530572941742e-07, "loss": 0.9654, "step": 1595 }, { "epoch": 0.03853471737193227, "grad_norm": 1.733635425567627, "learning_rate": 7.698603755416466e-07, "loss": 1.0245, "step": 1600 }, { "epoch": 0.03865513836371957, "grad_norm": 1.5820351839065552, "learning_rate": 7.722676937891189e-07, "loss": 1.0502, "step": 1605 }, { "epoch": 0.038775559355506854, "grad_norm": 1.7299373149871826, "learning_rate": 7.746750120365913e-07, "loss": 1.0013, "step": 1610 }, { "epoch": 0.03889598034729414, "grad_norm": 1.5306358337402344, "learning_rate": 7.770823302840635e-07, "loss": 1.0434, "step": 1615 }, { "epoch": 0.03901640133908143, "grad_norm": 1.6972306966781616, "learning_rate": 7.794896485315359e-07, "loss": 1.0443, "step": 1620 }, { "epoch": 0.039136822330868716, "grad_norm": 1.8630884885787964, "learning_rate": 7.818969667790081e-07, "loss": 0.9966, "step": 1625 }, { "epoch": 0.039257243322656, "grad_norm": 1.4289274215698242, "learning_rate": 7.843042850264805e-07, "loss": 1.0051, "step": 1630 }, { "epoch": 0.03937766431444329, "grad_norm": 1.3741828203201294, "learning_rate": 7.867116032739528e-07, "loss": 0.9737, "step": 1635 }, { "epoch": 0.039498085306230585, "grad_norm": 1.6088579893112183, "learning_rate": 7.891189215214251e-07, "loss": 1.0336, "step": 1640 }, { "epoch": 0.03961850629801787, "grad_norm": 1.3409250974655151, "learning_rate": 7.915262397688974e-07, "loss": 1.0477, "step": 1645 }, { "epoch": 0.03973892728980516, "grad_norm": 1.3570778369903564, "learning_rate": 7.939335580163698e-07, "loss": 0.9611, "step": 1650 }, { "epoch": 0.03985934828159245, "grad_norm": 1.4607723951339722, "learning_rate": 7.96340876263842e-07, "loss": 1.0476, "step": 1655 }, { "epoch": 0.039979769273379734, "grad_norm": 1.4103513956069946, "learning_rate": 7.987481945113144e-07, "loss": 1.0038, "step": 1660 }, { "epoch": 0.04010019026516702, "grad_norm": 1.7692028284072876, "learning_rate": 8.011555127587867e-07, "loss": 1.0486, "step": 1665 }, { "epoch": 0.040220611256954315, "grad_norm": 1.7130166292190552, "learning_rate": 8.03562831006259e-07, "loss": 1.0355, "step": 1670 }, { "epoch": 0.0403410322487416, "grad_norm": 1.7267684936523438, "learning_rate": 8.059701492537313e-07, "loss": 0.9746, "step": 1675 }, { "epoch": 0.04046145324052889, "grad_norm": 1.5484259128570557, "learning_rate": 8.083774675012036e-07, "loss": 1.08, "step": 1680 }, { "epoch": 0.04058187423231618, "grad_norm": 1.6294459104537964, "learning_rate": 8.107847857486759e-07, "loss": 1.0619, "step": 1685 }, { "epoch": 0.040702295224103464, "grad_norm": 1.4222997426986694, "learning_rate": 8.131921039961483e-07, "loss": 1.0091, "step": 1690 }, { "epoch": 0.04082271621589075, "grad_norm": 1.7014527320861816, "learning_rate": 8.155994222436205e-07, "loss": 1.0323, "step": 1695 }, { "epoch": 0.040943137207678046, "grad_norm": 1.7980411052703857, "learning_rate": 8.180067404910929e-07, "loss": 1.064, "step": 1700 }, { "epoch": 0.04106355819946533, "grad_norm": 1.468579888343811, "learning_rate": 8.204140587385652e-07, "loss": 0.9758, "step": 1705 }, { "epoch": 0.04118397919125262, "grad_norm": 1.6334229707717896, "learning_rate": 8.228213769860375e-07, "loss": 1.0022, "step": 1710 }, { "epoch": 0.04130440018303991, "grad_norm": 1.5733845233917236, "learning_rate": 8.252286952335098e-07, "loss": 1.0361, "step": 1715 }, { "epoch": 0.041424821174827195, "grad_norm": 1.8664427995681763, "learning_rate": 8.276360134809822e-07, "loss": 1.0545, "step": 1720 }, { "epoch": 0.04154524216661448, "grad_norm": 1.562538743019104, "learning_rate": 8.300433317284544e-07, "loss": 1.0639, "step": 1725 }, { "epoch": 0.04166566315840177, "grad_norm": 1.882519245147705, "learning_rate": 8.324506499759267e-07, "loss": 0.9948, "step": 1730 }, { "epoch": 0.041786084150189064, "grad_norm": 1.424924373626709, "learning_rate": 8.34857968223399e-07, "loss": 1.0339, "step": 1735 }, { "epoch": 0.04190650514197635, "grad_norm": 1.9680640697479248, "learning_rate": 8.372652864708713e-07, "loss": 1.0502, "step": 1740 }, { "epoch": 0.04202692613376364, "grad_norm": 1.8039034605026245, "learning_rate": 8.396726047183438e-07, "loss": 1.0537, "step": 1745 }, { "epoch": 0.042147347125550925, "grad_norm": 1.5843199491500854, "learning_rate": 8.42079922965816e-07, "loss": 1.0155, "step": 1750 }, { "epoch": 0.04226776811733821, "grad_norm": 1.2623687982559204, "learning_rate": 8.444872412132884e-07, "loss": 0.992, "step": 1755 }, { "epoch": 0.0423881891091255, "grad_norm": 2.0166735649108887, "learning_rate": 8.468945594607607e-07, "loss": 1.0148, "step": 1760 }, { "epoch": 0.042508610100912794, "grad_norm": 1.8024189472198486, "learning_rate": 8.49301877708233e-07, "loss": 1.0526, "step": 1765 }, { "epoch": 0.04262903109270008, "grad_norm": 1.444460391998291, "learning_rate": 8.517091959557053e-07, "loss": 1.0287, "step": 1770 }, { "epoch": 0.04274945208448737, "grad_norm": 1.9382981061935425, "learning_rate": 8.541165142031777e-07, "loss": 1.0336, "step": 1775 }, { "epoch": 0.042869873076274656, "grad_norm": 1.5555084943771362, "learning_rate": 8.565238324506499e-07, "loss": 1.0116, "step": 1780 }, { "epoch": 0.04299029406806194, "grad_norm": 1.4860131740570068, "learning_rate": 8.589311506981223e-07, "loss": 1.0343, "step": 1785 }, { "epoch": 0.04311071505984923, "grad_norm": 1.5422332286834717, "learning_rate": 8.613384689455946e-07, "loss": 1.0307, "step": 1790 }, { "epoch": 0.04323113605163652, "grad_norm": 1.5868923664093018, "learning_rate": 8.637457871930669e-07, "loss": 1.0291, "step": 1795 }, { "epoch": 0.04335155704342381, "grad_norm": 1.5933997631072998, "learning_rate": 8.661531054405392e-07, "loss": 1.0708, "step": 1800 }, { "epoch": 0.0434719780352111, "grad_norm": 1.8912562131881714, "learning_rate": 8.685604236880116e-07, "loss": 1.0921, "step": 1805 }, { "epoch": 0.04359239902699839, "grad_norm": 1.5516586303710938, "learning_rate": 8.709677419354838e-07, "loss": 1.0165, "step": 1810 }, { "epoch": 0.043712820018785674, "grad_norm": 1.5522100925445557, "learning_rate": 8.733750601829562e-07, "loss": 1.0371, "step": 1815 }, { "epoch": 0.04383324101057296, "grad_norm": 1.924184799194336, "learning_rate": 8.757823784304284e-07, "loss": 1.0591, "step": 1820 }, { "epoch": 0.04395366200236025, "grad_norm": 1.5833402872085571, "learning_rate": 8.781896966779008e-07, "loss": 1.0504, "step": 1825 }, { "epoch": 0.04407408299414754, "grad_norm": 1.7274823188781738, "learning_rate": 8.805970149253731e-07, "loss": 1.0242, "step": 1830 }, { "epoch": 0.04419450398593483, "grad_norm": 1.5076647996902466, "learning_rate": 8.830043331728454e-07, "loss": 0.9765, "step": 1835 }, { "epoch": 0.04431492497772212, "grad_norm": 1.5085982084274292, "learning_rate": 8.854116514203177e-07, "loss": 1.0185, "step": 1840 }, { "epoch": 0.044435345969509404, "grad_norm": 1.6915234327316284, "learning_rate": 8.878189696677901e-07, "loss": 1.0761, "step": 1845 }, { "epoch": 0.04455576696129669, "grad_norm": 1.6578340530395508, "learning_rate": 8.902262879152623e-07, "loss": 1.0643, "step": 1850 }, { "epoch": 0.04467618795308398, "grad_norm": 1.6258350610733032, "learning_rate": 8.926336061627347e-07, "loss": 1.0495, "step": 1855 }, { "epoch": 0.04479660894487127, "grad_norm": 1.6774884462356567, "learning_rate": 8.95040924410207e-07, "loss": 1.0767, "step": 1860 }, { "epoch": 0.04491702993665856, "grad_norm": 1.6079150438308716, "learning_rate": 8.974482426576792e-07, "loss": 0.9649, "step": 1865 }, { "epoch": 0.04503745092844585, "grad_norm": 1.6209561824798584, "learning_rate": 8.998555609051516e-07, "loss": 0.9927, "step": 1870 }, { "epoch": 0.045157871920233135, "grad_norm": 1.641926884651184, "learning_rate": 9.022628791526238e-07, "loss": 1.029, "step": 1875 }, { "epoch": 0.04527829291202042, "grad_norm": 1.6065603494644165, "learning_rate": 9.046701974000962e-07, "loss": 1.0018, "step": 1880 }, { "epoch": 0.04539871390380771, "grad_norm": 1.5539517402648926, "learning_rate": 9.070775156475685e-07, "loss": 1.0798, "step": 1885 }, { "epoch": 0.045519134895595, "grad_norm": 1.6610114574432373, "learning_rate": 9.094848338950409e-07, "loss": 1.0386, "step": 1890 }, { "epoch": 0.04563955588738229, "grad_norm": 1.5111974477767944, "learning_rate": 9.118921521425132e-07, "loss": 1.0406, "step": 1895 }, { "epoch": 0.04575997687916958, "grad_norm": 2.0480804443359375, "learning_rate": 9.142994703899856e-07, "loss": 1.0007, "step": 1900 }, { "epoch": 0.045880397870956866, "grad_norm": 1.7117605209350586, "learning_rate": 9.167067886374578e-07, "loss": 0.9986, "step": 1905 }, { "epoch": 0.04600081886274415, "grad_norm": 1.505280613899231, "learning_rate": 9.191141068849302e-07, "loss": 1.0267, "step": 1910 }, { "epoch": 0.04612123985453144, "grad_norm": 1.8697929382324219, "learning_rate": 9.215214251324025e-07, "loss": 1.0041, "step": 1915 }, { "epoch": 0.04624166084631873, "grad_norm": 1.4194835424423218, "learning_rate": 9.239287433798748e-07, "loss": 1.0155, "step": 1920 }, { "epoch": 0.04636208183810602, "grad_norm": 1.8681567907333374, "learning_rate": 9.263360616273471e-07, "loss": 1.0041, "step": 1925 }, { "epoch": 0.04648250282989331, "grad_norm": 1.5799516439437866, "learning_rate": 9.287433798748195e-07, "loss": 0.996, "step": 1930 }, { "epoch": 0.046602923821680596, "grad_norm": 1.585576057434082, "learning_rate": 9.311506981222917e-07, "loss": 1.0328, "step": 1935 }, { "epoch": 0.04672334481346788, "grad_norm": 1.6536821126937866, "learning_rate": 9.335580163697641e-07, "loss": 1.0476, "step": 1940 }, { "epoch": 0.04684376580525517, "grad_norm": 1.5552594661712646, "learning_rate": 9.359653346172363e-07, "loss": 1.0186, "step": 1945 }, { "epoch": 0.04696418679704246, "grad_norm": 1.459959864616394, "learning_rate": 9.383726528647087e-07, "loss": 1.0039, "step": 1950 }, { "epoch": 0.04708460778882975, "grad_norm": 1.63764226436615, "learning_rate": 9.40779971112181e-07, "loss": 1.0606, "step": 1955 }, { "epoch": 0.04720502878061704, "grad_norm": 1.6388280391693115, "learning_rate": 9.431872893596533e-07, "loss": 0.9829, "step": 1960 }, { "epoch": 0.04732544977240433, "grad_norm": 1.6173887252807617, "learning_rate": 9.455946076071256e-07, "loss": 1.0445, "step": 1965 }, { "epoch": 0.047445870764191614, "grad_norm": 1.4738174676895142, "learning_rate": 9.48001925854598e-07, "loss": 1.0223, "step": 1970 }, { "epoch": 0.0475662917559789, "grad_norm": 1.7925279140472412, "learning_rate": 9.504092441020702e-07, "loss": 1.0145, "step": 1975 }, { "epoch": 0.04768671274776619, "grad_norm": 1.609230399131775, "learning_rate": 9.528165623495426e-07, "loss": 1.0355, "step": 1980 }, { "epoch": 0.047807133739553476, "grad_norm": 1.5456503629684448, "learning_rate": 9.552238805970149e-07, "loss": 0.9816, "step": 1985 }, { "epoch": 0.04792755473134077, "grad_norm": 1.7006056308746338, "learning_rate": 9.576311988444872e-07, "loss": 1.0325, "step": 1990 }, { "epoch": 0.04804797572312806, "grad_norm": 2.056192636489868, "learning_rate": 9.600385170919594e-07, "loss": 1.0202, "step": 1995 }, { "epoch": 0.048168396714915344, "grad_norm": 1.6051387786865234, "learning_rate": 9.624458353394317e-07, "loss": 0.9992, "step": 2000 }, { "epoch": 0.04828881770670263, "grad_norm": 1.550959587097168, "learning_rate": 9.648531535869041e-07, "loss": 0.9809, "step": 2005 }, { "epoch": 0.04840923869848992, "grad_norm": 1.8543497323989868, "learning_rate": 9.672604718343765e-07, "loss": 1.0177, "step": 2010 }, { "epoch": 0.048529659690277206, "grad_norm": 1.6538995504379272, "learning_rate": 9.696677900818486e-07, "loss": 1.0336, "step": 2015 }, { "epoch": 0.0486500806820645, "grad_norm": 1.763832926750183, "learning_rate": 9.72075108329321e-07, "loss": 0.9986, "step": 2020 }, { "epoch": 0.04877050167385179, "grad_norm": 1.794128656387329, "learning_rate": 9.744824265767934e-07, "loss": 1.0349, "step": 2025 }, { "epoch": 0.048890922665639075, "grad_norm": 1.6576578617095947, "learning_rate": 9.768897448242657e-07, "loss": 1.0408, "step": 2030 }, { "epoch": 0.04901134365742636, "grad_norm": 1.539139986038208, "learning_rate": 9.792970630717381e-07, "loss": 0.9983, "step": 2035 }, { "epoch": 0.04913176464921365, "grad_norm": 1.5743350982666016, "learning_rate": 9.817043813192105e-07, "loss": 1.0335, "step": 2040 }, { "epoch": 0.04925218564100094, "grad_norm": 1.6326196193695068, "learning_rate": 9.841116995666826e-07, "loss": 1.0018, "step": 2045 }, { "epoch": 0.04937260663278823, "grad_norm": 1.6246923208236694, "learning_rate": 9.86519017814155e-07, "loss": 0.9942, "step": 2050 }, { "epoch": 0.04949302762457552, "grad_norm": 1.564854621887207, "learning_rate": 9.889263360616274e-07, "loss": 1.0211, "step": 2055 }, { "epoch": 0.049613448616362806, "grad_norm": 1.5778380632400513, "learning_rate": 9.913336543090997e-07, "loss": 1.0246, "step": 2060 }, { "epoch": 0.04973386960815009, "grad_norm": 1.7073304653167725, "learning_rate": 9.937409725565719e-07, "loss": 1.0102, "step": 2065 }, { "epoch": 0.04985429059993738, "grad_norm": 1.4603596925735474, "learning_rate": 9.961482908040443e-07, "loss": 0.9871, "step": 2070 }, { "epoch": 0.04997471159172467, "grad_norm": 1.6223366260528564, "learning_rate": 9.985556090515166e-07, "loss": 1.0269, "step": 2075 }, { "epoch": 0.050095132583511955, "grad_norm": 1.8474642038345337, "learning_rate": 9.999492952033262e-07, "loss": 0.9398, "step": 2080 }, { "epoch": 0.05021555357529925, "grad_norm": 1.6487759351730347, "learning_rate": 9.998225332116418e-07, "loss": 0.9937, "step": 2085 }, { "epoch": 0.050335974567086536, "grad_norm": 1.697744607925415, "learning_rate": 9.996957712199575e-07, "loss": 0.9984, "step": 2090 }, { "epoch": 0.05045639555887382, "grad_norm": 1.4020692110061646, "learning_rate": 9.995690092282729e-07, "loss": 0.9828, "step": 2095 }, { "epoch": 0.05057681655066111, "grad_norm": 1.589512586593628, "learning_rate": 9.994422472365885e-07, "loss": 0.9955, "step": 2100 }, { "epoch": 0.0506972375424484, "grad_norm": 1.4988205432891846, "learning_rate": 9.993154852449042e-07, "loss": 1.0136, "step": 2105 }, { "epoch": 0.050817658534235685, "grad_norm": 1.6529805660247803, "learning_rate": 9.991887232532196e-07, "loss": 0.9957, "step": 2110 }, { "epoch": 0.05093807952602298, "grad_norm": 1.5760482549667358, "learning_rate": 9.990619612615353e-07, "loss": 0.9742, "step": 2115 }, { "epoch": 0.05105850051781027, "grad_norm": 1.7098240852355957, "learning_rate": 9.98935199269851e-07, "loss": 0.9826, "step": 2120 }, { "epoch": 0.051178921509597554, "grad_norm": 1.4694714546203613, "learning_rate": 9.988084372781663e-07, "loss": 1.0453, "step": 2125 }, { "epoch": 0.05129934250138484, "grad_norm": 1.719585657119751, "learning_rate": 9.98681675286482e-07, "loss": 1.0561, "step": 2130 }, { "epoch": 0.05141976349317213, "grad_norm": 1.5589990615844727, "learning_rate": 9.985549132947976e-07, "loss": 0.9995, "step": 2135 }, { "epoch": 0.051540184484959416, "grad_norm": 1.570941686630249, "learning_rate": 9.984281513031133e-07, "loss": 1.0074, "step": 2140 }, { "epoch": 0.05166060547674671, "grad_norm": 1.4263135194778442, "learning_rate": 9.98301389311429e-07, "loss": 0.9939, "step": 2145 }, { "epoch": 0.051781026468534, "grad_norm": 1.6503669023513794, "learning_rate": 9.981746273197443e-07, "loss": 0.9384, "step": 2150 }, { "epoch": 0.051901447460321284, "grad_norm": 1.5674190521240234, "learning_rate": 9.9804786532806e-07, "loss": 1.0988, "step": 2155 }, { "epoch": 0.05202186845210857, "grad_norm": 1.8552207946777344, "learning_rate": 9.979211033363756e-07, "loss": 0.9471, "step": 2160 }, { "epoch": 0.05214228944389586, "grad_norm": 1.5317758321762085, "learning_rate": 9.977943413446913e-07, "loss": 1.0026, "step": 2165 }, { "epoch": 0.052262710435683146, "grad_norm": 1.5184342861175537, "learning_rate": 9.976675793530067e-07, "loss": 0.9657, "step": 2170 }, { "epoch": 0.052383131427470433, "grad_norm": 2.0550878047943115, "learning_rate": 9.975408173613224e-07, "loss": 1.0551, "step": 2175 }, { "epoch": 0.05250355241925773, "grad_norm": 1.670849084854126, "learning_rate": 9.97414055369638e-07, "loss": 1.0307, "step": 2180 }, { "epoch": 0.052623973411045015, "grad_norm": 1.8367807865142822, "learning_rate": 9.972872933779534e-07, "loss": 1.0552, "step": 2185 }, { "epoch": 0.0527443944028323, "grad_norm": 1.5710960626602173, "learning_rate": 9.97160531386269e-07, "loss": 1.026, "step": 2190 }, { "epoch": 0.05286481539461959, "grad_norm": 1.4356889724731445, "learning_rate": 9.970337693945847e-07, "loss": 0.9868, "step": 2195 }, { "epoch": 0.05298523638640688, "grad_norm": 1.4902724027633667, "learning_rate": 9.969070074029002e-07, "loss": 1.0166, "step": 2200 }, { "epoch": 0.053105657378194164, "grad_norm": 1.6974903345108032, "learning_rate": 9.967802454112158e-07, "loss": 1.0271, "step": 2205 }, { "epoch": 0.05322607836998146, "grad_norm": 1.4314632415771484, "learning_rate": 9.966534834195315e-07, "loss": 0.983, "step": 2210 }, { "epoch": 0.053346499361768746, "grad_norm": 1.472601294517517, "learning_rate": 9.96526721427847e-07, "loss": 1.024, "step": 2215 }, { "epoch": 0.05346692035355603, "grad_norm": 1.6418883800506592, "learning_rate": 9.963999594361625e-07, "loss": 1.0011, "step": 2220 }, { "epoch": 0.05358734134534332, "grad_norm": 1.492514729499817, "learning_rate": 9.962731974444782e-07, "loss": 1.0287, "step": 2225 }, { "epoch": 0.05370776233713061, "grad_norm": 1.6566858291625977, "learning_rate": 9.961464354527938e-07, "loss": 1.0115, "step": 2230 }, { "epoch": 0.053828183328917895, "grad_norm": 2.257582187652588, "learning_rate": 9.960196734611095e-07, "loss": 1.0276, "step": 2235 }, { "epoch": 0.05394860432070519, "grad_norm": 1.6120405197143555, "learning_rate": 9.95892911469425e-07, "loss": 1.0478, "step": 2240 }, { "epoch": 0.054069025312492476, "grad_norm": 1.6881752014160156, "learning_rate": 9.957661494777405e-07, "loss": 1.0088, "step": 2245 }, { "epoch": 0.05418944630427976, "grad_norm": 1.7539931535720825, "learning_rate": 9.956393874860562e-07, "loss": 1.0064, "step": 2250 }, { "epoch": 0.05430986729606705, "grad_norm": 1.7022444009780884, "learning_rate": 9.955126254943718e-07, "loss": 1.0547, "step": 2255 }, { "epoch": 0.05443028828785434, "grad_norm": 1.4357643127441406, "learning_rate": 9.953858635026873e-07, "loss": 1.0532, "step": 2260 }, { "epoch": 0.054550709279641625, "grad_norm": 1.4944684505462646, "learning_rate": 9.95259101511003e-07, "loss": 0.996, "step": 2265 }, { "epoch": 0.05467113027142891, "grad_norm": 1.4375295639038086, "learning_rate": 9.951323395193186e-07, "loss": 1.0292, "step": 2270 }, { "epoch": 0.05479155126321621, "grad_norm": 1.7429416179656982, "learning_rate": 9.95005577527634e-07, "loss": 0.9918, "step": 2275 }, { "epoch": 0.054911972255003494, "grad_norm": 1.5450338125228882, "learning_rate": 9.948788155359496e-07, "loss": 1.0145, "step": 2280 }, { "epoch": 0.05503239324679078, "grad_norm": 1.7032642364501953, "learning_rate": 9.947520535442653e-07, "loss": 1.0643, "step": 2285 }, { "epoch": 0.05515281423857807, "grad_norm": 1.4787014722824097, "learning_rate": 9.946252915525807e-07, "loss": 0.972, "step": 2290 }, { "epoch": 0.055273235230365356, "grad_norm": 1.6551034450531006, "learning_rate": 9.944985295608964e-07, "loss": 1.0394, "step": 2295 }, { "epoch": 0.05539365622215264, "grad_norm": 1.5106158256530762, "learning_rate": 9.94371767569212e-07, "loss": 1.0251, "step": 2300 }, { "epoch": 0.05551407721393994, "grad_norm": 1.7415740489959717, "learning_rate": 9.942450055775276e-07, "loss": 1.0066, "step": 2305 }, { "epoch": 0.055634498205727224, "grad_norm": 1.625821590423584, "learning_rate": 9.94118243585843e-07, "loss": 0.9502, "step": 2310 }, { "epoch": 0.05575491919751451, "grad_norm": 1.480774164199829, "learning_rate": 9.939914815941587e-07, "loss": 0.9622, "step": 2315 }, { "epoch": 0.0558753401893018, "grad_norm": 1.4830437898635864, "learning_rate": 9.938647196024744e-07, "loss": 0.9566, "step": 2320 }, { "epoch": 0.055995761181089086, "grad_norm": 1.5242736339569092, "learning_rate": 9.9373795761079e-07, "loss": 0.9981, "step": 2325 }, { "epoch": 0.056116182172876374, "grad_norm": 1.4055622816085815, "learning_rate": 9.936111956191057e-07, "loss": 1.0229, "step": 2330 }, { "epoch": 0.05623660316466366, "grad_norm": 2.0330963134765625, "learning_rate": 9.93484433627421e-07, "loss": 1.0014, "step": 2335 }, { "epoch": 0.056357024156450955, "grad_norm": 1.824340581893921, "learning_rate": 9.933576716357367e-07, "loss": 0.9525, "step": 2340 }, { "epoch": 0.05647744514823824, "grad_norm": 1.5875630378723145, "learning_rate": 9.932309096440524e-07, "loss": 0.9849, "step": 2345 }, { "epoch": 0.05659786614002553, "grad_norm": 1.5911989212036133, "learning_rate": 9.931041476523678e-07, "loss": 1.0125, "step": 2350 }, { "epoch": 0.05671828713181282, "grad_norm": 1.567030906677246, "learning_rate": 9.929773856606835e-07, "loss": 1.0127, "step": 2355 }, { "epoch": 0.056838708123600104, "grad_norm": 1.8666136264801025, "learning_rate": 9.92850623668999e-07, "loss": 1.0164, "step": 2360 }, { "epoch": 0.05695912911538739, "grad_norm": 1.4268726110458374, "learning_rate": 9.927238616773145e-07, "loss": 0.9955, "step": 2365 }, { "epoch": 0.057079550107174686, "grad_norm": 1.6934856176376343, "learning_rate": 9.925970996856302e-07, "loss": 0.978, "step": 2370 }, { "epoch": 0.05719997109896197, "grad_norm": 1.5638610124588013, "learning_rate": 9.924703376939458e-07, "loss": 1.0284, "step": 2375 }, { "epoch": 0.05732039209074926, "grad_norm": 1.6345804929733276, "learning_rate": 9.923435757022613e-07, "loss": 1.0061, "step": 2380 }, { "epoch": 0.05744081308253655, "grad_norm": 1.7583974599838257, "learning_rate": 9.92216813710577e-07, "loss": 1.0355, "step": 2385 }, { "epoch": 0.057561234074323835, "grad_norm": 1.576433539390564, "learning_rate": 9.920900517188925e-07, "loss": 0.9556, "step": 2390 }, { "epoch": 0.05768165506611112, "grad_norm": 1.4995986223220825, "learning_rate": 9.919632897272082e-07, "loss": 0.9899, "step": 2395 }, { "epoch": 0.057802076057898416, "grad_norm": 1.8596760034561157, "learning_rate": 9.918365277355238e-07, "loss": 1.0413, "step": 2400 }, { "epoch": 0.0579224970496857, "grad_norm": 2.1781084537506104, "learning_rate": 9.917097657438393e-07, "loss": 1.0664, "step": 2405 }, { "epoch": 0.05804291804147299, "grad_norm": 1.719193458557129, "learning_rate": 9.91583003752155e-07, "loss": 1.0398, "step": 2410 }, { "epoch": 0.05816333903326028, "grad_norm": 1.5527453422546387, "learning_rate": 9.914562417604706e-07, "loss": 0.8899, "step": 2415 }, { "epoch": 0.058283760025047565, "grad_norm": 2.198544979095459, "learning_rate": 9.913294797687862e-07, "loss": 1.0026, "step": 2420 }, { "epoch": 0.05840418101683485, "grad_norm": 1.8652313947677612, "learning_rate": 9.912027177771016e-07, "loss": 1.0415, "step": 2425 }, { "epoch": 0.05852460200862214, "grad_norm": 1.5211336612701416, "learning_rate": 9.910759557854173e-07, "loss": 0.9695, "step": 2430 }, { "epoch": 0.058645023000409434, "grad_norm": 1.603398323059082, "learning_rate": 9.90949193793733e-07, "loss": 0.973, "step": 2435 }, { "epoch": 0.05876544399219672, "grad_norm": 1.6345428228378296, "learning_rate": 9.908224318020484e-07, "loss": 0.9492, "step": 2440 }, { "epoch": 0.05888586498398401, "grad_norm": 1.4910589456558228, "learning_rate": 9.90695669810364e-07, "loss": 0.9742, "step": 2445 }, { "epoch": 0.059006285975771296, "grad_norm": 1.7999168634414673, "learning_rate": 9.905689078186796e-07, "loss": 0.9763, "step": 2450 }, { "epoch": 0.05912670696755858, "grad_norm": 1.7445992231369019, "learning_rate": 9.90442145826995e-07, "loss": 1.0366, "step": 2455 }, { "epoch": 0.05924712795934587, "grad_norm": 1.6648802757263184, "learning_rate": 9.903153838353107e-07, "loss": 1.0165, "step": 2460 }, { "epoch": 0.059367548951133164, "grad_norm": 1.5104074478149414, "learning_rate": 9.901886218436264e-07, "loss": 0.9897, "step": 2465 }, { "epoch": 0.05948796994292045, "grad_norm": 1.7375459671020508, "learning_rate": 9.900618598519418e-07, "loss": 1.0129, "step": 2470 }, { "epoch": 0.05960839093470774, "grad_norm": 1.4074209928512573, "learning_rate": 9.899350978602574e-07, "loss": 1.0411, "step": 2475 }, { "epoch": 0.059728811926495026, "grad_norm": 1.6192816495895386, "learning_rate": 9.89808335868573e-07, "loss": 0.995, "step": 2480 }, { "epoch": 0.059849232918282314, "grad_norm": 1.6760953664779663, "learning_rate": 9.896815738768887e-07, "loss": 1.0046, "step": 2485 }, { "epoch": 0.0599696539100696, "grad_norm": 1.579380989074707, "learning_rate": 9.895548118852044e-07, "loss": 0.9926, "step": 2490 }, { "epoch": 0.060090074901856895, "grad_norm": 1.7261121273040771, "learning_rate": 9.894280498935198e-07, "loss": 0.9732, "step": 2495 }, { "epoch": 0.06021049589364418, "grad_norm": 1.5446956157684326, "learning_rate": 9.893012879018355e-07, "loss": 1.0288, "step": 2500 }, { "epoch": 0.06033091688543147, "grad_norm": 1.4820080995559692, "learning_rate": 9.89174525910151e-07, "loss": 1.0583, "step": 2505 }, { "epoch": 0.06045133787721876, "grad_norm": 1.5800060033798218, "learning_rate": 9.890477639184667e-07, "loss": 1.0293, "step": 2510 }, { "epoch": 0.060571758869006044, "grad_norm": 1.6209781169891357, "learning_rate": 9.889210019267822e-07, "loss": 1.0297, "step": 2515 }, { "epoch": 0.06069217986079333, "grad_norm": 1.3738642930984497, "learning_rate": 9.887942399350978e-07, "loss": 1.0049, "step": 2520 }, { "epoch": 0.06081260085258062, "grad_norm": 1.6053427457809448, "learning_rate": 9.886674779434135e-07, "loss": 0.9941, "step": 2525 }, { "epoch": 0.06093302184436791, "grad_norm": 1.4712995290756226, "learning_rate": 9.88540715951729e-07, "loss": 0.9638, "step": 2530 }, { "epoch": 0.0610534428361552, "grad_norm": 1.7111667394638062, "learning_rate": 9.884139539600445e-07, "loss": 1.0174, "step": 2535 }, { "epoch": 0.06117386382794249, "grad_norm": 1.4965132474899292, "learning_rate": 9.882871919683602e-07, "loss": 0.9886, "step": 2540 }, { "epoch": 0.061294284819729775, "grad_norm": 1.615824580192566, "learning_rate": 9.881604299766756e-07, "loss": 0.9891, "step": 2545 }, { "epoch": 0.06141470581151706, "grad_norm": 1.4951409101486206, "learning_rate": 9.880336679849913e-07, "loss": 1.0106, "step": 2550 }, { "epoch": 0.06153512680330435, "grad_norm": 1.630304217338562, "learning_rate": 9.87906905993307e-07, "loss": 0.937, "step": 2555 }, { "epoch": 0.06165554779509164, "grad_norm": 1.622524380683899, "learning_rate": 9.877801440016226e-07, "loss": 0.9855, "step": 2560 }, { "epoch": 0.06177596878687893, "grad_norm": 1.665725588798523, "learning_rate": 9.87653382009938e-07, "loss": 1.0499, "step": 2565 }, { "epoch": 0.06189638977866622, "grad_norm": 1.747391700744629, "learning_rate": 9.875266200182536e-07, "loss": 0.9747, "step": 2570 }, { "epoch": 0.062016810770453505, "grad_norm": 1.7850977182388306, "learning_rate": 9.873998580265693e-07, "loss": 0.9773, "step": 2575 }, { "epoch": 0.06213723176224079, "grad_norm": 1.7369146347045898, "learning_rate": 9.87273096034885e-07, "loss": 1.0316, "step": 2580 }, { "epoch": 0.06225765275402808, "grad_norm": 1.810365915298462, "learning_rate": 9.871463340432006e-07, "loss": 0.9694, "step": 2585 }, { "epoch": 0.062378073745815374, "grad_norm": 1.6777886152267456, "learning_rate": 9.87019572051516e-07, "loss": 0.9939, "step": 2590 }, { "epoch": 0.06249849473760266, "grad_norm": 1.7758405208587646, "learning_rate": 9.868928100598316e-07, "loss": 1.0482, "step": 2595 }, { "epoch": 0.06261891572938995, "grad_norm": 1.699005365371704, "learning_rate": 9.867660480681473e-07, "loss": 1.0163, "step": 2600 }, { "epoch": 0.06273933672117724, "grad_norm": 1.5089378356933594, "learning_rate": 9.866392860764627e-07, "loss": 0.9912, "step": 2605 }, { "epoch": 0.06285975771296452, "grad_norm": 1.4108537435531616, "learning_rate": 9.865125240847784e-07, "loss": 1.0056, "step": 2610 }, { "epoch": 0.06298017870475181, "grad_norm": 1.6560865640640259, "learning_rate": 9.86385762093094e-07, "loss": 1.024, "step": 2615 }, { "epoch": 0.0631005996965391, "grad_norm": 1.54850172996521, "learning_rate": 9.862590001014094e-07, "loss": 1.0085, "step": 2620 }, { "epoch": 0.06322102068832638, "grad_norm": 1.6445883512496948, "learning_rate": 9.86132238109725e-07, "loss": 1.0119, "step": 2625 }, { "epoch": 0.06334144168011367, "grad_norm": 1.811767816543579, "learning_rate": 9.860054761180407e-07, "loss": 0.9318, "step": 2630 }, { "epoch": 0.06346186267190096, "grad_norm": 1.6554980278015137, "learning_rate": 9.858787141263562e-07, "loss": 1.0083, "step": 2635 }, { "epoch": 0.06358228366368826, "grad_norm": 1.773821473121643, "learning_rate": 9.857519521346718e-07, "loss": 1.0159, "step": 2640 }, { "epoch": 0.06370270465547555, "grad_norm": 1.6003626585006714, "learning_rate": 9.856251901429875e-07, "loss": 0.9948, "step": 2645 }, { "epoch": 0.06382312564726283, "grad_norm": 1.6465872526168823, "learning_rate": 9.85498428151303e-07, "loss": 1.0038, "step": 2650 }, { "epoch": 0.06394354663905012, "grad_norm": 1.4264850616455078, "learning_rate": 9.853716661596185e-07, "loss": 1.0058, "step": 2655 }, { "epoch": 0.06406396763083741, "grad_norm": 1.4336827993392944, "learning_rate": 9.852449041679342e-07, "loss": 1.0192, "step": 2660 }, { "epoch": 0.0641843886226247, "grad_norm": 1.6807986497879028, "learning_rate": 9.851181421762498e-07, "loss": 0.9417, "step": 2665 }, { "epoch": 0.06430480961441198, "grad_norm": 1.5016063451766968, "learning_rate": 9.849913801845655e-07, "loss": 0.9898, "step": 2670 }, { "epoch": 0.06442523060619927, "grad_norm": 1.5583468675613403, "learning_rate": 9.848646181928811e-07, "loss": 1.0263, "step": 2675 }, { "epoch": 0.06454565159798656, "grad_norm": 1.9095042943954468, "learning_rate": 9.847378562011966e-07, "loss": 0.9524, "step": 2680 }, { "epoch": 0.06466607258977385, "grad_norm": 1.6604355573654175, "learning_rate": 9.846110942095122e-07, "loss": 0.9932, "step": 2685 }, { "epoch": 0.06478649358156113, "grad_norm": 1.5880812406539917, "learning_rate": 9.844843322178278e-07, "loss": 0.9439, "step": 2690 }, { "epoch": 0.06490691457334842, "grad_norm": 1.512092113494873, "learning_rate": 9.843575702261433e-07, "loss": 0.9756, "step": 2695 }, { "epoch": 0.06502733556513571, "grad_norm": 1.510452151298523, "learning_rate": 9.84230808234459e-07, "loss": 0.9915, "step": 2700 }, { "epoch": 0.06514775655692301, "grad_norm": 1.5895094871520996, "learning_rate": 9.841040462427746e-07, "loss": 1.0063, "step": 2705 }, { "epoch": 0.0652681775487103, "grad_norm": 1.780158519744873, "learning_rate": 9.8397728425109e-07, "loss": 1.0395, "step": 2710 }, { "epoch": 0.06538859854049758, "grad_norm": 1.534387469291687, "learning_rate": 9.838505222594056e-07, "loss": 0.9337, "step": 2715 }, { "epoch": 0.06550901953228487, "grad_norm": 1.6160880327224731, "learning_rate": 9.837237602677213e-07, "loss": 0.9846, "step": 2720 }, { "epoch": 0.06562944052407216, "grad_norm": 1.616960883140564, "learning_rate": 9.835969982760367e-07, "loss": 1.0247, "step": 2725 }, { "epoch": 0.06574986151585945, "grad_norm": 1.6236367225646973, "learning_rate": 9.834702362843524e-07, "loss": 0.9983, "step": 2730 }, { "epoch": 0.06587028250764673, "grad_norm": 2.1704113483428955, "learning_rate": 9.83343474292668e-07, "loss": 1.0474, "step": 2735 }, { "epoch": 0.06599070349943402, "grad_norm": 1.4857268333435059, "learning_rate": 9.832167123009837e-07, "loss": 0.9496, "step": 2740 }, { "epoch": 0.06611112449122131, "grad_norm": 1.6319400072097778, "learning_rate": 9.830899503092993e-07, "loss": 0.972, "step": 2745 }, { "epoch": 0.0662315454830086, "grad_norm": 1.406339168548584, "learning_rate": 9.829631883176147e-07, "loss": 0.9842, "step": 2750 }, { "epoch": 0.06635196647479588, "grad_norm": 1.6158806085586548, "learning_rate": 9.828364263259304e-07, "loss": 0.9952, "step": 2755 }, { "epoch": 0.06647238746658317, "grad_norm": 1.6224943399429321, "learning_rate": 9.82709664334246e-07, "loss": 0.961, "step": 2760 }, { "epoch": 0.06659280845837047, "grad_norm": 1.630590558052063, "learning_rate": 9.825829023425617e-07, "loss": 1.0236, "step": 2765 }, { "epoch": 0.06671322945015776, "grad_norm": 1.5676782131195068, "learning_rate": 9.82456140350877e-07, "loss": 0.9279, "step": 2770 }, { "epoch": 0.06683365044194504, "grad_norm": 1.5544155836105347, "learning_rate": 9.823293783591927e-07, "loss": 0.9843, "step": 2775 }, { "epoch": 0.06695407143373233, "grad_norm": 1.5769226551055908, "learning_rate": 9.822026163675084e-07, "loss": 1.0193, "step": 2780 }, { "epoch": 0.06707449242551962, "grad_norm": 1.502962350845337, "learning_rate": 9.820758543758238e-07, "loss": 1.0003, "step": 2785 }, { "epoch": 0.0671949134173069, "grad_norm": 1.9905481338500977, "learning_rate": 9.819490923841395e-07, "loss": 0.9875, "step": 2790 }, { "epoch": 0.0673153344090942, "grad_norm": 1.6774263381958008, "learning_rate": 9.818223303924551e-07, "loss": 1.0727, "step": 2795 }, { "epoch": 0.06743575540088148, "grad_norm": 1.4511786699295044, "learning_rate": 9.816955684007708e-07, "loss": 0.9355, "step": 2800 }, { "epoch": 0.06755617639266877, "grad_norm": 1.5723068714141846, "learning_rate": 9.815688064090862e-07, "loss": 0.9647, "step": 2805 }, { "epoch": 0.06767659738445606, "grad_norm": 1.5582698583602905, "learning_rate": 9.814420444174018e-07, "loss": 0.9908, "step": 2810 }, { "epoch": 0.06779701837624334, "grad_norm": 1.5775694847106934, "learning_rate": 9.813152824257175e-07, "loss": 0.9866, "step": 2815 }, { "epoch": 0.06791743936803063, "grad_norm": 1.4566597938537598, "learning_rate": 9.81188520434033e-07, "loss": 1.0293, "step": 2820 }, { "epoch": 0.06803786035981792, "grad_norm": 1.9800313711166382, "learning_rate": 9.810617584423486e-07, "loss": 0.8688, "step": 2825 }, { "epoch": 0.06815828135160522, "grad_norm": 1.5676807165145874, "learning_rate": 9.809349964506642e-07, "loss": 1.027, "step": 2830 }, { "epoch": 0.0682787023433925, "grad_norm": 1.5596281290054321, "learning_rate": 9.808082344589798e-07, "loss": 0.9814, "step": 2835 }, { "epoch": 0.06839912333517979, "grad_norm": 1.5297009944915771, "learning_rate": 9.806814724672953e-07, "loss": 0.9466, "step": 2840 }, { "epoch": 0.06851954432696708, "grad_norm": 1.5337249040603638, "learning_rate": 9.80554710475611e-07, "loss": 0.9952, "step": 2845 }, { "epoch": 0.06863996531875437, "grad_norm": 1.7600308656692505, "learning_rate": 9.804279484839266e-07, "loss": 0.9941, "step": 2850 }, { "epoch": 0.06876038631054165, "grad_norm": 1.5565025806427002, "learning_rate": 9.803011864922422e-07, "loss": 0.9536, "step": 2855 }, { "epoch": 0.06888080730232894, "grad_norm": 1.4753719568252563, "learning_rate": 9.801744245005579e-07, "loss": 0.9562, "step": 2860 }, { "epoch": 0.06900122829411623, "grad_norm": 1.6010030508041382, "learning_rate": 9.800476625088733e-07, "loss": 1.0187, "step": 2865 }, { "epoch": 0.06912164928590352, "grad_norm": 1.723764181137085, "learning_rate": 9.79920900517189e-07, "loss": 0.9999, "step": 2870 }, { "epoch": 0.0692420702776908, "grad_norm": 1.480420470237732, "learning_rate": 9.797941385255046e-07, "loss": 1.0608, "step": 2875 }, { "epoch": 0.06936249126947809, "grad_norm": 1.699066400527954, "learning_rate": 9.7966737653382e-07, "loss": 1.0086, "step": 2880 }, { "epoch": 0.06948291226126538, "grad_norm": 1.6982381343841553, "learning_rate": 9.795406145421357e-07, "loss": 0.9616, "step": 2885 }, { "epoch": 0.06960333325305267, "grad_norm": 1.5258110761642456, "learning_rate": 9.794138525504513e-07, "loss": 0.9569, "step": 2890 }, { "epoch": 0.06972375424483997, "grad_norm": 1.3778504133224487, "learning_rate": 9.792870905587667e-07, "loss": 0.949, "step": 2895 }, { "epoch": 0.06984417523662725, "grad_norm": 1.5082603693008423, "learning_rate": 9.791603285670824e-07, "loss": 0.9212, "step": 2900 }, { "epoch": 0.06996459622841454, "grad_norm": 1.683454155921936, "learning_rate": 9.79033566575398e-07, "loss": 1.0048, "step": 2905 }, { "epoch": 0.07008501722020183, "grad_norm": 1.6343798637390137, "learning_rate": 9.789068045837135e-07, "loss": 0.9412, "step": 2910 }, { "epoch": 0.07020543821198912, "grad_norm": 2.1352267265319824, "learning_rate": 9.78780042592029e-07, "loss": 1.0182, "step": 2915 }, { "epoch": 0.0703258592037764, "grad_norm": 1.613693118095398, "learning_rate": 9.786532806003447e-07, "loss": 0.9761, "step": 2920 }, { "epoch": 0.07044628019556369, "grad_norm": 1.7342922687530518, "learning_rate": 9.785265186086604e-07, "loss": 0.9448, "step": 2925 }, { "epoch": 0.07056670118735098, "grad_norm": 1.664732575416565, "learning_rate": 9.78399756616976e-07, "loss": 1.026, "step": 2930 }, { "epoch": 0.07068712217913826, "grad_norm": 1.4946379661560059, "learning_rate": 9.782729946252915e-07, "loss": 0.935, "step": 2935 }, { "epoch": 0.07080754317092555, "grad_norm": 1.7126940488815308, "learning_rate": 9.781462326336071e-07, "loss": 0.9844, "step": 2940 }, { "epoch": 0.07092796416271284, "grad_norm": 1.847761869430542, "learning_rate": 9.780194706419228e-07, "loss": 1.0524, "step": 2945 }, { "epoch": 0.07104838515450013, "grad_norm": 1.6522741317749023, "learning_rate": 9.778927086502384e-07, "loss": 0.9787, "step": 2950 }, { "epoch": 0.07116880614628743, "grad_norm": 1.5920339822769165, "learning_rate": 9.777659466585538e-07, "loss": 0.9595, "step": 2955 }, { "epoch": 0.07128922713807472, "grad_norm": 1.5285476446151733, "learning_rate": 9.776391846668695e-07, "loss": 0.9854, "step": 2960 }, { "epoch": 0.071409648129862, "grad_norm": 1.3394943475723267, "learning_rate": 9.775124226751851e-07, "loss": 0.956, "step": 2965 }, { "epoch": 0.07153006912164929, "grad_norm": 1.6529779434204102, "learning_rate": 9.773856606835006e-07, "loss": 1.0041, "step": 2970 }, { "epoch": 0.07165049011343658, "grad_norm": 1.643385887145996, "learning_rate": 9.772588986918162e-07, "loss": 1.0442, "step": 2975 }, { "epoch": 0.07177091110522386, "grad_norm": 1.5586971044540405, "learning_rate": 9.771321367001318e-07, "loss": 0.9751, "step": 2980 }, { "epoch": 0.07189133209701115, "grad_norm": 1.46442711353302, "learning_rate": 9.770053747084473e-07, "loss": 1.0208, "step": 2985 }, { "epoch": 0.07201175308879844, "grad_norm": 1.5164238214492798, "learning_rate": 9.76878612716763e-07, "loss": 1.0347, "step": 2990 }, { "epoch": 0.07213217408058573, "grad_norm": 1.6594327688217163, "learning_rate": 9.767518507250786e-07, "loss": 1.0027, "step": 2995 }, { "epoch": 0.07225259507237301, "grad_norm": 1.7102785110473633, "learning_rate": 9.766250887333942e-07, "loss": 1.0035, "step": 3000 }, { "epoch": 0.0723730160641603, "grad_norm": 1.5333776473999023, "learning_rate": 9.764983267417096e-07, "loss": 0.9626, "step": 3005 }, { "epoch": 0.07249343705594759, "grad_norm": 2.1526007652282715, "learning_rate": 9.763715647500253e-07, "loss": 0.9312, "step": 3010 }, { "epoch": 0.07261385804773487, "grad_norm": 1.6529592275619507, "learning_rate": 9.76244802758341e-07, "loss": 0.9714, "step": 3015 }, { "epoch": 0.07273427903952218, "grad_norm": 1.569183349609375, "learning_rate": 9.761180407666566e-07, "loss": 0.9682, "step": 3020 }, { "epoch": 0.07285470003130946, "grad_norm": 1.4431354999542236, "learning_rate": 9.75991278774972e-07, "loss": 1.0084, "step": 3025 }, { "epoch": 0.07297512102309675, "grad_norm": 1.7939332723617554, "learning_rate": 9.758645167832877e-07, "loss": 0.9695, "step": 3030 }, { "epoch": 0.07309554201488404, "grad_norm": 1.7346773147583008, "learning_rate": 9.757377547916033e-07, "loss": 0.9873, "step": 3035 }, { "epoch": 0.07321596300667133, "grad_norm": 1.540472388267517, "learning_rate": 9.75610992799919e-07, "loss": 0.9653, "step": 3040 }, { "epoch": 0.07333638399845861, "grad_norm": 1.637434720993042, "learning_rate": 9.754842308082344e-07, "loss": 0.9974, "step": 3045 }, { "epoch": 0.0734568049902459, "grad_norm": 1.5349056720733643, "learning_rate": 9.7535746881655e-07, "loss": 0.9389, "step": 3050 }, { "epoch": 0.07357722598203319, "grad_norm": 1.4215797185897827, "learning_rate": 9.752307068248657e-07, "loss": 0.9786, "step": 3055 }, { "epoch": 0.07369764697382047, "grad_norm": 1.6199102401733398, "learning_rate": 9.75103944833181e-07, "loss": 0.9213, "step": 3060 }, { "epoch": 0.07381806796560776, "grad_norm": 1.6182783842086792, "learning_rate": 9.749771828414967e-07, "loss": 0.9973, "step": 3065 }, { "epoch": 0.07393848895739505, "grad_norm": 1.4855315685272217, "learning_rate": 9.748504208498124e-07, "loss": 0.9413, "step": 3070 }, { "epoch": 0.07405890994918234, "grad_norm": 1.6804499626159668, "learning_rate": 9.747236588581278e-07, "loss": 0.9306, "step": 3075 }, { "epoch": 0.07417933094096962, "grad_norm": 1.6445069313049316, "learning_rate": 9.745968968664435e-07, "loss": 0.9883, "step": 3080 }, { "epoch": 0.07429975193275692, "grad_norm": 1.3424928188323975, "learning_rate": 9.744701348747591e-07, "loss": 0.9506, "step": 3085 }, { "epoch": 0.07442017292454421, "grad_norm": 1.3568273782730103, "learning_rate": 9.743433728830748e-07, "loss": 1.0278, "step": 3090 }, { "epoch": 0.0745405939163315, "grad_norm": 1.8795121908187866, "learning_rate": 9.742166108913902e-07, "loss": 0.9812, "step": 3095 }, { "epoch": 0.07466101490811879, "grad_norm": 1.6932713985443115, "learning_rate": 9.740898488997058e-07, "loss": 0.9878, "step": 3100 }, { "epoch": 0.07478143589990607, "grad_norm": 1.490128517150879, "learning_rate": 9.739630869080215e-07, "loss": 0.927, "step": 3105 }, { "epoch": 0.07490185689169336, "grad_norm": 1.718519687652588, "learning_rate": 9.738363249163371e-07, "loss": 1.0131, "step": 3110 }, { "epoch": 0.07502227788348065, "grad_norm": 1.6648001670837402, "learning_rate": 9.737095629246528e-07, "loss": 0.9911, "step": 3115 }, { "epoch": 0.07514269887526794, "grad_norm": 1.541821837425232, "learning_rate": 9.735828009329682e-07, "loss": 1.0154, "step": 3120 }, { "epoch": 0.07526311986705522, "grad_norm": 1.4682984352111816, "learning_rate": 9.734560389412838e-07, "loss": 0.981, "step": 3125 }, { "epoch": 0.07538354085884251, "grad_norm": 1.4823180437088013, "learning_rate": 9.733292769495995e-07, "loss": 1.0265, "step": 3130 }, { "epoch": 0.0755039618506298, "grad_norm": 1.359682559967041, "learning_rate": 9.73202514957915e-07, "loss": 0.9137, "step": 3135 }, { "epoch": 0.07562438284241708, "grad_norm": 1.5946718454360962, "learning_rate": 9.730757529662306e-07, "loss": 0.9695, "step": 3140 }, { "epoch": 0.07574480383420437, "grad_norm": 1.5661665201187134, "learning_rate": 9.729489909745462e-07, "loss": 0.9655, "step": 3145 }, { "epoch": 0.07586522482599167, "grad_norm": 1.5131149291992188, "learning_rate": 9.728222289828616e-07, "loss": 0.9829, "step": 3150 }, { "epoch": 0.07598564581777896, "grad_norm": 1.6737961769104004, "learning_rate": 9.726954669911773e-07, "loss": 0.9591, "step": 3155 }, { "epoch": 0.07610606680956625, "grad_norm": 1.9588265419006348, "learning_rate": 9.72568704999493e-07, "loss": 0.9509, "step": 3160 }, { "epoch": 0.07622648780135353, "grad_norm": 1.6424431800842285, "learning_rate": 9.724419430078084e-07, "loss": 0.9529, "step": 3165 }, { "epoch": 0.07634690879314082, "grad_norm": 1.5378398895263672, "learning_rate": 9.72315181016124e-07, "loss": 1.002, "step": 3170 }, { "epoch": 0.07646732978492811, "grad_norm": 1.6808853149414062, "learning_rate": 9.721884190244397e-07, "loss": 1.004, "step": 3175 }, { "epoch": 0.0765877507767154, "grad_norm": 1.6004494428634644, "learning_rate": 9.720616570327553e-07, "loss": 1.0206, "step": 3180 }, { "epoch": 0.07670817176850268, "grad_norm": 1.5458874702453613, "learning_rate": 9.71934895041071e-07, "loss": 0.972, "step": 3185 }, { "epoch": 0.07682859276028997, "grad_norm": 1.4363664388656616, "learning_rate": 9.718081330493864e-07, "loss": 1.0765, "step": 3190 }, { "epoch": 0.07694901375207726, "grad_norm": 1.6539244651794434, "learning_rate": 9.71681371057702e-07, "loss": 1.0203, "step": 3195 }, { "epoch": 0.07706943474386455, "grad_norm": 1.4915926456451416, "learning_rate": 9.715546090660177e-07, "loss": 0.9945, "step": 3200 }, { "epoch": 0.07718985573565183, "grad_norm": 1.519199013710022, "learning_rate": 9.714278470743333e-07, "loss": 0.9774, "step": 3205 }, { "epoch": 0.07731027672743913, "grad_norm": 1.5417412519454956, "learning_rate": 9.713010850826488e-07, "loss": 0.9993, "step": 3210 }, { "epoch": 0.07743069771922642, "grad_norm": 1.4990864992141724, "learning_rate": 9.711743230909644e-07, "loss": 0.9397, "step": 3215 }, { "epoch": 0.07755111871101371, "grad_norm": 2.056589126586914, "learning_rate": 9.7104756109928e-07, "loss": 1.0334, "step": 3220 }, { "epoch": 0.077671539702801, "grad_norm": 1.4858630895614624, "learning_rate": 9.709207991075955e-07, "loss": 0.9464, "step": 3225 }, { "epoch": 0.07779196069458828, "grad_norm": 1.5432004928588867, "learning_rate": 9.707940371159111e-07, "loss": 0.9859, "step": 3230 }, { "epoch": 0.07791238168637557, "grad_norm": 1.7187660932540894, "learning_rate": 9.706672751242268e-07, "loss": 0.995, "step": 3235 }, { "epoch": 0.07803280267816286, "grad_norm": 1.5984174013137817, "learning_rate": 9.705405131325422e-07, "loss": 1.0028, "step": 3240 }, { "epoch": 0.07815322366995014, "grad_norm": 1.5194472074508667, "learning_rate": 9.704137511408578e-07, "loss": 1.0039, "step": 3245 }, { "epoch": 0.07827364466173743, "grad_norm": 1.6343015432357788, "learning_rate": 9.702869891491735e-07, "loss": 1.0423, "step": 3250 }, { "epoch": 0.07839406565352472, "grad_norm": 1.5536481142044067, "learning_rate": 9.70160227157489e-07, "loss": 0.9829, "step": 3255 }, { "epoch": 0.078514486645312, "grad_norm": 1.6728906631469727, "learning_rate": 9.700334651658046e-07, "loss": 0.9838, "step": 3260 }, { "epoch": 0.0786349076370993, "grad_norm": 1.785743236541748, "learning_rate": 9.699067031741202e-07, "loss": 1.0017, "step": 3265 }, { "epoch": 0.07875532862888658, "grad_norm": 1.5994573831558228, "learning_rate": 9.697799411824359e-07, "loss": 1.0219, "step": 3270 }, { "epoch": 0.07887574962067388, "grad_norm": 1.6276570558547974, "learning_rate": 9.696531791907515e-07, "loss": 0.9886, "step": 3275 }, { "epoch": 0.07899617061246117, "grad_norm": 1.6569063663482666, "learning_rate": 9.69526417199067e-07, "loss": 0.9907, "step": 3280 }, { "epoch": 0.07911659160424846, "grad_norm": 1.6754655838012695, "learning_rate": 9.693996552073826e-07, "loss": 0.9841, "step": 3285 }, { "epoch": 0.07923701259603574, "grad_norm": 1.7406693696975708, "learning_rate": 9.692728932156982e-07, "loss": 1.0043, "step": 3290 }, { "epoch": 0.07935743358782303, "grad_norm": 1.4916058778762817, "learning_rate": 9.691461312240139e-07, "loss": 0.9928, "step": 3295 }, { "epoch": 0.07947785457961032, "grad_norm": 1.5197107791900635, "learning_rate": 9.690193692323293e-07, "loss": 0.9679, "step": 3300 }, { "epoch": 0.0795982755713976, "grad_norm": 1.7245734930038452, "learning_rate": 9.68892607240645e-07, "loss": 0.9643, "step": 3305 }, { "epoch": 0.0797186965631849, "grad_norm": 1.7159137725830078, "learning_rate": 9.687658452489606e-07, "loss": 0.9778, "step": 3310 }, { "epoch": 0.07983911755497218, "grad_norm": 1.564124345779419, "learning_rate": 9.68639083257276e-07, "loss": 0.9566, "step": 3315 }, { "epoch": 0.07995953854675947, "grad_norm": 1.5709251165390015, "learning_rate": 9.685123212655917e-07, "loss": 0.9781, "step": 3320 }, { "epoch": 0.08007995953854675, "grad_norm": 1.6757510900497437, "learning_rate": 9.683855592739073e-07, "loss": 1.0377, "step": 3325 }, { "epoch": 0.08020038053033404, "grad_norm": 1.6469229459762573, "learning_rate": 9.682587972822227e-07, "loss": 0.9878, "step": 3330 }, { "epoch": 0.08032080152212133, "grad_norm": 1.445010781288147, "learning_rate": 9.681320352905384e-07, "loss": 0.9342, "step": 3335 }, { "epoch": 0.08044122251390863, "grad_norm": 2.0901291370391846, "learning_rate": 9.68005273298854e-07, "loss": 0.9842, "step": 3340 }, { "epoch": 0.08056164350569592, "grad_norm": 1.6954982280731201, "learning_rate": 9.678785113071697e-07, "loss": 0.9938, "step": 3345 }, { "epoch": 0.0806820644974832, "grad_norm": 1.504270076751709, "learning_rate": 9.677517493154851e-07, "loss": 0.906, "step": 3350 }, { "epoch": 0.08080248548927049, "grad_norm": 1.5847922563552856, "learning_rate": 9.676249873238008e-07, "loss": 0.939, "step": 3355 }, { "epoch": 0.08092290648105778, "grad_norm": 1.6328890323638916, "learning_rate": 9.674982253321164e-07, "loss": 0.9739, "step": 3360 }, { "epoch": 0.08104332747284507, "grad_norm": 1.4796109199523926, "learning_rate": 9.67371463340432e-07, "loss": 0.9276, "step": 3365 }, { "epoch": 0.08116374846463235, "grad_norm": 1.4965159893035889, "learning_rate": 9.672447013487477e-07, "loss": 0.9853, "step": 3370 }, { "epoch": 0.08128416945641964, "grad_norm": 1.587471842765808, "learning_rate": 9.671179393570631e-07, "loss": 0.9829, "step": 3375 }, { "epoch": 0.08140459044820693, "grad_norm": 1.473659873008728, "learning_rate": 9.669911773653788e-07, "loss": 0.9669, "step": 3380 }, { "epoch": 0.08152501143999422, "grad_norm": 1.6593642234802246, "learning_rate": 9.668644153736944e-07, "loss": 0.9811, "step": 3385 }, { "epoch": 0.0816454324317815, "grad_norm": 1.6420273780822754, "learning_rate": 9.667376533820098e-07, "loss": 0.9429, "step": 3390 }, { "epoch": 0.08176585342356879, "grad_norm": 1.4429707527160645, "learning_rate": 9.666108913903255e-07, "loss": 0.8926, "step": 3395 }, { "epoch": 0.08188627441535609, "grad_norm": 1.7320809364318848, "learning_rate": 9.664841293986411e-07, "loss": 1.0105, "step": 3400 }, { "epoch": 0.08200669540714338, "grad_norm": 1.5669728517532349, "learning_rate": 9.663573674069566e-07, "loss": 1.0101, "step": 3405 }, { "epoch": 0.08212711639893067, "grad_norm": 1.6901319026947021, "learning_rate": 9.662306054152722e-07, "loss": 0.9212, "step": 3410 }, { "epoch": 0.08224753739071795, "grad_norm": 1.5522770881652832, "learning_rate": 9.661038434235879e-07, "loss": 1.0259, "step": 3415 }, { "epoch": 0.08236795838250524, "grad_norm": 1.5578824281692505, "learning_rate": 9.659770814319033e-07, "loss": 0.9705, "step": 3420 }, { "epoch": 0.08248837937429253, "grad_norm": 1.5385942459106445, "learning_rate": 9.65850319440219e-07, "loss": 0.9389, "step": 3425 }, { "epoch": 0.08260880036607982, "grad_norm": 1.453081488609314, "learning_rate": 9.657235574485346e-07, "loss": 0.917, "step": 3430 }, { "epoch": 0.0827292213578671, "grad_norm": 1.5530083179473877, "learning_rate": 9.655967954568502e-07, "loss": 0.989, "step": 3435 }, { "epoch": 0.08284964234965439, "grad_norm": 1.7280933856964111, "learning_rate": 9.654700334651657e-07, "loss": 0.9389, "step": 3440 }, { "epoch": 0.08297006334144168, "grad_norm": 1.5677049160003662, "learning_rate": 9.653432714734813e-07, "loss": 0.9811, "step": 3445 }, { "epoch": 0.08309048433322896, "grad_norm": 1.63473379611969, "learning_rate": 9.65216509481797e-07, "loss": 0.9095, "step": 3450 }, { "epoch": 0.08321090532501625, "grad_norm": 1.622157335281372, "learning_rate": 9.650897474901126e-07, "loss": 0.9601, "step": 3455 }, { "epoch": 0.08333132631680354, "grad_norm": 1.4803037643432617, "learning_rate": 9.649629854984282e-07, "loss": 0.9547, "step": 3460 }, { "epoch": 0.08345174730859084, "grad_norm": 1.5026421546936035, "learning_rate": 9.648362235067437e-07, "loss": 1.0317, "step": 3465 }, { "epoch": 0.08357216830037813, "grad_norm": 1.8309855461120605, "learning_rate": 9.647094615150593e-07, "loss": 0.956, "step": 3470 }, { "epoch": 0.08369258929216541, "grad_norm": 1.4832587242126465, "learning_rate": 9.64582699523375e-07, "loss": 0.939, "step": 3475 }, { "epoch": 0.0838130102839527, "grad_norm": 1.4295638799667358, "learning_rate": 9.644559375316904e-07, "loss": 1.0362, "step": 3480 }, { "epoch": 0.08393343127573999, "grad_norm": 1.5575592517852783, "learning_rate": 9.64329175540006e-07, "loss": 1.0136, "step": 3485 }, { "epoch": 0.08405385226752728, "grad_norm": 1.6031485795974731, "learning_rate": 9.642024135483217e-07, "loss": 0.9677, "step": 3490 }, { "epoch": 0.08417427325931456, "grad_norm": 1.580514669418335, "learning_rate": 9.640756515566371e-07, "loss": 0.96, "step": 3495 }, { "epoch": 0.08429469425110185, "grad_norm": 1.5536184310913086, "learning_rate": 9.639488895649528e-07, "loss": 0.9376, "step": 3500 }, { "epoch": 0.08441511524288914, "grad_norm": 1.4722299575805664, "learning_rate": 9.638221275732684e-07, "loss": 0.9752, "step": 3505 }, { "epoch": 0.08453553623467643, "grad_norm": 1.56247878074646, "learning_rate": 9.636953655815838e-07, "loss": 0.9566, "step": 3510 }, { "epoch": 0.08465595722646371, "grad_norm": 1.4400372505187988, "learning_rate": 9.635686035898995e-07, "loss": 0.9702, "step": 3515 }, { "epoch": 0.084776378218251, "grad_norm": 1.4799162149429321, "learning_rate": 9.634418415982151e-07, "loss": 0.9713, "step": 3520 }, { "epoch": 0.08489679921003829, "grad_norm": 1.4603667259216309, "learning_rate": 9.633150796065308e-07, "loss": 0.9461, "step": 3525 }, { "epoch": 0.08501722020182559, "grad_norm": 1.6365811824798584, "learning_rate": 9.631883176148464e-07, "loss": 0.9602, "step": 3530 }, { "epoch": 0.08513764119361288, "grad_norm": 1.5613988637924194, "learning_rate": 9.630615556231618e-07, "loss": 0.9674, "step": 3535 }, { "epoch": 0.08525806218540016, "grad_norm": 1.7290339469909668, "learning_rate": 9.629347936314775e-07, "loss": 0.9317, "step": 3540 }, { "epoch": 0.08537848317718745, "grad_norm": 1.7438316345214844, "learning_rate": 9.628080316397931e-07, "loss": 0.957, "step": 3545 }, { "epoch": 0.08549890416897474, "grad_norm": 1.7681242227554321, "learning_rate": 9.626812696481088e-07, "loss": 0.9307, "step": 3550 }, { "epoch": 0.08561932516076202, "grad_norm": 1.491039752960205, "learning_rate": 9.625545076564242e-07, "loss": 0.9786, "step": 3555 }, { "epoch": 0.08573974615254931, "grad_norm": 1.6438604593276978, "learning_rate": 9.624277456647399e-07, "loss": 0.9276, "step": 3560 }, { "epoch": 0.0858601671443366, "grad_norm": 1.577164649963379, "learning_rate": 9.623009836730555e-07, "loss": 1.0229, "step": 3565 }, { "epoch": 0.08598058813612389, "grad_norm": 1.5834629535675049, "learning_rate": 9.62174221681371e-07, "loss": 1.0192, "step": 3570 }, { "epoch": 0.08610100912791117, "grad_norm": 1.7259495258331299, "learning_rate": 9.620474596896866e-07, "loss": 0.9634, "step": 3575 }, { "epoch": 0.08622143011969846, "grad_norm": 1.504324197769165, "learning_rate": 9.619206976980022e-07, "loss": 0.9599, "step": 3580 }, { "epoch": 0.08634185111148575, "grad_norm": 1.4879755973815918, "learning_rate": 9.617939357063177e-07, "loss": 0.9769, "step": 3585 }, { "epoch": 0.08646227210327304, "grad_norm": 1.4818052053451538, "learning_rate": 9.616671737146333e-07, "loss": 0.9764, "step": 3590 }, { "epoch": 0.08658269309506034, "grad_norm": 1.4573898315429688, "learning_rate": 9.61540411722949e-07, "loss": 0.9719, "step": 3595 }, { "epoch": 0.08670311408684762, "grad_norm": 1.512553334236145, "learning_rate": 9.614136497312644e-07, "loss": 0.9713, "step": 3600 }, { "epoch": 0.08682353507863491, "grad_norm": 1.518481969833374, "learning_rate": 9.6128688773958e-07, "loss": 1.0032, "step": 3605 }, { "epoch": 0.0869439560704222, "grad_norm": 1.7753887176513672, "learning_rate": 9.611601257478957e-07, "loss": 0.9889, "step": 3610 }, { "epoch": 0.08706437706220949, "grad_norm": 1.4080522060394287, "learning_rate": 9.610333637562113e-07, "loss": 0.9666, "step": 3615 }, { "epoch": 0.08718479805399677, "grad_norm": 1.6740397214889526, "learning_rate": 9.60906601764527e-07, "loss": 0.9517, "step": 3620 }, { "epoch": 0.08730521904578406, "grad_norm": 1.6367727518081665, "learning_rate": 9.607798397728424e-07, "loss": 0.9712, "step": 3625 }, { "epoch": 0.08742564003757135, "grad_norm": 1.6772522926330566, "learning_rate": 9.60653077781158e-07, "loss": 0.9832, "step": 3630 }, { "epoch": 0.08754606102935863, "grad_norm": 1.4191092252731323, "learning_rate": 9.605263157894737e-07, "loss": 0.947, "step": 3635 }, { "epoch": 0.08766648202114592, "grad_norm": 1.4476778507232666, "learning_rate": 9.603995537977893e-07, "loss": 0.957, "step": 3640 }, { "epoch": 0.08778690301293321, "grad_norm": 1.9380329847335815, "learning_rate": 9.602727918061048e-07, "loss": 1.0579, "step": 3645 }, { "epoch": 0.0879073240047205, "grad_norm": 1.3948523998260498, "learning_rate": 9.601460298144204e-07, "loss": 0.9996, "step": 3650 }, { "epoch": 0.0880277449965078, "grad_norm": 1.8436353206634521, "learning_rate": 9.60019267822736e-07, "loss": 0.9496, "step": 3655 }, { "epoch": 0.08814816598829509, "grad_norm": 1.5903769731521606, "learning_rate": 9.598925058310515e-07, "loss": 0.993, "step": 3660 }, { "epoch": 0.08826858698008237, "grad_norm": 1.574457049369812, "learning_rate": 9.597657438393671e-07, "loss": 1.0448, "step": 3665 }, { "epoch": 0.08838900797186966, "grad_norm": 1.6137624979019165, "learning_rate": 9.596389818476828e-07, "loss": 1.0311, "step": 3670 }, { "epoch": 0.08850942896365695, "grad_norm": 1.7495733499526978, "learning_rate": 9.595122198559982e-07, "loss": 1.0024, "step": 3675 }, { "epoch": 0.08862984995544423, "grad_norm": 1.5811641216278076, "learning_rate": 9.593854578643138e-07, "loss": 0.9636, "step": 3680 }, { "epoch": 0.08875027094723152, "grad_norm": 1.7564260959625244, "learning_rate": 9.592586958726295e-07, "loss": 0.9737, "step": 3685 }, { "epoch": 0.08887069193901881, "grad_norm": 1.8012256622314453, "learning_rate": 9.591319338809451e-07, "loss": 0.991, "step": 3690 }, { "epoch": 0.0889911129308061, "grad_norm": 1.920172095298767, "learning_rate": 9.590051718892606e-07, "loss": 1.0374, "step": 3695 }, { "epoch": 0.08911153392259338, "grad_norm": 1.4977744817733765, "learning_rate": 9.588784098975762e-07, "loss": 0.9509, "step": 3700 }, { "epoch": 0.08923195491438067, "grad_norm": 1.7665150165557861, "learning_rate": 9.587516479058919e-07, "loss": 0.9735, "step": 3705 }, { "epoch": 0.08935237590616796, "grad_norm": 1.5105448961257935, "learning_rate": 9.586248859142075e-07, "loss": 1.0022, "step": 3710 }, { "epoch": 0.08947279689795525, "grad_norm": 1.488258957862854, "learning_rate": 9.584981239225232e-07, "loss": 1.0177, "step": 3715 }, { "epoch": 0.08959321788974255, "grad_norm": 1.6075270175933838, "learning_rate": 9.583713619308386e-07, "loss": 0.9024, "step": 3720 }, { "epoch": 0.08971363888152983, "grad_norm": 1.5839999914169312, "learning_rate": 9.582445999391542e-07, "loss": 0.9843, "step": 3725 }, { "epoch": 0.08983405987331712, "grad_norm": 1.4419506788253784, "learning_rate": 9.581178379474699e-07, "loss": 0.9383, "step": 3730 }, { "epoch": 0.08995448086510441, "grad_norm": 1.660599946975708, "learning_rate": 9.579910759557853e-07, "loss": 0.9855, "step": 3735 }, { "epoch": 0.0900749018568917, "grad_norm": 1.6551530361175537, "learning_rate": 9.57864313964101e-07, "loss": 0.9816, "step": 3740 }, { "epoch": 0.09019532284867898, "grad_norm": 1.4094029664993286, "learning_rate": 9.577375519724166e-07, "loss": 0.9733, "step": 3745 }, { "epoch": 0.09031574384046627, "grad_norm": 1.5762144327163696, "learning_rate": 9.57610789980732e-07, "loss": 0.9718, "step": 3750 }, { "epoch": 0.09043616483225356, "grad_norm": 1.6266120672225952, "learning_rate": 9.574840279890477e-07, "loss": 0.9761, "step": 3755 }, { "epoch": 0.09055658582404084, "grad_norm": 1.5777689218521118, "learning_rate": 9.573572659973633e-07, "loss": 1.0, "step": 3760 }, { "epoch": 0.09067700681582813, "grad_norm": 1.5066728591918945, "learning_rate": 9.572305040056788e-07, "loss": 0.9302, "step": 3765 }, { "epoch": 0.09079742780761542, "grad_norm": 1.7050273418426514, "learning_rate": 9.571037420139944e-07, "loss": 0.953, "step": 3770 }, { "epoch": 0.0909178487994027, "grad_norm": 1.7422499656677246, "learning_rate": 9.5697698002231e-07, "loss": 1.003, "step": 3775 }, { "epoch": 0.09103826979119, "grad_norm": 1.5633741617202759, "learning_rate": 9.568502180306257e-07, "loss": 0.9682, "step": 3780 }, { "epoch": 0.0911586907829773, "grad_norm": 1.7610095739364624, "learning_rate": 9.567234560389411e-07, "loss": 0.9223, "step": 3785 }, { "epoch": 0.09127911177476458, "grad_norm": 1.6257444620132446, "learning_rate": 9.565966940472568e-07, "loss": 0.9416, "step": 3790 }, { "epoch": 0.09139953276655187, "grad_norm": 1.455300211906433, "learning_rate": 9.564699320555724e-07, "loss": 1.0258, "step": 3795 }, { "epoch": 0.09151995375833916, "grad_norm": 1.596089243888855, "learning_rate": 9.56343170063888e-07, "loss": 1.001, "step": 3800 }, { "epoch": 0.09164037475012644, "grad_norm": 1.5101442337036133, "learning_rate": 9.562164080722037e-07, "loss": 1.0037, "step": 3805 }, { "epoch": 0.09176079574191373, "grad_norm": 1.5162341594696045, "learning_rate": 9.560896460805191e-07, "loss": 1.0177, "step": 3810 }, { "epoch": 0.09188121673370102, "grad_norm": 1.6944547891616821, "learning_rate": 9.559628840888348e-07, "loss": 0.9239, "step": 3815 }, { "epoch": 0.0920016377254883, "grad_norm": 1.5876063108444214, "learning_rate": 9.558361220971504e-07, "loss": 0.9869, "step": 3820 }, { "epoch": 0.09212205871727559, "grad_norm": 1.616463303565979, "learning_rate": 9.55709360105466e-07, "loss": 1.03, "step": 3825 }, { "epoch": 0.09224247970906288, "grad_norm": 1.567016363143921, "learning_rate": 9.555825981137815e-07, "loss": 0.9364, "step": 3830 }, { "epoch": 0.09236290070085017, "grad_norm": 1.412610650062561, "learning_rate": 9.554558361220971e-07, "loss": 0.9713, "step": 3835 }, { "epoch": 0.09248332169263745, "grad_norm": 1.6522778272628784, "learning_rate": 9.553290741304128e-07, "loss": 0.9959, "step": 3840 }, { "epoch": 0.09260374268442476, "grad_norm": 1.5945355892181396, "learning_rate": 9.552023121387282e-07, "loss": 0.9766, "step": 3845 }, { "epoch": 0.09272416367621204, "grad_norm": 1.6672556400299072, "learning_rate": 9.550755501470439e-07, "loss": 0.9743, "step": 3850 }, { "epoch": 0.09284458466799933, "grad_norm": 1.7128037214279175, "learning_rate": 9.549487881553595e-07, "loss": 0.982, "step": 3855 }, { "epoch": 0.09296500565978662, "grad_norm": 1.6894267797470093, "learning_rate": 9.54822026163675e-07, "loss": 0.9418, "step": 3860 }, { "epoch": 0.0930854266515739, "grad_norm": 1.9653635025024414, "learning_rate": 9.546952641719906e-07, "loss": 1.0066, "step": 3865 }, { "epoch": 0.09320584764336119, "grad_norm": 1.4464094638824463, "learning_rate": 9.545685021803062e-07, "loss": 0.9741, "step": 3870 }, { "epoch": 0.09332626863514848, "grad_norm": 1.7934719324111938, "learning_rate": 9.544417401886219e-07, "loss": 0.9716, "step": 3875 }, { "epoch": 0.09344668962693577, "grad_norm": 1.5995069742202759, "learning_rate": 9.543149781969373e-07, "loss": 0.9481, "step": 3880 }, { "epoch": 0.09356711061872305, "grad_norm": 1.2907156944274902, "learning_rate": 9.54188216205253e-07, "loss": 0.8987, "step": 3885 }, { "epoch": 0.09368753161051034, "grad_norm": 1.5202016830444336, "learning_rate": 9.540614542135686e-07, "loss": 0.897, "step": 3890 }, { "epoch": 0.09380795260229763, "grad_norm": 1.5255939960479736, "learning_rate": 9.539346922218842e-07, "loss": 0.9924, "step": 3895 }, { "epoch": 0.09392837359408492, "grad_norm": 1.9490069150924683, "learning_rate": 9.538079302301999e-07, "loss": 1.005, "step": 3900 }, { "epoch": 0.0940487945858722, "grad_norm": 1.5038361549377441, "learning_rate": 9.536811682385153e-07, "loss": 1.0007, "step": 3905 }, { "epoch": 0.0941692155776595, "grad_norm": 1.6892212629318237, "learning_rate": 9.53554406246831e-07, "loss": 0.9309, "step": 3910 }, { "epoch": 0.09428963656944679, "grad_norm": 1.5403926372528076, "learning_rate": 9.534276442551465e-07, "loss": 0.9412, "step": 3915 }, { "epoch": 0.09441005756123408, "grad_norm": 1.5528895854949951, "learning_rate": 9.53300882263462e-07, "loss": 0.9814, "step": 3920 }, { "epoch": 0.09453047855302137, "grad_norm": 1.7345143556594849, "learning_rate": 9.531741202717777e-07, "loss": 1.0062, "step": 3925 }, { "epoch": 0.09465089954480865, "grad_norm": 1.5540204048156738, "learning_rate": 9.530473582800933e-07, "loss": 0.9077, "step": 3930 }, { "epoch": 0.09477132053659594, "grad_norm": 1.5319840908050537, "learning_rate": 9.529205962884088e-07, "loss": 0.9675, "step": 3935 }, { "epoch": 0.09489174152838323, "grad_norm": 1.5255876779556274, "learning_rate": 9.527938342967244e-07, "loss": 0.9512, "step": 3940 }, { "epoch": 0.09501216252017052, "grad_norm": 1.393852710723877, "learning_rate": 9.526670723050401e-07, "loss": 0.9679, "step": 3945 }, { "epoch": 0.0951325835119578, "grad_norm": 1.5980697870254517, "learning_rate": 9.525403103133556e-07, "loss": 0.9291, "step": 3950 }, { "epoch": 0.09525300450374509, "grad_norm": 1.8581165075302124, "learning_rate": 9.524135483216712e-07, "loss": 0.8802, "step": 3955 }, { "epoch": 0.09537342549553238, "grad_norm": 1.8296480178833008, "learning_rate": 9.522867863299868e-07, "loss": 0.9591, "step": 3960 }, { "epoch": 0.09549384648731966, "grad_norm": 1.7010616064071655, "learning_rate": 9.521600243383023e-07, "loss": 0.9367, "step": 3965 }, { "epoch": 0.09561426747910695, "grad_norm": 1.459513545036316, "learning_rate": 9.52033262346618e-07, "loss": 0.9193, "step": 3970 }, { "epoch": 0.09573468847089425, "grad_norm": 1.5484166145324707, "learning_rate": 9.519065003549336e-07, "loss": 0.97, "step": 3975 }, { "epoch": 0.09585510946268154, "grad_norm": 1.3898035287857056, "learning_rate": 9.517797383632491e-07, "loss": 1.0005, "step": 3980 }, { "epoch": 0.09597553045446883, "grad_norm": 1.6612249612808228, "learning_rate": 9.516529763715647e-07, "loss": 0.9713, "step": 3985 }, { "epoch": 0.09609595144625611, "grad_norm": 1.581580638885498, "learning_rate": 9.515262143798803e-07, "loss": 0.9966, "step": 3990 }, { "epoch": 0.0962163724380434, "grad_norm": 1.6020958423614502, "learning_rate": 9.513994523881959e-07, "loss": 0.9582, "step": 3995 }, { "epoch": 0.09633679342983069, "grad_norm": 1.6202304363250732, "learning_rate": 9.512726903965115e-07, "loss": 0.9673, "step": 4000 }, { "epoch": 0.09645721442161798, "grad_norm": 1.4806082248687744, "learning_rate": 9.51145928404827e-07, "loss": 0.9414, "step": 4005 }, { "epoch": 0.09657763541340526, "grad_norm": 1.4153348207473755, "learning_rate": 9.510191664131426e-07, "loss": 1.0094, "step": 4010 }, { "epoch": 0.09669805640519255, "grad_norm": 1.5099906921386719, "learning_rate": 9.508924044214582e-07, "loss": 1.0002, "step": 4015 }, { "epoch": 0.09681847739697984, "grad_norm": 1.4537502527236938, "learning_rate": 9.507656424297739e-07, "loss": 0.9604, "step": 4020 }, { "epoch": 0.09693889838876713, "grad_norm": 1.5901180505752563, "learning_rate": 9.506388804380894e-07, "loss": 0.9957, "step": 4025 }, { "epoch": 0.09705931938055441, "grad_norm": 1.530016303062439, "learning_rate": 9.50512118446405e-07, "loss": 0.9687, "step": 4030 }, { "epoch": 0.0971797403723417, "grad_norm": 1.5487666130065918, "learning_rate": 9.503853564547206e-07, "loss": 0.964, "step": 4035 }, { "epoch": 0.097300161364129, "grad_norm": 1.649968147277832, "learning_rate": 9.502585944630361e-07, "loss": 0.995, "step": 4040 }, { "epoch": 0.09742058235591629, "grad_norm": 1.5910958051681519, "learning_rate": 9.501318324713518e-07, "loss": 0.9521, "step": 4045 }, { "epoch": 0.09754100334770358, "grad_norm": 1.509109377861023, "learning_rate": 9.500050704796674e-07, "loss": 0.9757, "step": 4050 }, { "epoch": 0.09766142433949086, "grad_norm": 1.4290871620178223, "learning_rate": 9.498783084879829e-07, "loss": 0.9602, "step": 4055 }, { "epoch": 0.09778184533127815, "grad_norm": 1.5802230834960938, "learning_rate": 9.497515464962985e-07, "loss": 0.9717, "step": 4060 }, { "epoch": 0.09790226632306544, "grad_norm": 1.4167861938476562, "learning_rate": 9.496247845046142e-07, "loss": 0.9509, "step": 4065 }, { "epoch": 0.09802268731485272, "grad_norm": 1.5323904752731323, "learning_rate": 9.494980225129297e-07, "loss": 0.9557, "step": 4070 }, { "epoch": 0.09814310830664001, "grad_norm": 1.2644950151443481, "learning_rate": 9.493712605212452e-07, "loss": 0.9239, "step": 4075 }, { "epoch": 0.0982635292984273, "grad_norm": 1.574794054031372, "learning_rate": 9.492444985295609e-07, "loss": 0.9739, "step": 4080 }, { "epoch": 0.09838395029021459, "grad_norm": 1.424659252166748, "learning_rate": 9.491177365378764e-07, "loss": 0.9867, "step": 4085 }, { "epoch": 0.09850437128200187, "grad_norm": 1.5145343542099, "learning_rate": 9.489909745461921e-07, "loss": 0.9996, "step": 4090 }, { "epoch": 0.09862479227378916, "grad_norm": 1.4513949155807495, "learning_rate": 9.488642125545077e-07, "loss": 0.9537, "step": 4095 }, { "epoch": 0.09874521326557646, "grad_norm": 1.501413345336914, "learning_rate": 9.487374505628231e-07, "loss": 0.9569, "step": 4100 }, { "epoch": 0.09886563425736375, "grad_norm": 1.536135196685791, "learning_rate": 9.486106885711388e-07, "loss": 0.9226, "step": 4105 }, { "epoch": 0.09898605524915104, "grad_norm": 1.2886914014816284, "learning_rate": 9.484839265794544e-07, "loss": 0.9593, "step": 4110 }, { "epoch": 0.09910647624093832, "grad_norm": 1.841726303100586, "learning_rate": 9.4835716458777e-07, "loss": 1.0258, "step": 4115 }, { "epoch": 0.09922689723272561, "grad_norm": 1.5284498929977417, "learning_rate": 9.482304025960855e-07, "loss": 0.9678, "step": 4120 }, { "epoch": 0.0993473182245129, "grad_norm": 3.6049418449401855, "learning_rate": 9.481036406044011e-07, "loss": 0.9033, "step": 4125 }, { "epoch": 0.09946773921630019, "grad_norm": 1.560465931892395, "learning_rate": 9.479768786127167e-07, "loss": 0.9126, "step": 4130 }, { "epoch": 0.09958816020808747, "grad_norm": 1.5048307180404663, "learning_rate": 9.478501166210323e-07, "loss": 0.9472, "step": 4135 }, { "epoch": 0.09970858119987476, "grad_norm": 1.4856950044631958, "learning_rate": 9.47723354629348e-07, "loss": 0.9714, "step": 4140 }, { "epoch": 0.09982900219166205, "grad_norm": 1.6258323192596436, "learning_rate": 9.475965926376634e-07, "loss": 0.9564, "step": 4145 }, { "epoch": 0.09994942318344933, "grad_norm": 1.552347183227539, "learning_rate": 9.474698306459791e-07, "loss": 0.9473, "step": 4150 }, { "epoch": 0.10006984417523662, "grad_norm": 1.8179153203964233, "learning_rate": 9.473430686542947e-07, "loss": 0.9837, "step": 4155 }, { "epoch": 0.10019026516702391, "grad_norm": 1.4746826887130737, "learning_rate": 9.472163066626102e-07, "loss": 0.9202, "step": 4160 }, { "epoch": 0.10031068615881121, "grad_norm": 1.7761483192443848, "learning_rate": 9.470895446709259e-07, "loss": 1.0175, "step": 4165 }, { "epoch": 0.1004311071505985, "grad_norm": 1.3966078758239746, "learning_rate": 9.469627826792414e-07, "loss": 0.9678, "step": 4170 }, { "epoch": 0.10055152814238578, "grad_norm": 1.6143893003463745, "learning_rate": 9.46836020687557e-07, "loss": 0.9992, "step": 4175 }, { "epoch": 0.10067194913417307, "grad_norm": 1.681153655052185, "learning_rate": 9.467092586958726e-07, "loss": 0.965, "step": 4180 }, { "epoch": 0.10079237012596036, "grad_norm": 1.4772725105285645, "learning_rate": 9.465824967041883e-07, "loss": 1.012, "step": 4185 }, { "epoch": 0.10091279111774765, "grad_norm": 1.9717448949813843, "learning_rate": 9.464557347125037e-07, "loss": 1.0125, "step": 4190 }, { "epoch": 0.10103321210953493, "grad_norm": 1.4508270025253296, "learning_rate": 9.463289727208193e-07, "loss": 0.9695, "step": 4195 }, { "epoch": 0.10115363310132222, "grad_norm": 1.4963538646697998, "learning_rate": 9.46202210729135e-07, "loss": 1.0119, "step": 4200 }, { "epoch": 0.10127405409310951, "grad_norm": 1.5520143508911133, "learning_rate": 9.460754487374505e-07, "loss": 0.9696, "step": 4205 }, { "epoch": 0.1013944750848968, "grad_norm": 1.6408535242080688, "learning_rate": 9.459486867457662e-07, "loss": 0.9551, "step": 4210 }, { "epoch": 0.10151489607668408, "grad_norm": 1.5083121061325073, "learning_rate": 9.458219247540817e-07, "loss": 0.9692, "step": 4215 }, { "epoch": 0.10163531706847137, "grad_norm": 1.5416884422302246, "learning_rate": 9.456951627623972e-07, "loss": 0.9981, "step": 4220 }, { "epoch": 0.10175573806025866, "grad_norm": 1.4579458236694336, "learning_rate": 9.455684007707129e-07, "loss": 0.9631, "step": 4225 }, { "epoch": 0.10187615905204596, "grad_norm": 1.7778942584991455, "learning_rate": 9.454416387790285e-07, "loss": 0.9359, "step": 4230 }, { "epoch": 0.10199658004383325, "grad_norm": 1.5496573448181152, "learning_rate": 9.45314876787344e-07, "loss": 0.9948, "step": 4235 }, { "epoch": 0.10211700103562053, "grad_norm": 1.6221665143966675, "learning_rate": 9.451881147956596e-07, "loss": 0.9882, "step": 4240 }, { "epoch": 0.10223742202740782, "grad_norm": 2.9152276515960693, "learning_rate": 9.450613528039752e-07, "loss": 0.9812, "step": 4245 }, { "epoch": 0.10235784301919511, "grad_norm": 1.5360208749771118, "learning_rate": 9.449345908122908e-07, "loss": 0.944, "step": 4250 }, { "epoch": 0.1024782640109824, "grad_norm": 1.5541127920150757, "learning_rate": 9.448078288206064e-07, "loss": 0.9994, "step": 4255 }, { "epoch": 0.10259868500276968, "grad_norm": 1.4846186637878418, "learning_rate": 9.44681066828922e-07, "loss": 0.9377, "step": 4260 }, { "epoch": 0.10271910599455697, "grad_norm": 1.5796730518341064, "learning_rate": 9.445543048372375e-07, "loss": 0.903, "step": 4265 }, { "epoch": 0.10283952698634426, "grad_norm": 1.6794935464859009, "learning_rate": 9.444275428455532e-07, "loss": 0.9928, "step": 4270 }, { "epoch": 0.10295994797813154, "grad_norm": 1.5276776552200317, "learning_rate": 9.443007808538688e-07, "loss": 0.9501, "step": 4275 }, { "epoch": 0.10308036896991883, "grad_norm": 1.5344417095184326, "learning_rate": 9.441740188621843e-07, "loss": 0.9711, "step": 4280 }, { "epoch": 0.10320078996170612, "grad_norm": 1.7467256784439087, "learning_rate": 9.440472568704999e-07, "loss": 0.9535, "step": 4285 }, { "epoch": 0.10332121095349342, "grad_norm": 1.38759183883667, "learning_rate": 9.439204948788155e-07, "loss": 0.9555, "step": 4290 }, { "epoch": 0.10344163194528071, "grad_norm": 1.624312162399292, "learning_rate": 9.437937328871311e-07, "loss": 1.0224, "step": 4295 }, { "epoch": 0.103562052937068, "grad_norm": 1.673833966255188, "learning_rate": 9.436669708954467e-07, "loss": 0.9612, "step": 4300 }, { "epoch": 0.10368247392885528, "grad_norm": 1.6893752813339233, "learning_rate": 9.435402089037622e-07, "loss": 0.9885, "step": 4305 }, { "epoch": 0.10380289492064257, "grad_norm": 1.6171820163726807, "learning_rate": 9.434134469120778e-07, "loss": 1.0103, "step": 4310 }, { "epoch": 0.10392331591242986, "grad_norm": 1.706410527229309, "learning_rate": 9.432866849203934e-07, "loss": 0.9841, "step": 4315 }, { "epoch": 0.10404373690421714, "grad_norm": 1.5108826160430908, "learning_rate": 9.431599229287091e-07, "loss": 0.9814, "step": 4320 }, { "epoch": 0.10416415789600443, "grad_norm": 1.5877901315689087, "learning_rate": 9.430331609370246e-07, "loss": 0.9791, "step": 4325 }, { "epoch": 0.10428457888779172, "grad_norm": 1.650411605834961, "learning_rate": 9.429063989453401e-07, "loss": 0.9648, "step": 4330 }, { "epoch": 0.104404999879579, "grad_norm": 1.6145753860473633, "learning_rate": 9.427796369536558e-07, "loss": 0.9916, "step": 4335 }, { "epoch": 0.10452542087136629, "grad_norm": 1.7058037519454956, "learning_rate": 9.426528749619713e-07, "loss": 0.9741, "step": 4340 }, { "epoch": 0.10464584186315358, "grad_norm": 1.5574986934661865, "learning_rate": 9.42526112970287e-07, "loss": 0.9752, "step": 4345 }, { "epoch": 0.10476626285494087, "grad_norm": 1.51565420627594, "learning_rate": 9.423993509786026e-07, "loss": 0.968, "step": 4350 }, { "epoch": 0.10488668384672817, "grad_norm": 1.8395243883132935, "learning_rate": 9.422725889869181e-07, "loss": 0.953, "step": 4355 }, { "epoch": 0.10500710483851546, "grad_norm": 1.5606211423873901, "learning_rate": 9.421458269952337e-07, "loss": 0.957, "step": 4360 }, { "epoch": 0.10512752583030274, "grad_norm": 1.6414927244186401, "learning_rate": 9.420190650035493e-07, "loss": 0.9739, "step": 4365 }, { "epoch": 0.10524794682209003, "grad_norm": 1.4883346557617188, "learning_rate": 9.418923030118649e-07, "loss": 0.9875, "step": 4370 }, { "epoch": 0.10536836781387732, "grad_norm": 1.480233073234558, "learning_rate": 9.417655410201804e-07, "loss": 0.9975, "step": 4375 }, { "epoch": 0.1054887888056646, "grad_norm": 1.6190452575683594, "learning_rate": 9.416387790284961e-07, "loss": 1.0012, "step": 4380 }, { "epoch": 0.10560920979745189, "grad_norm": 1.6386826038360596, "learning_rate": 9.415120170368116e-07, "loss": 0.9849, "step": 4385 }, { "epoch": 0.10572963078923918, "grad_norm": 1.881450891494751, "learning_rate": 9.413852550451272e-07, "loss": 0.923, "step": 4390 }, { "epoch": 0.10585005178102647, "grad_norm": 1.536907434463501, "learning_rate": 9.412584930534429e-07, "loss": 0.9716, "step": 4395 }, { "epoch": 0.10597047277281375, "grad_norm": 2.033087968826294, "learning_rate": 9.411317310617583e-07, "loss": 0.945, "step": 4400 }, { "epoch": 0.10609089376460104, "grad_norm": 1.5700300931930542, "learning_rate": 9.41004969070074e-07, "loss": 0.9874, "step": 4405 }, { "epoch": 0.10621131475638833, "grad_norm": 1.6118327379226685, "learning_rate": 9.408782070783896e-07, "loss": 0.9513, "step": 4410 }, { "epoch": 0.10633173574817562, "grad_norm": 1.5397894382476807, "learning_rate": 9.407514450867052e-07, "loss": 0.9445, "step": 4415 }, { "epoch": 0.10645215673996292, "grad_norm": 1.7848899364471436, "learning_rate": 9.406246830950207e-07, "loss": 0.9626, "step": 4420 }, { "epoch": 0.1065725777317502, "grad_norm": 1.5513091087341309, "learning_rate": 9.404979211033363e-07, "loss": 0.9744, "step": 4425 }, { "epoch": 0.10669299872353749, "grad_norm": 1.535009741783142, "learning_rate": 9.403711591116519e-07, "loss": 0.9383, "step": 4430 }, { "epoch": 0.10681341971532478, "grad_norm": 1.59181547164917, "learning_rate": 9.402443971199675e-07, "loss": 0.9882, "step": 4435 }, { "epoch": 0.10693384070711207, "grad_norm": 1.7125526666641235, "learning_rate": 9.401176351282832e-07, "loss": 0.979, "step": 4440 }, { "epoch": 0.10705426169889935, "grad_norm": 1.619505524635315, "learning_rate": 9.399908731365986e-07, "loss": 0.9604, "step": 4445 }, { "epoch": 0.10717468269068664, "grad_norm": 1.9453794956207275, "learning_rate": 9.398641111449142e-07, "loss": 0.9507, "step": 4450 }, { "epoch": 0.10729510368247393, "grad_norm": 1.5830106735229492, "learning_rate": 9.397373491532299e-07, "loss": 0.9718, "step": 4455 }, { "epoch": 0.10741552467426121, "grad_norm": 1.6598644256591797, "learning_rate": 9.396105871615454e-07, "loss": 1.0022, "step": 4460 }, { "epoch": 0.1075359456660485, "grad_norm": 1.6971837282180786, "learning_rate": 9.394838251698611e-07, "loss": 1.0149, "step": 4465 }, { "epoch": 0.10765636665783579, "grad_norm": 1.7452025413513184, "learning_rate": 9.393570631781766e-07, "loss": 0.9395, "step": 4470 }, { "epoch": 0.10777678764962308, "grad_norm": 1.655214786529541, "learning_rate": 9.392303011864922e-07, "loss": 0.9422, "step": 4475 }, { "epoch": 0.10789720864141038, "grad_norm": 1.542333960533142, "learning_rate": 9.391035391948078e-07, "loss": 1.021, "step": 4480 }, { "epoch": 0.10801762963319766, "grad_norm": 1.7552878856658936, "learning_rate": 9.389767772031234e-07, "loss": 0.9563, "step": 4485 }, { "epoch": 0.10813805062498495, "grad_norm": 1.8255192041397095, "learning_rate": 9.388500152114389e-07, "loss": 0.9953, "step": 4490 }, { "epoch": 0.10825847161677224, "grad_norm": 1.5070478916168213, "learning_rate": 9.387232532197545e-07, "loss": 0.9908, "step": 4495 }, { "epoch": 0.10837889260855953, "grad_norm": 1.4872255325317383, "learning_rate": 9.385964912280702e-07, "loss": 0.9305, "step": 4500 }, { "epoch": 0.10849931360034681, "grad_norm": 1.501749873161316, "learning_rate": 9.384697292363857e-07, "loss": 0.9861, "step": 4505 }, { "epoch": 0.1086197345921341, "grad_norm": 1.7855017185211182, "learning_rate": 9.383429672447013e-07, "loss": 0.9322, "step": 4510 }, { "epoch": 0.10874015558392139, "grad_norm": 1.3943707942962646, "learning_rate": 9.382162052530169e-07, "loss": 0.9588, "step": 4515 }, { "epoch": 0.10886057657570868, "grad_norm": 1.5438886880874634, "learning_rate": 9.380894432613324e-07, "loss": 0.9358, "step": 4520 }, { "epoch": 0.10898099756749596, "grad_norm": 1.761030673980713, "learning_rate": 9.379626812696481e-07, "loss": 0.9704, "step": 4525 }, { "epoch": 0.10910141855928325, "grad_norm": 1.877665638923645, "learning_rate": 9.378359192779637e-07, "loss": 1.0007, "step": 4530 }, { "epoch": 0.10922183955107054, "grad_norm": 1.6068402528762817, "learning_rate": 9.377091572862791e-07, "loss": 0.9647, "step": 4535 }, { "epoch": 0.10934226054285782, "grad_norm": 1.6051069498062134, "learning_rate": 9.375823952945948e-07, "loss": 0.9498, "step": 4540 }, { "epoch": 0.10946268153464513, "grad_norm": 1.457535982131958, "learning_rate": 9.374556333029104e-07, "loss": 0.9663, "step": 4545 }, { "epoch": 0.10958310252643241, "grad_norm": 1.6678305864334106, "learning_rate": 9.37328871311226e-07, "loss": 1.0178, "step": 4550 }, { "epoch": 0.1097035235182197, "grad_norm": 1.4807989597320557, "learning_rate": 9.372021093195416e-07, "loss": 0.8829, "step": 4555 }, { "epoch": 0.10982394451000699, "grad_norm": 1.5890085697174072, "learning_rate": 9.370753473278572e-07, "loss": 0.9951, "step": 4560 }, { "epoch": 0.10994436550179428, "grad_norm": 1.8150923252105713, "learning_rate": 9.369485853361727e-07, "loss": 0.8929, "step": 4565 }, { "epoch": 0.11006478649358156, "grad_norm": 1.5665993690490723, "learning_rate": 9.368218233444883e-07, "loss": 0.9729, "step": 4570 }, { "epoch": 0.11018520748536885, "grad_norm": 1.9737279415130615, "learning_rate": 9.36695061352804e-07, "loss": 1.0065, "step": 4575 }, { "epoch": 0.11030562847715614, "grad_norm": 1.4127687215805054, "learning_rate": 9.365682993611195e-07, "loss": 0.9853, "step": 4580 }, { "epoch": 0.11042604946894342, "grad_norm": 1.557056188583374, "learning_rate": 9.364415373694351e-07, "loss": 0.9623, "step": 4585 }, { "epoch": 0.11054647046073071, "grad_norm": 1.6299985647201538, "learning_rate": 9.363147753777507e-07, "loss": 0.9772, "step": 4590 }, { "epoch": 0.110666891452518, "grad_norm": 1.655799388885498, "learning_rate": 9.361880133860662e-07, "loss": 1.0002, "step": 4595 }, { "epoch": 0.11078731244430529, "grad_norm": 1.546984314918518, "learning_rate": 9.360612513943819e-07, "loss": 0.8815, "step": 4600 }, { "epoch": 0.11090773343609257, "grad_norm": 1.4920119047164917, "learning_rate": 9.359344894026974e-07, "loss": 0.9835, "step": 4605 }, { "epoch": 0.11102815442787987, "grad_norm": 1.5333186388015747, "learning_rate": 9.35807727411013e-07, "loss": 0.9534, "step": 4610 }, { "epoch": 0.11114857541966716, "grad_norm": 1.6916743516921997, "learning_rate": 9.356809654193286e-07, "loss": 0.9304, "step": 4615 }, { "epoch": 0.11126899641145445, "grad_norm": 1.8019620180130005, "learning_rate": 9.355542034276443e-07, "loss": 1.0045, "step": 4620 }, { "epoch": 0.11138941740324174, "grad_norm": 1.4818156957626343, "learning_rate": 9.354274414359598e-07, "loss": 0.9803, "step": 4625 }, { "epoch": 0.11150983839502902, "grad_norm": 1.5333963632583618, "learning_rate": 9.353006794442753e-07, "loss": 0.9324, "step": 4630 }, { "epoch": 0.11163025938681631, "grad_norm": 1.7001012563705444, "learning_rate": 9.35173917452591e-07, "loss": 0.8934, "step": 4635 }, { "epoch": 0.1117506803786036, "grad_norm": 1.4808404445648193, "learning_rate": 9.350471554609065e-07, "loss": 0.9245, "step": 4640 }, { "epoch": 0.11187110137039089, "grad_norm": 1.511756181716919, "learning_rate": 9.349203934692222e-07, "loss": 0.9419, "step": 4645 }, { "epoch": 0.11199152236217817, "grad_norm": 1.5101591348648071, "learning_rate": 9.347936314775378e-07, "loss": 0.9783, "step": 4650 }, { "epoch": 0.11211194335396546, "grad_norm": 1.513121247291565, "learning_rate": 9.346668694858532e-07, "loss": 1.0224, "step": 4655 }, { "epoch": 0.11223236434575275, "grad_norm": 1.4988608360290527, "learning_rate": 9.345401074941689e-07, "loss": 0.9689, "step": 4660 }, { "epoch": 0.11235278533754003, "grad_norm": 1.5721975564956665, "learning_rate": 9.344133455024845e-07, "loss": 0.9555, "step": 4665 }, { "epoch": 0.11247320632932732, "grad_norm": 1.696729302406311, "learning_rate": 9.342865835108001e-07, "loss": 0.9495, "step": 4670 }, { "epoch": 0.11259362732111462, "grad_norm": 1.4797080755233765, "learning_rate": 9.341598215191156e-07, "loss": 0.9938, "step": 4675 }, { "epoch": 0.11271404831290191, "grad_norm": 1.5340688228607178, "learning_rate": 9.340330595274313e-07, "loss": 0.9271, "step": 4680 }, { "epoch": 0.1128344693046892, "grad_norm": 1.5662153959274292, "learning_rate": 9.339062975357468e-07, "loss": 0.98, "step": 4685 }, { "epoch": 0.11295489029647648, "grad_norm": 1.5172141790390015, "learning_rate": 9.337795355440624e-07, "loss": 0.958, "step": 4690 }, { "epoch": 0.11307531128826377, "grad_norm": 1.6660382747650146, "learning_rate": 9.336527735523781e-07, "loss": 0.9115, "step": 4695 }, { "epoch": 0.11319573228005106, "grad_norm": 1.740270972251892, "learning_rate": 9.335260115606935e-07, "loss": 0.9381, "step": 4700 }, { "epoch": 0.11331615327183835, "grad_norm": 1.401108980178833, "learning_rate": 9.333992495690092e-07, "loss": 0.9772, "step": 4705 }, { "epoch": 0.11343657426362563, "grad_norm": 1.6223112344741821, "learning_rate": 9.332724875773248e-07, "loss": 0.9462, "step": 4710 }, { "epoch": 0.11355699525541292, "grad_norm": 1.654981255531311, "learning_rate": 9.331457255856403e-07, "loss": 0.9887, "step": 4715 }, { "epoch": 0.11367741624720021, "grad_norm": 1.5551520586013794, "learning_rate": 9.330189635939559e-07, "loss": 0.9886, "step": 4720 }, { "epoch": 0.1137978372389875, "grad_norm": 2.040355920791626, "learning_rate": 9.328922016022715e-07, "loss": 0.9203, "step": 4725 }, { "epoch": 0.11391825823077478, "grad_norm": 1.7608866691589355, "learning_rate": 9.327654396105871e-07, "loss": 0.962, "step": 4730 }, { "epoch": 0.11403867922256208, "grad_norm": 2.0532608032226562, "learning_rate": 9.326386776189027e-07, "loss": 0.9713, "step": 4735 }, { "epoch": 0.11415910021434937, "grad_norm": 1.3905706405639648, "learning_rate": 9.325119156272184e-07, "loss": 0.9415, "step": 4740 }, { "epoch": 0.11427952120613666, "grad_norm": 1.5101875066757202, "learning_rate": 9.323851536355338e-07, "loss": 1.0173, "step": 4745 }, { "epoch": 0.11439994219792395, "grad_norm": 1.4829469919204712, "learning_rate": 9.322583916438494e-07, "loss": 1.0155, "step": 4750 }, { "epoch": 0.11452036318971123, "grad_norm": 1.5244263410568237, "learning_rate": 9.321316296521651e-07, "loss": 0.9863, "step": 4755 }, { "epoch": 0.11464078418149852, "grad_norm": 1.638397216796875, "learning_rate": 9.320048676604806e-07, "loss": 1.004, "step": 4760 }, { "epoch": 0.11476120517328581, "grad_norm": 1.5108238458633423, "learning_rate": 9.318781056687963e-07, "loss": 0.9795, "step": 4765 }, { "epoch": 0.1148816261650731, "grad_norm": 1.6074522733688354, "learning_rate": 9.317513436771118e-07, "loss": 1.0123, "step": 4770 }, { "epoch": 0.11500204715686038, "grad_norm": 1.4743024110794067, "learning_rate": 9.316245816854273e-07, "loss": 0.9394, "step": 4775 }, { "epoch": 0.11512246814864767, "grad_norm": 1.5913125276565552, "learning_rate": 9.31497819693743e-07, "loss": 0.9358, "step": 4780 }, { "epoch": 0.11524288914043496, "grad_norm": 1.4869250059127808, "learning_rate": 9.313710577020586e-07, "loss": 0.9499, "step": 4785 }, { "epoch": 0.11536331013222224, "grad_norm": 1.6868833303451538, "learning_rate": 9.312442957103741e-07, "loss": 0.9614, "step": 4790 }, { "epoch": 0.11548373112400953, "grad_norm": 1.4460880756378174, "learning_rate": 9.311175337186897e-07, "loss": 0.8808, "step": 4795 }, { "epoch": 0.11560415211579683, "grad_norm": 1.3952046632766724, "learning_rate": 9.309907717270054e-07, "loss": 0.9999, "step": 4800 }, { "epoch": 0.11572457310758412, "grad_norm": 1.459100365638733, "learning_rate": 9.308640097353209e-07, "loss": 0.9508, "step": 4805 }, { "epoch": 0.1158449940993714, "grad_norm": 1.4393701553344727, "learning_rate": 9.307372477436365e-07, "loss": 0.9974, "step": 4810 }, { "epoch": 0.1159654150911587, "grad_norm": 1.6273918151855469, "learning_rate": 9.306104857519521e-07, "loss": 0.9248, "step": 4815 }, { "epoch": 0.11608583608294598, "grad_norm": 1.478814959526062, "learning_rate": 9.304837237602676e-07, "loss": 0.9576, "step": 4820 }, { "epoch": 0.11620625707473327, "grad_norm": 1.8840432167053223, "learning_rate": 9.303569617685833e-07, "loss": 1.0513, "step": 4825 }, { "epoch": 0.11632667806652056, "grad_norm": 1.6974865198135376, "learning_rate": 9.302301997768989e-07, "loss": 0.9699, "step": 4830 }, { "epoch": 0.11644709905830784, "grad_norm": 1.5943163633346558, "learning_rate": 9.301034377852143e-07, "loss": 0.946, "step": 4835 }, { "epoch": 0.11656752005009513, "grad_norm": 1.5880036354064941, "learning_rate": 9.2997667579353e-07, "loss": 0.9744, "step": 4840 }, { "epoch": 0.11668794104188242, "grad_norm": 1.8085670471191406, "learning_rate": 9.298499138018456e-07, "loss": 0.9736, "step": 4845 }, { "epoch": 0.1168083620336697, "grad_norm": 1.504788875579834, "learning_rate": 9.297231518101612e-07, "loss": 0.9505, "step": 4850 }, { "epoch": 0.11692878302545699, "grad_norm": 1.4562904834747314, "learning_rate": 9.295963898184768e-07, "loss": 0.9379, "step": 4855 }, { "epoch": 0.11704920401724428, "grad_norm": 1.4931610822677612, "learning_rate": 9.294696278267923e-07, "loss": 0.9242, "step": 4860 }, { "epoch": 0.11716962500903158, "grad_norm": 1.4892380237579346, "learning_rate": 9.29342865835108e-07, "loss": 0.9444, "step": 4865 }, { "epoch": 0.11729004600081887, "grad_norm": 1.8332098722457886, "learning_rate": 9.292161038434235e-07, "loss": 1.0237, "step": 4870 }, { "epoch": 0.11741046699260616, "grad_norm": 1.5724458694458008, "learning_rate": 9.290893418517392e-07, "loss": 0.8696, "step": 4875 }, { "epoch": 0.11753088798439344, "grad_norm": 1.5204848051071167, "learning_rate": 9.289625798600548e-07, "loss": 0.9876, "step": 4880 }, { "epoch": 0.11765130897618073, "grad_norm": 1.428585410118103, "learning_rate": 9.288358178683703e-07, "loss": 0.935, "step": 4885 }, { "epoch": 0.11777172996796802, "grad_norm": 1.7297282218933105, "learning_rate": 9.287090558766859e-07, "loss": 0.9904, "step": 4890 }, { "epoch": 0.1178921509597553, "grad_norm": 1.5793259143829346, "learning_rate": 9.285822938850015e-07, "loss": 0.9694, "step": 4895 }, { "epoch": 0.11801257195154259, "grad_norm": 1.7904987335205078, "learning_rate": 9.284555318933171e-07, "loss": 0.9852, "step": 4900 }, { "epoch": 0.11813299294332988, "grad_norm": 1.4578253030776978, "learning_rate": 9.283287699016326e-07, "loss": 0.9786, "step": 4905 }, { "epoch": 0.11825341393511717, "grad_norm": 1.439958930015564, "learning_rate": 9.282020079099483e-07, "loss": 0.9291, "step": 4910 }, { "epoch": 0.11837383492690445, "grad_norm": 1.6253753900527954, "learning_rate": 9.280752459182638e-07, "loss": 0.9948, "step": 4915 }, { "epoch": 0.11849425591869174, "grad_norm": 1.617333173751831, "learning_rate": 9.279484839265794e-07, "loss": 0.9229, "step": 4920 }, { "epoch": 0.11861467691047904, "grad_norm": 1.7600502967834473, "learning_rate": 9.278217219348951e-07, "loss": 0.9895, "step": 4925 }, { "epoch": 0.11873509790226633, "grad_norm": 1.636748194694519, "learning_rate": 9.276949599432105e-07, "loss": 0.9766, "step": 4930 }, { "epoch": 0.11885551889405362, "grad_norm": 1.6138620376586914, "learning_rate": 9.275681979515262e-07, "loss": 0.9108, "step": 4935 }, { "epoch": 0.1189759398858409, "grad_norm": 1.5964850187301636, "learning_rate": 9.274414359598418e-07, "loss": 0.9817, "step": 4940 }, { "epoch": 0.11909636087762819, "grad_norm": 1.6752839088439941, "learning_rate": 9.273146739681574e-07, "loss": 0.9545, "step": 4945 }, { "epoch": 0.11921678186941548, "grad_norm": 1.6496039628982544, "learning_rate": 9.271879119764729e-07, "loss": 0.9907, "step": 4950 }, { "epoch": 0.11933720286120277, "grad_norm": 1.4864082336425781, "learning_rate": 9.270611499847885e-07, "loss": 0.9827, "step": 4955 }, { "epoch": 0.11945762385299005, "grad_norm": 1.483738660812378, "learning_rate": 9.269343879931041e-07, "loss": 0.9609, "step": 4960 }, { "epoch": 0.11957804484477734, "grad_norm": 1.645350694656372, "learning_rate": 9.268076260014197e-07, "loss": 0.9546, "step": 4965 }, { "epoch": 0.11969846583656463, "grad_norm": 1.473766803741455, "learning_rate": 9.266808640097354e-07, "loss": 0.9515, "step": 4970 }, { "epoch": 0.11981888682835191, "grad_norm": 1.4186217784881592, "learning_rate": 9.265541020180508e-07, "loss": 1.0048, "step": 4975 }, { "epoch": 0.1199393078201392, "grad_norm": 1.6372003555297852, "learning_rate": 9.264273400263664e-07, "loss": 0.9712, "step": 4980 }, { "epoch": 0.12005972881192649, "grad_norm": 2.0134482383728027, "learning_rate": 9.263005780346821e-07, "loss": 0.9544, "step": 4985 }, { "epoch": 0.12018014980371379, "grad_norm": 1.6138123273849487, "learning_rate": 9.261738160429976e-07, "loss": 0.9704, "step": 4990 }, { "epoch": 0.12030057079550108, "grad_norm": 1.5256341695785522, "learning_rate": 9.260470540513133e-07, "loss": 0.9477, "step": 4995 }, { "epoch": 0.12042099178728836, "grad_norm": 1.558856725692749, "learning_rate": 9.259202920596288e-07, "loss": 0.9431, "step": 5000 }, { "epoch": 0.12054141277907565, "grad_norm": 1.8143106698989868, "learning_rate": 9.257935300679444e-07, "loss": 0.9618, "step": 5005 }, { "epoch": 0.12066183377086294, "grad_norm": 1.6698336601257324, "learning_rate": 9.2566676807626e-07, "loss": 0.9523, "step": 5010 }, { "epoch": 0.12078225476265023, "grad_norm": 1.6376757621765137, "learning_rate": 9.255400060845756e-07, "loss": 0.8967, "step": 5015 }, { "epoch": 0.12090267575443751, "grad_norm": 1.6007685661315918, "learning_rate": 9.254132440928911e-07, "loss": 0.9103, "step": 5020 }, { "epoch": 0.1210230967462248, "grad_norm": 1.5988738536834717, "learning_rate": 9.252864821012067e-07, "loss": 0.9777, "step": 5025 }, { "epoch": 0.12114351773801209, "grad_norm": 1.5461716651916504, "learning_rate": 9.251597201095224e-07, "loss": 0.9269, "step": 5030 }, { "epoch": 0.12126393872979938, "grad_norm": 1.6856049299240112, "learning_rate": 9.250329581178379e-07, "loss": 0.9482, "step": 5035 }, { "epoch": 0.12138435972158666, "grad_norm": 1.4944417476654053, "learning_rate": 9.249061961261535e-07, "loss": 0.9646, "step": 5040 }, { "epoch": 0.12150478071337395, "grad_norm": 1.87632155418396, "learning_rate": 9.247794341344691e-07, "loss": 0.9717, "step": 5045 }, { "epoch": 0.12162520170516124, "grad_norm": 1.8691729307174683, "learning_rate": 9.246526721427846e-07, "loss": 0.929, "step": 5050 }, { "epoch": 0.12174562269694854, "grad_norm": 1.5722233057022095, "learning_rate": 9.245259101511003e-07, "loss": 0.9637, "step": 5055 }, { "epoch": 0.12186604368873583, "grad_norm": 1.6120997667312622, "learning_rate": 9.243991481594159e-07, "loss": 1.0072, "step": 5060 }, { "epoch": 0.12198646468052311, "grad_norm": 1.6073572635650635, "learning_rate": 9.242723861677313e-07, "loss": 0.9322, "step": 5065 }, { "epoch": 0.1221068856723104, "grad_norm": 1.6781420707702637, "learning_rate": 9.24145624176047e-07, "loss": 0.9434, "step": 5070 }, { "epoch": 0.12222730666409769, "grad_norm": 1.8128712177276611, "learning_rate": 9.240188621843626e-07, "loss": 0.918, "step": 5075 }, { "epoch": 0.12234772765588497, "grad_norm": 1.5147145986557007, "learning_rate": 9.238921001926782e-07, "loss": 0.9762, "step": 5080 }, { "epoch": 0.12246814864767226, "grad_norm": 1.45526123046875, "learning_rate": 9.237653382009938e-07, "loss": 0.9586, "step": 5085 }, { "epoch": 0.12258856963945955, "grad_norm": 1.5992141962051392, "learning_rate": 9.236385762093094e-07, "loss": 0.933, "step": 5090 }, { "epoch": 0.12270899063124684, "grad_norm": 1.419451117515564, "learning_rate": 9.235118142176249e-07, "loss": 0.9963, "step": 5095 }, { "epoch": 0.12282941162303412, "grad_norm": 1.6753170490264893, "learning_rate": 9.233850522259405e-07, "loss": 0.9369, "step": 5100 }, { "epoch": 0.12294983261482141, "grad_norm": 1.876473307609558, "learning_rate": 9.232582902342562e-07, "loss": 0.9491, "step": 5105 }, { "epoch": 0.1230702536066087, "grad_norm": 1.5998787879943848, "learning_rate": 9.231315282425717e-07, "loss": 0.9891, "step": 5110 }, { "epoch": 0.12319067459839599, "grad_norm": 1.7187938690185547, "learning_rate": 9.230047662508873e-07, "loss": 0.9506, "step": 5115 }, { "epoch": 0.12331109559018329, "grad_norm": 1.3768280744552612, "learning_rate": 9.228780042592029e-07, "loss": 0.9974, "step": 5120 }, { "epoch": 0.12343151658197057, "grad_norm": 1.4900585412979126, "learning_rate": 9.227512422675184e-07, "loss": 0.9339, "step": 5125 }, { "epoch": 0.12355193757375786, "grad_norm": 1.7107560634613037, "learning_rate": 9.226244802758341e-07, "loss": 0.9153, "step": 5130 }, { "epoch": 0.12367235856554515, "grad_norm": 2.090656042098999, "learning_rate": 9.224977182841496e-07, "loss": 0.895, "step": 5135 }, { "epoch": 0.12379277955733244, "grad_norm": 1.7037839889526367, "learning_rate": 9.223709562924652e-07, "loss": 1.0013, "step": 5140 }, { "epoch": 0.12391320054911972, "grad_norm": 1.5398280620574951, "learning_rate": 9.222441943007808e-07, "loss": 0.9445, "step": 5145 }, { "epoch": 0.12403362154090701, "grad_norm": 1.6887266635894775, "learning_rate": 9.221174323090965e-07, "loss": 0.9767, "step": 5150 }, { "epoch": 0.1241540425326943, "grad_norm": 1.401342511177063, "learning_rate": 9.21990670317412e-07, "loss": 0.8865, "step": 5155 }, { "epoch": 0.12427446352448158, "grad_norm": 1.3718047142028809, "learning_rate": 9.218639083257275e-07, "loss": 0.9621, "step": 5160 }, { "epoch": 0.12439488451626887, "grad_norm": 1.6709517240524292, "learning_rate": 9.217371463340432e-07, "loss": 0.967, "step": 5165 }, { "epoch": 0.12451530550805616, "grad_norm": 1.510709285736084, "learning_rate": 9.216103843423587e-07, "loss": 0.9674, "step": 5170 }, { "epoch": 0.12463572649984345, "grad_norm": 1.467331886291504, "learning_rate": 9.214836223506744e-07, "loss": 0.9637, "step": 5175 }, { "epoch": 0.12475614749163075, "grad_norm": 1.5031909942626953, "learning_rate": 9.2135686035899e-07, "loss": 0.9485, "step": 5180 }, { "epoch": 0.12487656848341804, "grad_norm": 1.759961485862732, "learning_rate": 9.212300983673054e-07, "loss": 1.0108, "step": 5185 }, { "epoch": 0.12499698947520532, "grad_norm": 1.519850492477417, "learning_rate": 9.211033363756211e-07, "loss": 0.9442, "step": 5190 }, { "epoch": 0.1251174104669926, "grad_norm": 1.8199149370193481, "learning_rate": 9.209765743839367e-07, "loss": 0.9235, "step": 5195 }, { "epoch": 0.1252378314587799, "grad_norm": 1.6225138902664185, "learning_rate": 9.208498123922523e-07, "loss": 0.9856, "step": 5200 }, { "epoch": 0.12535825245056717, "grad_norm": 1.6960610151290894, "learning_rate": 9.207230504005678e-07, "loss": 1.0328, "step": 5205 }, { "epoch": 0.12547867344235447, "grad_norm": 1.9315990209579468, "learning_rate": 9.205962884088835e-07, "loss": 0.9509, "step": 5210 }, { "epoch": 0.12559909443414177, "grad_norm": 1.6579340696334839, "learning_rate": 9.20469526417199e-07, "loss": 0.9352, "step": 5215 }, { "epoch": 0.12571951542592905, "grad_norm": 1.6070151329040527, "learning_rate": 9.203427644255146e-07, "loss": 0.9809, "step": 5220 }, { "epoch": 0.12583993641771635, "grad_norm": 1.4353399276733398, "learning_rate": 9.202160024338303e-07, "loss": 0.9512, "step": 5225 }, { "epoch": 0.12596035740950362, "grad_norm": 1.5951476097106934, "learning_rate": 9.200892404421457e-07, "loss": 0.9959, "step": 5230 }, { "epoch": 0.12608077840129092, "grad_norm": 1.600804090499878, "learning_rate": 9.199624784504614e-07, "loss": 0.9749, "step": 5235 }, { "epoch": 0.1262011993930782, "grad_norm": 1.6272754669189453, "learning_rate": 9.19835716458777e-07, "loss": 0.9287, "step": 5240 }, { "epoch": 0.1263216203848655, "grad_norm": 1.556373953819275, "learning_rate": 9.197089544670925e-07, "loss": 0.9435, "step": 5245 }, { "epoch": 0.12644204137665277, "grad_norm": 1.7332128286361694, "learning_rate": 9.195821924754081e-07, "loss": 0.9235, "step": 5250 }, { "epoch": 0.12656246236844007, "grad_norm": 1.6399269104003906, "learning_rate": 9.194554304837237e-07, "loss": 0.9436, "step": 5255 }, { "epoch": 0.12668288336022734, "grad_norm": 1.756186842918396, "learning_rate": 9.193286684920393e-07, "loss": 0.9137, "step": 5260 }, { "epoch": 0.12680330435201465, "grad_norm": 1.9386248588562012, "learning_rate": 9.192019065003549e-07, "loss": 0.9831, "step": 5265 }, { "epoch": 0.12692372534380192, "grad_norm": 1.8038384914398193, "learning_rate": 9.190751445086706e-07, "loss": 0.8885, "step": 5270 }, { "epoch": 0.12704414633558922, "grad_norm": 1.5861470699310303, "learning_rate": 9.18948382516986e-07, "loss": 0.9518, "step": 5275 }, { "epoch": 0.12716456732737652, "grad_norm": 1.5452990531921387, "learning_rate": 9.188216205253016e-07, "loss": 1.0352, "step": 5280 }, { "epoch": 0.1272849883191638, "grad_norm": 1.6214449405670166, "learning_rate": 9.186948585336173e-07, "loss": 1.0022, "step": 5285 }, { "epoch": 0.1274054093109511, "grad_norm": 1.5629680156707764, "learning_rate": 9.185680965419328e-07, "loss": 0.9569, "step": 5290 }, { "epoch": 0.12752583030273837, "grad_norm": 1.5142781734466553, "learning_rate": 9.184413345502485e-07, "loss": 0.9449, "step": 5295 }, { "epoch": 0.12764625129452567, "grad_norm": 1.5961127281188965, "learning_rate": 9.18314572558564e-07, "loss": 0.9837, "step": 5300 }, { "epoch": 0.12776667228631294, "grad_norm": 1.583109974861145, "learning_rate": 9.181878105668795e-07, "loss": 0.9623, "step": 5305 }, { "epoch": 0.12788709327810024, "grad_norm": 1.740462303161621, "learning_rate": 9.180610485751952e-07, "loss": 0.9572, "step": 5310 }, { "epoch": 0.12800751426988752, "grad_norm": 1.6290134191513062, "learning_rate": 9.179342865835108e-07, "loss": 0.9361, "step": 5315 }, { "epoch": 0.12812793526167482, "grad_norm": 1.3808825016021729, "learning_rate": 9.178075245918263e-07, "loss": 0.9961, "step": 5320 }, { "epoch": 0.1282483562534621, "grad_norm": 1.8814772367477417, "learning_rate": 9.176807626001419e-07, "loss": 0.9583, "step": 5325 }, { "epoch": 0.1283687772452494, "grad_norm": 1.5854055881500244, "learning_rate": 9.175540006084576e-07, "loss": 0.9677, "step": 5330 }, { "epoch": 0.12848919823703667, "grad_norm": 1.688438892364502, "learning_rate": 9.174272386167731e-07, "loss": 0.9858, "step": 5335 }, { "epoch": 0.12860961922882397, "grad_norm": 1.4895740747451782, "learning_rate": 9.173004766250887e-07, "loss": 0.9622, "step": 5340 }, { "epoch": 0.12873004022061127, "grad_norm": 1.4580119848251343, "learning_rate": 9.171737146334043e-07, "loss": 0.9619, "step": 5345 }, { "epoch": 0.12885046121239854, "grad_norm": 1.5655674934387207, "learning_rate": 9.170469526417198e-07, "loss": 0.9412, "step": 5350 }, { "epoch": 0.12897088220418584, "grad_norm": 1.5745370388031006, "learning_rate": 9.169201906500355e-07, "loss": 0.9373, "step": 5355 }, { "epoch": 0.12909130319597312, "grad_norm": 1.4132782220840454, "learning_rate": 9.167934286583511e-07, "loss": 0.9671, "step": 5360 }, { "epoch": 0.12921172418776042, "grad_norm": 1.5725817680358887, "learning_rate": 9.166666666666665e-07, "loss": 0.9964, "step": 5365 }, { "epoch": 0.1293321451795477, "grad_norm": 1.7641382217407227, "learning_rate": 9.165399046749822e-07, "loss": 0.9692, "step": 5370 }, { "epoch": 0.129452566171335, "grad_norm": 1.7528362274169922, "learning_rate": 9.164131426832978e-07, "loss": 0.938, "step": 5375 }, { "epoch": 0.12957298716312227, "grad_norm": 1.7025678157806396, "learning_rate": 9.162863806916134e-07, "loss": 0.9336, "step": 5380 }, { "epoch": 0.12969340815490957, "grad_norm": 1.630113959312439, "learning_rate": 9.16159618699929e-07, "loss": 1.0092, "step": 5385 }, { "epoch": 0.12981382914669684, "grad_norm": 1.58848237991333, "learning_rate": 9.160328567082445e-07, "loss": 0.9254, "step": 5390 }, { "epoch": 0.12993425013848414, "grad_norm": 1.4365260601043701, "learning_rate": 9.159060947165601e-07, "loss": 0.9769, "step": 5395 }, { "epoch": 0.13005467113027142, "grad_norm": 1.4236555099487305, "learning_rate": 9.157793327248757e-07, "loss": 0.9208, "step": 5400 }, { "epoch": 0.13017509212205872, "grad_norm": 1.6881691217422485, "learning_rate": 9.156525707331914e-07, "loss": 0.9772, "step": 5405 }, { "epoch": 0.13029551311384602, "grad_norm": 1.568904995918274, "learning_rate": 9.155258087415069e-07, "loss": 0.9121, "step": 5410 }, { "epoch": 0.1304159341056333, "grad_norm": 1.721828818321228, "learning_rate": 9.153990467498225e-07, "loss": 0.9717, "step": 5415 }, { "epoch": 0.1305363550974206, "grad_norm": 1.6432464122772217, "learning_rate": 9.152722847581381e-07, "loss": 0.9182, "step": 5420 }, { "epoch": 0.13065677608920787, "grad_norm": 1.5352879762649536, "learning_rate": 9.151455227664536e-07, "loss": 0.9659, "step": 5425 }, { "epoch": 0.13077719708099517, "grad_norm": 1.8074719905853271, "learning_rate": 9.150187607747693e-07, "loss": 0.9903, "step": 5430 }, { "epoch": 0.13089761807278244, "grad_norm": 1.5905964374542236, "learning_rate": 9.148919987830848e-07, "loss": 0.9566, "step": 5435 }, { "epoch": 0.13101803906456974, "grad_norm": 1.4441380500793457, "learning_rate": 9.147652367914004e-07, "loss": 0.9314, "step": 5440 }, { "epoch": 0.13113846005635701, "grad_norm": 1.55857253074646, "learning_rate": 9.14638474799716e-07, "loss": 0.9121, "step": 5445 }, { "epoch": 0.13125888104814432, "grad_norm": 1.6368721723556519, "learning_rate": 9.145117128080317e-07, "loss": 0.9487, "step": 5450 }, { "epoch": 0.1313793020399316, "grad_norm": 1.7417676448822021, "learning_rate": 9.143849508163472e-07, "loss": 0.9368, "step": 5455 }, { "epoch": 0.1314997230317189, "grad_norm": 1.3386549949645996, "learning_rate": 9.142581888246627e-07, "loss": 0.9817, "step": 5460 }, { "epoch": 0.1316201440235062, "grad_norm": 1.568506121635437, "learning_rate": 9.141314268329784e-07, "loss": 0.9796, "step": 5465 }, { "epoch": 0.13174056501529346, "grad_norm": 1.5980567932128906, "learning_rate": 9.140046648412939e-07, "loss": 0.9429, "step": 5470 }, { "epoch": 0.13186098600708077, "grad_norm": 1.4643741846084595, "learning_rate": 9.138779028496096e-07, "loss": 0.9005, "step": 5475 }, { "epoch": 0.13198140699886804, "grad_norm": 1.727617621421814, "learning_rate": 9.137511408579252e-07, "loss": 1.002, "step": 5480 }, { "epoch": 0.13210182799065534, "grad_norm": 1.4297009706497192, "learning_rate": 9.136243788662406e-07, "loss": 0.9769, "step": 5485 }, { "epoch": 0.13222224898244261, "grad_norm": 1.4668093919754028, "learning_rate": 9.134976168745563e-07, "loss": 1.007, "step": 5490 }, { "epoch": 0.13234266997422992, "grad_norm": 1.4713921546936035, "learning_rate": 9.133708548828719e-07, "loss": 0.923, "step": 5495 }, { "epoch": 0.1324630909660172, "grad_norm": 1.6667219400405884, "learning_rate": 9.132440928911875e-07, "loss": 0.9222, "step": 5500 }, { "epoch": 0.1325835119578045, "grad_norm": 1.664129376411438, "learning_rate": 9.13117330899503e-07, "loss": 0.9041, "step": 5505 }, { "epoch": 0.13270393294959176, "grad_norm": 1.6742534637451172, "learning_rate": 9.129905689078186e-07, "loss": 0.9022, "step": 5510 }, { "epoch": 0.13282435394137906, "grad_norm": 1.4654879570007324, "learning_rate": 9.128638069161342e-07, "loss": 0.95, "step": 5515 }, { "epoch": 0.13294477493316634, "grad_norm": 1.748233675956726, "learning_rate": 9.127370449244498e-07, "loss": 0.957, "step": 5520 }, { "epoch": 0.13306519592495364, "grad_norm": 1.5935348272323608, "learning_rate": 9.126102829327655e-07, "loss": 0.9468, "step": 5525 }, { "epoch": 0.13318561691674094, "grad_norm": 1.6781808137893677, "learning_rate": 9.124835209410809e-07, "loss": 0.9734, "step": 5530 }, { "epoch": 0.1333060379085282, "grad_norm": 1.6739495992660522, "learning_rate": 9.123567589493966e-07, "loss": 0.9759, "step": 5535 }, { "epoch": 0.13342645890031551, "grad_norm": 1.5237581729888916, "learning_rate": 9.122299969577122e-07, "loss": 0.9928, "step": 5540 }, { "epoch": 0.1335468798921028, "grad_norm": 1.8118046522140503, "learning_rate": 9.121032349660277e-07, "loss": 0.9387, "step": 5545 }, { "epoch": 0.1336673008838901, "grad_norm": 1.53839910030365, "learning_rate": 9.119764729743433e-07, "loss": 0.9461, "step": 5550 }, { "epoch": 0.13378772187567736, "grad_norm": 1.5928133726119995, "learning_rate": 9.118497109826589e-07, "loss": 0.9554, "step": 5555 }, { "epoch": 0.13390814286746466, "grad_norm": 1.980698585510254, "learning_rate": 9.117229489909745e-07, "loss": 0.9315, "step": 5560 }, { "epoch": 0.13402856385925194, "grad_norm": 1.5155107975006104, "learning_rate": 9.115961869992901e-07, "loss": 0.9696, "step": 5565 }, { "epoch": 0.13414898485103924, "grad_norm": 1.5934728384017944, "learning_rate": 9.114694250076057e-07, "loss": 0.9984, "step": 5570 }, { "epoch": 0.1342694058428265, "grad_norm": 1.5468522310256958, "learning_rate": 9.113426630159212e-07, "loss": 0.931, "step": 5575 }, { "epoch": 0.1343898268346138, "grad_norm": 1.7637958526611328, "learning_rate": 9.112159010242368e-07, "loss": 0.9437, "step": 5580 }, { "epoch": 0.13451024782640109, "grad_norm": 1.5114047527313232, "learning_rate": 9.110891390325525e-07, "loss": 0.9994, "step": 5585 }, { "epoch": 0.1346306688181884, "grad_norm": 1.611918330192566, "learning_rate": 9.10962377040868e-07, "loss": 0.9207, "step": 5590 }, { "epoch": 0.1347510898099757, "grad_norm": 1.6683274507522583, "learning_rate": 9.108356150491837e-07, "loss": 0.9548, "step": 5595 }, { "epoch": 0.13487151080176296, "grad_norm": 1.4041019678115845, "learning_rate": 9.107088530574992e-07, "loss": 0.9524, "step": 5600 }, { "epoch": 0.13499193179355026, "grad_norm": 1.4725638628005981, "learning_rate": 9.105820910658147e-07, "loss": 0.9636, "step": 5605 }, { "epoch": 0.13511235278533754, "grad_norm": 1.5536869764328003, "learning_rate": 9.104553290741304e-07, "loss": 0.9784, "step": 5610 }, { "epoch": 0.13523277377712484, "grad_norm": 1.4738366603851318, "learning_rate": 9.10328567082446e-07, "loss": 0.9641, "step": 5615 }, { "epoch": 0.1353531947689121, "grad_norm": 1.5258055925369263, "learning_rate": 9.102018050907615e-07, "loss": 0.9735, "step": 5620 }, { "epoch": 0.1354736157606994, "grad_norm": 1.5292253494262695, "learning_rate": 9.100750430990771e-07, "loss": 0.9347, "step": 5625 }, { "epoch": 0.13559403675248669, "grad_norm": 1.5491033792495728, "learning_rate": 9.099482811073927e-07, "loss": 1.0053, "step": 5630 }, { "epoch": 0.135714457744274, "grad_norm": 1.5524201393127441, "learning_rate": 9.098215191157083e-07, "loss": 0.9651, "step": 5635 }, { "epoch": 0.13583487873606126, "grad_norm": 1.6256749629974365, "learning_rate": 9.096947571240239e-07, "loss": 0.9522, "step": 5640 }, { "epoch": 0.13595529972784856, "grad_norm": 1.6116081476211548, "learning_rate": 9.095679951323395e-07, "loss": 0.9779, "step": 5645 }, { "epoch": 0.13607572071963583, "grad_norm": 1.7412595748901367, "learning_rate": 9.09441233140655e-07, "loss": 0.9511, "step": 5650 }, { "epoch": 0.13619614171142314, "grad_norm": 1.6832035779953003, "learning_rate": 9.093144711489706e-07, "loss": 0.9324, "step": 5655 }, { "epoch": 0.13631656270321044, "grad_norm": 1.6584666967391968, "learning_rate": 9.091877091572863e-07, "loss": 0.9868, "step": 5660 }, { "epoch": 0.1364369836949977, "grad_norm": 1.8463393449783325, "learning_rate": 9.090609471656017e-07, "loss": 0.989, "step": 5665 }, { "epoch": 0.136557404686785, "grad_norm": 1.6989035606384277, "learning_rate": 9.089341851739174e-07, "loss": 0.9642, "step": 5670 }, { "epoch": 0.13667782567857228, "grad_norm": 1.5193052291870117, "learning_rate": 9.08807423182233e-07, "loss": 0.9344, "step": 5675 }, { "epoch": 0.13679824667035959, "grad_norm": 1.4853129386901855, "learning_rate": 9.086806611905486e-07, "loss": 0.9027, "step": 5680 }, { "epoch": 0.13691866766214686, "grad_norm": 1.9148509502410889, "learning_rate": 9.085538991988642e-07, "loss": 0.9396, "step": 5685 }, { "epoch": 0.13703908865393416, "grad_norm": 1.6306005716323853, "learning_rate": 9.084271372071797e-07, "loss": 0.9338, "step": 5690 }, { "epoch": 0.13715950964572143, "grad_norm": 1.5270458459854126, "learning_rate": 9.083003752154953e-07, "loss": 0.8957, "step": 5695 }, { "epoch": 0.13727993063750873, "grad_norm": 1.4417778253555298, "learning_rate": 9.081736132238109e-07, "loss": 0.9709, "step": 5700 }, { "epoch": 0.137400351629296, "grad_norm": 1.6104899644851685, "learning_rate": 9.080468512321266e-07, "loss": 0.9205, "step": 5705 }, { "epoch": 0.1375207726210833, "grad_norm": 1.6175036430358887, "learning_rate": 9.079200892404421e-07, "loss": 0.9469, "step": 5710 }, { "epoch": 0.13764119361287058, "grad_norm": 1.5673390626907349, "learning_rate": 9.077933272487576e-07, "loss": 0.9618, "step": 5715 }, { "epoch": 0.13776161460465788, "grad_norm": 1.7446295022964478, "learning_rate": 9.076665652570733e-07, "loss": 0.925, "step": 5720 }, { "epoch": 0.13788203559644518, "grad_norm": 1.4070531129837036, "learning_rate": 9.075398032653888e-07, "loss": 0.9859, "step": 5725 }, { "epoch": 0.13800245658823246, "grad_norm": 1.5243618488311768, "learning_rate": 9.074130412737045e-07, "loss": 0.9412, "step": 5730 }, { "epoch": 0.13812287758001976, "grad_norm": 1.5630604028701782, "learning_rate": 9.0728627928202e-07, "loss": 0.9375, "step": 5735 }, { "epoch": 0.13824329857180703, "grad_norm": 1.7350032329559326, "learning_rate": 9.071595172903356e-07, "loss": 0.9522, "step": 5740 }, { "epoch": 0.13836371956359433, "grad_norm": 1.8089555501937866, "learning_rate": 9.070327552986512e-07, "loss": 0.9314, "step": 5745 }, { "epoch": 0.1384841405553816, "grad_norm": 1.511834740638733, "learning_rate": 9.069059933069668e-07, "loss": 1.0104, "step": 5750 }, { "epoch": 0.1386045615471689, "grad_norm": 1.5103285312652588, "learning_rate": 9.067792313152824e-07, "loss": 0.9809, "step": 5755 }, { "epoch": 0.13872498253895618, "grad_norm": 1.5690333843231201, "learning_rate": 9.066524693235979e-07, "loss": 0.996, "step": 5760 }, { "epoch": 0.13884540353074348, "grad_norm": 1.577641248703003, "learning_rate": 9.065257073319136e-07, "loss": 0.9487, "step": 5765 }, { "epoch": 0.13896582452253076, "grad_norm": 1.4639778137207031, "learning_rate": 9.063989453402291e-07, "loss": 0.971, "step": 5770 }, { "epoch": 0.13908624551431806, "grad_norm": 1.6108697652816772, "learning_rate": 9.062721833485447e-07, "loss": 0.9708, "step": 5775 }, { "epoch": 0.13920666650610533, "grad_norm": 1.603767991065979, "learning_rate": 9.061454213568604e-07, "loss": 0.8838, "step": 5780 }, { "epoch": 0.13932708749789263, "grad_norm": 1.5478912591934204, "learning_rate": 9.060186593651758e-07, "loss": 0.9728, "step": 5785 }, { "epoch": 0.13944750848967993, "grad_norm": 1.4290289878845215, "learning_rate": 9.058918973734915e-07, "loss": 0.863, "step": 5790 }, { "epoch": 0.1395679294814672, "grad_norm": 1.6638075113296509, "learning_rate": 9.057651353818071e-07, "loss": 0.9495, "step": 5795 }, { "epoch": 0.1396883504732545, "grad_norm": 1.6155606508255005, "learning_rate": 9.056383733901227e-07, "loss": 0.9387, "step": 5800 }, { "epoch": 0.13980877146504178, "grad_norm": 1.5489352941513062, "learning_rate": 9.055116113984382e-07, "loss": 0.9324, "step": 5805 }, { "epoch": 0.13992919245682908, "grad_norm": 1.4846972227096558, "learning_rate": 9.053848494067538e-07, "loss": 0.9736, "step": 5810 }, { "epoch": 0.14004961344861636, "grad_norm": 1.772792935371399, "learning_rate": 9.052580874150694e-07, "loss": 0.9563, "step": 5815 }, { "epoch": 0.14017003444040366, "grad_norm": 1.4382880926132202, "learning_rate": 9.05131325423385e-07, "loss": 0.9554, "step": 5820 }, { "epoch": 0.14029045543219093, "grad_norm": 1.7088966369628906, "learning_rate": 9.050045634317007e-07, "loss": 1.0169, "step": 5825 }, { "epoch": 0.14041087642397823, "grad_norm": 1.3709522485733032, "learning_rate": 9.048778014400161e-07, "loss": 0.9589, "step": 5830 }, { "epoch": 0.1405312974157655, "grad_norm": 1.8668538331985474, "learning_rate": 9.047510394483317e-07, "loss": 0.9706, "step": 5835 }, { "epoch": 0.1406517184075528, "grad_norm": 1.7845852375030518, "learning_rate": 9.046242774566474e-07, "loss": 0.9407, "step": 5840 }, { "epoch": 0.14077213939934008, "grad_norm": 1.5803017616271973, "learning_rate": 9.044975154649629e-07, "loss": 0.9663, "step": 5845 }, { "epoch": 0.14089256039112738, "grad_norm": 1.6961511373519897, "learning_rate": 9.043707534732785e-07, "loss": 0.9783, "step": 5850 }, { "epoch": 0.14101298138291468, "grad_norm": 1.4206011295318604, "learning_rate": 9.042439914815941e-07, "loss": 0.9357, "step": 5855 }, { "epoch": 0.14113340237470196, "grad_norm": 1.9127111434936523, "learning_rate": 9.041172294899096e-07, "loss": 0.9295, "step": 5860 }, { "epoch": 0.14125382336648926, "grad_norm": 1.4716366529464722, "learning_rate": 9.039904674982253e-07, "loss": 0.9509, "step": 5865 }, { "epoch": 0.14137424435827653, "grad_norm": 1.47152841091156, "learning_rate": 9.038637055065409e-07, "loss": 0.9803, "step": 5870 }, { "epoch": 0.14149466535006383, "grad_norm": 1.5282686948776245, "learning_rate": 9.037369435148564e-07, "loss": 0.9496, "step": 5875 }, { "epoch": 0.1416150863418511, "grad_norm": 1.6607931852340698, "learning_rate": 9.03610181523172e-07, "loss": 0.9748, "step": 5880 }, { "epoch": 0.1417355073336384, "grad_norm": 1.5049351453781128, "learning_rate": 9.034834195314877e-07, "loss": 0.9323, "step": 5885 }, { "epoch": 0.14185592832542568, "grad_norm": 1.4687178134918213, "learning_rate": 9.033566575398033e-07, "loss": 0.953, "step": 5890 }, { "epoch": 0.14197634931721298, "grad_norm": 1.6209310293197632, "learning_rate": 9.032298955481188e-07, "loss": 0.927, "step": 5895 }, { "epoch": 0.14209677030900025, "grad_norm": 1.742845892906189, "learning_rate": 9.031031335564344e-07, "loss": 0.9633, "step": 5900 }, { "epoch": 0.14221719130078755, "grad_norm": 1.5639408826828003, "learning_rate": 9.0297637156475e-07, "loss": 0.9433, "step": 5905 }, { "epoch": 0.14233761229257486, "grad_norm": 1.7477506399154663, "learning_rate": 9.028496095730656e-07, "loss": 0.9144, "step": 5910 }, { "epoch": 0.14245803328436213, "grad_norm": 1.8129380941390991, "learning_rate": 9.027228475813812e-07, "loss": 0.9531, "step": 5915 }, { "epoch": 0.14257845427614943, "grad_norm": 1.5080280303955078, "learning_rate": 9.025960855896967e-07, "loss": 0.987, "step": 5920 }, { "epoch": 0.1426988752679367, "grad_norm": 1.4849168062210083, "learning_rate": 9.024693235980123e-07, "loss": 0.9883, "step": 5925 }, { "epoch": 0.142819296259724, "grad_norm": 1.9817742109298706, "learning_rate": 9.023425616063279e-07, "loss": 0.9211, "step": 5930 }, { "epoch": 0.14293971725151128, "grad_norm": 1.593101143836975, "learning_rate": 9.022157996146436e-07, "loss": 0.9607, "step": 5935 }, { "epoch": 0.14306013824329858, "grad_norm": 2.151980400085449, "learning_rate": 9.020890376229591e-07, "loss": 0.9711, "step": 5940 }, { "epoch": 0.14318055923508585, "grad_norm": 1.5875486135482788, "learning_rate": 9.019622756312747e-07, "loss": 0.9618, "step": 5945 }, { "epoch": 0.14330098022687315, "grad_norm": 1.7066106796264648, "learning_rate": 9.018355136395903e-07, "loss": 0.9255, "step": 5950 }, { "epoch": 0.14342140121866043, "grad_norm": 1.424856424331665, "learning_rate": 9.017087516479058e-07, "loss": 0.9177, "step": 5955 }, { "epoch": 0.14354182221044773, "grad_norm": 1.345221996307373, "learning_rate": 9.015819896562215e-07, "loss": 0.9523, "step": 5960 }, { "epoch": 0.143662243202235, "grad_norm": 1.4497827291488647, "learning_rate": 9.014552276645371e-07, "loss": 0.9444, "step": 5965 }, { "epoch": 0.1437826641940223, "grad_norm": 1.830645203590393, "learning_rate": 9.013284656728526e-07, "loss": 0.9451, "step": 5970 }, { "epoch": 0.1439030851858096, "grad_norm": 1.7493947744369507, "learning_rate": 9.012017036811682e-07, "loss": 0.9562, "step": 5975 }, { "epoch": 0.14402350617759688, "grad_norm": 1.5937539339065552, "learning_rate": 9.010749416894839e-07, "loss": 1.0019, "step": 5980 }, { "epoch": 0.14414392716938418, "grad_norm": 1.4270418882369995, "learning_rate": 9.009481796977994e-07, "loss": 0.9903, "step": 5985 }, { "epoch": 0.14426434816117145, "grad_norm": 1.5541683435440063, "learning_rate": 9.008214177061149e-07, "loss": 0.9484, "step": 5990 }, { "epoch": 0.14438476915295875, "grad_norm": 1.7125719785690308, "learning_rate": 9.006946557144306e-07, "loss": 0.9652, "step": 5995 }, { "epoch": 0.14450519014474603, "grad_norm": 1.6315988302230835, "learning_rate": 9.005678937227461e-07, "loss": 0.9524, "step": 6000 }, { "epoch": 0.14462561113653333, "grad_norm": 1.493573546409607, "learning_rate": 9.004411317310618e-07, "loss": 0.8963, "step": 6005 }, { "epoch": 0.1447460321283206, "grad_norm": 1.3541276454925537, "learning_rate": 9.003143697393774e-07, "loss": 1.0076, "step": 6010 }, { "epoch": 0.1448664531201079, "grad_norm": 1.7591745853424072, "learning_rate": 9.001876077476928e-07, "loss": 0.9745, "step": 6015 }, { "epoch": 0.14498687411189518, "grad_norm": 1.5763760805130005, "learning_rate": 9.000608457560085e-07, "loss": 0.972, "step": 6020 }, { "epoch": 0.14510729510368248, "grad_norm": 1.6166871786117554, "learning_rate": 8.999340837643241e-07, "loss": 0.9752, "step": 6025 }, { "epoch": 0.14522771609546975, "grad_norm": 1.7187212705612183, "learning_rate": 8.998073217726397e-07, "loss": 0.9813, "step": 6030 }, { "epoch": 0.14534813708725705, "grad_norm": 1.5862038135528564, "learning_rate": 8.996805597809552e-07, "loss": 0.9084, "step": 6035 }, { "epoch": 0.14546855807904435, "grad_norm": 1.7693798542022705, "learning_rate": 8.995537977892708e-07, "loss": 0.8757, "step": 6040 }, { "epoch": 0.14558897907083163, "grad_norm": 1.809888482093811, "learning_rate": 8.994270357975864e-07, "loss": 0.937, "step": 6045 }, { "epoch": 0.14570940006261893, "grad_norm": 1.5466762781143188, "learning_rate": 8.99300273805902e-07, "loss": 0.9364, "step": 6050 }, { "epoch": 0.1458298210544062, "grad_norm": 1.6745027303695679, "learning_rate": 8.991735118142177e-07, "loss": 0.9831, "step": 6055 }, { "epoch": 0.1459502420461935, "grad_norm": 1.58736252784729, "learning_rate": 8.990467498225331e-07, "loss": 0.971, "step": 6060 }, { "epoch": 0.14607066303798077, "grad_norm": 1.7206737995147705, "learning_rate": 8.989199878308488e-07, "loss": 0.9727, "step": 6065 }, { "epoch": 0.14619108402976808, "grad_norm": 1.7877613306045532, "learning_rate": 8.987932258391644e-07, "loss": 0.9706, "step": 6070 }, { "epoch": 0.14631150502155535, "grad_norm": 1.5204682350158691, "learning_rate": 8.986664638474799e-07, "loss": 0.8874, "step": 6075 }, { "epoch": 0.14643192601334265, "grad_norm": 1.5084939002990723, "learning_rate": 8.985397018557956e-07, "loss": 0.9579, "step": 6080 }, { "epoch": 0.14655234700512992, "grad_norm": 1.596987247467041, "learning_rate": 8.984129398641111e-07, "loss": 0.9857, "step": 6085 }, { "epoch": 0.14667276799691722, "grad_norm": 1.6701748371124268, "learning_rate": 8.982861778724267e-07, "loss": 0.9154, "step": 6090 }, { "epoch": 0.1467931889887045, "grad_norm": 1.676477074623108, "learning_rate": 8.981594158807423e-07, "loss": 0.8887, "step": 6095 }, { "epoch": 0.1469136099804918, "grad_norm": 1.4097827672958374, "learning_rate": 8.98032653889058e-07, "loss": 0.9449, "step": 6100 }, { "epoch": 0.1470340309722791, "grad_norm": 1.5947428941726685, "learning_rate": 8.979058918973734e-07, "loss": 0.9222, "step": 6105 }, { "epoch": 0.14715445196406637, "grad_norm": 1.4588384628295898, "learning_rate": 8.97779129905689e-07, "loss": 0.9933, "step": 6110 }, { "epoch": 0.14727487295585368, "grad_norm": 1.5385102033615112, "learning_rate": 8.976523679140047e-07, "loss": 0.9369, "step": 6115 }, { "epoch": 0.14739529394764095, "grad_norm": 1.8313229084014893, "learning_rate": 8.975256059223202e-07, "loss": 0.9278, "step": 6120 }, { "epoch": 0.14751571493942825, "grad_norm": 1.6547642946243286, "learning_rate": 8.973988439306359e-07, "loss": 0.9718, "step": 6125 }, { "epoch": 0.14763613593121552, "grad_norm": 1.3734008073806763, "learning_rate": 8.972720819389514e-07, "loss": 0.8818, "step": 6130 }, { "epoch": 0.14775655692300282, "grad_norm": 1.4598811864852905, "learning_rate": 8.971453199472669e-07, "loss": 0.9302, "step": 6135 }, { "epoch": 0.1478769779147901, "grad_norm": 1.5441845655441284, "learning_rate": 8.970185579555826e-07, "loss": 0.8982, "step": 6140 }, { "epoch": 0.1479973989065774, "grad_norm": 1.6037344932556152, "learning_rate": 8.968917959638982e-07, "loss": 0.9638, "step": 6145 }, { "epoch": 0.14811781989836467, "grad_norm": 1.66959810256958, "learning_rate": 8.967650339722137e-07, "loss": 0.9568, "step": 6150 }, { "epoch": 0.14823824089015197, "grad_norm": 1.5276808738708496, "learning_rate": 8.966382719805293e-07, "loss": 0.9364, "step": 6155 }, { "epoch": 0.14835866188193925, "grad_norm": 1.5752981901168823, "learning_rate": 8.965115099888449e-07, "loss": 0.9797, "step": 6160 }, { "epoch": 0.14847908287372655, "grad_norm": 1.6811892986297607, "learning_rate": 8.963847479971605e-07, "loss": 0.9202, "step": 6165 }, { "epoch": 0.14859950386551385, "grad_norm": 1.6610900163650513, "learning_rate": 8.962579860054761e-07, "loss": 0.9118, "step": 6170 }, { "epoch": 0.14871992485730112, "grad_norm": 1.4799003601074219, "learning_rate": 8.961312240137917e-07, "loss": 0.9738, "step": 6175 }, { "epoch": 0.14884034584908842, "grad_norm": 1.5117777585983276, "learning_rate": 8.960044620221072e-07, "loss": 0.9589, "step": 6180 }, { "epoch": 0.1489607668408757, "grad_norm": 1.5448501110076904, "learning_rate": 8.958777000304228e-07, "loss": 0.9391, "step": 6185 }, { "epoch": 0.149081187832663, "grad_norm": 1.6339643001556396, "learning_rate": 8.957509380387385e-07, "loss": 0.947, "step": 6190 }, { "epoch": 0.14920160882445027, "grad_norm": 1.4847960472106934, "learning_rate": 8.95624176047054e-07, "loss": 1.0043, "step": 6195 }, { "epoch": 0.14932202981623757, "grad_norm": 1.7087690830230713, "learning_rate": 8.954974140553696e-07, "loss": 1.0199, "step": 6200 }, { "epoch": 0.14944245080802485, "grad_norm": 1.7362369298934937, "learning_rate": 8.953706520636852e-07, "loss": 0.906, "step": 6205 }, { "epoch": 0.14956287179981215, "grad_norm": 1.414970874786377, "learning_rate": 8.952438900720008e-07, "loss": 0.9886, "step": 6210 }, { "epoch": 0.14968329279159942, "grad_norm": 1.4704195261001587, "learning_rate": 8.951171280803164e-07, "loss": 0.9758, "step": 6215 }, { "epoch": 0.14980371378338672, "grad_norm": 1.5782198905944824, "learning_rate": 8.949903660886319e-07, "loss": 1.0132, "step": 6220 }, { "epoch": 0.149924134775174, "grad_norm": 1.6264985799789429, "learning_rate": 8.948636040969475e-07, "loss": 0.9184, "step": 6225 }, { "epoch": 0.1500445557669613, "grad_norm": 1.6577425003051758, "learning_rate": 8.947368421052631e-07, "loss": 0.9571, "step": 6230 }, { "epoch": 0.1501649767587486, "grad_norm": 1.3714252710342407, "learning_rate": 8.946100801135788e-07, "loss": 0.9219, "step": 6235 }, { "epoch": 0.15028539775053587, "grad_norm": 1.5763870477676392, "learning_rate": 8.944833181218943e-07, "loss": 0.9337, "step": 6240 }, { "epoch": 0.15040581874232317, "grad_norm": 1.5407581329345703, "learning_rate": 8.943565561302098e-07, "loss": 0.9042, "step": 6245 }, { "epoch": 0.15052623973411045, "grad_norm": 1.387267827987671, "learning_rate": 8.942297941385255e-07, "loss": 0.9513, "step": 6250 }, { "epoch": 0.15064666072589775, "grad_norm": 1.731074571609497, "learning_rate": 8.94103032146841e-07, "loss": 0.9562, "step": 6255 }, { "epoch": 0.15076708171768502, "grad_norm": 1.5412241220474243, "learning_rate": 8.939762701551567e-07, "loss": 0.9866, "step": 6260 }, { "epoch": 0.15088750270947232, "grad_norm": 1.3752002716064453, "learning_rate": 8.938495081634723e-07, "loss": 0.9786, "step": 6265 }, { "epoch": 0.1510079237012596, "grad_norm": 1.5093090534210205, "learning_rate": 8.937227461717878e-07, "loss": 0.8847, "step": 6270 }, { "epoch": 0.1511283446930469, "grad_norm": 1.5734307765960693, "learning_rate": 8.935959841801034e-07, "loss": 0.9767, "step": 6275 }, { "epoch": 0.15124876568483417, "grad_norm": 1.5351828336715698, "learning_rate": 8.93469222188419e-07, "loss": 0.9378, "step": 6280 }, { "epoch": 0.15136918667662147, "grad_norm": 1.6842786073684692, "learning_rate": 8.933424601967346e-07, "loss": 0.9786, "step": 6285 }, { "epoch": 0.15148960766840874, "grad_norm": 1.781619906425476, "learning_rate": 8.932156982050501e-07, "loss": 0.9514, "step": 6290 }, { "epoch": 0.15161002866019604, "grad_norm": 1.6095762252807617, "learning_rate": 8.930889362133658e-07, "loss": 0.9219, "step": 6295 }, { "epoch": 0.15173044965198335, "grad_norm": 1.392642855644226, "learning_rate": 8.929621742216813e-07, "loss": 0.9286, "step": 6300 }, { "epoch": 0.15185087064377062, "grad_norm": 1.4566751718521118, "learning_rate": 8.928354122299969e-07, "loss": 0.9776, "step": 6305 }, { "epoch": 0.15197129163555792, "grad_norm": 1.5674692392349243, "learning_rate": 8.927086502383126e-07, "loss": 0.9211, "step": 6310 }, { "epoch": 0.1520917126273452, "grad_norm": 1.6276681423187256, "learning_rate": 8.92581888246628e-07, "loss": 0.961, "step": 6315 }, { "epoch": 0.1522121336191325, "grad_norm": 1.4062598943710327, "learning_rate": 8.924551262549437e-07, "loss": 0.9144, "step": 6320 }, { "epoch": 0.15233255461091977, "grad_norm": 1.5248562097549438, "learning_rate": 8.923283642632593e-07, "loss": 0.986, "step": 6325 }, { "epoch": 0.15245297560270707, "grad_norm": 1.496514916419983, "learning_rate": 8.922016022715749e-07, "loss": 0.9464, "step": 6330 }, { "epoch": 0.15257339659449434, "grad_norm": 1.688599944114685, "learning_rate": 8.920748402798904e-07, "loss": 0.9531, "step": 6335 }, { "epoch": 0.15269381758628164, "grad_norm": 1.6141420602798462, "learning_rate": 8.91948078288206e-07, "loss": 0.9509, "step": 6340 }, { "epoch": 0.15281423857806892, "grad_norm": 1.5606366395950317, "learning_rate": 8.918213162965216e-07, "loss": 1.0047, "step": 6345 }, { "epoch": 0.15293465956985622, "grad_norm": 1.5859097242355347, "learning_rate": 8.916945543048372e-07, "loss": 0.9504, "step": 6350 }, { "epoch": 0.15305508056164352, "grad_norm": 1.4285775423049927, "learning_rate": 8.915677923131529e-07, "loss": 0.9887, "step": 6355 }, { "epoch": 0.1531755015534308, "grad_norm": 1.529550313949585, "learning_rate": 8.914410303214683e-07, "loss": 0.9791, "step": 6360 }, { "epoch": 0.1532959225452181, "grad_norm": 1.4174224138259888, "learning_rate": 8.913142683297839e-07, "loss": 0.9772, "step": 6365 }, { "epoch": 0.15341634353700537, "grad_norm": 1.4871257543563843, "learning_rate": 8.911875063380996e-07, "loss": 0.8747, "step": 6370 }, { "epoch": 0.15353676452879267, "grad_norm": 1.4553872346878052, "learning_rate": 8.910607443464151e-07, "loss": 0.9605, "step": 6375 }, { "epoch": 0.15365718552057994, "grad_norm": 1.5385500192642212, "learning_rate": 8.909339823547307e-07, "loss": 0.9069, "step": 6380 }, { "epoch": 0.15377760651236724, "grad_norm": 1.5096920728683472, "learning_rate": 8.908072203630463e-07, "loss": 0.9155, "step": 6385 }, { "epoch": 0.15389802750415452, "grad_norm": 1.5188795328140259, "learning_rate": 8.906804583713618e-07, "loss": 0.9971, "step": 6390 }, { "epoch": 0.15401844849594182, "grad_norm": 1.3419337272644043, "learning_rate": 8.905536963796775e-07, "loss": 0.9307, "step": 6395 }, { "epoch": 0.1541388694877291, "grad_norm": 1.4751125574111938, "learning_rate": 8.904269343879931e-07, "loss": 0.8893, "step": 6400 }, { "epoch": 0.1542592904795164, "grad_norm": 1.5324145555496216, "learning_rate": 8.903001723963086e-07, "loss": 0.966, "step": 6405 }, { "epoch": 0.15437971147130367, "grad_norm": 1.554923176765442, "learning_rate": 8.901734104046242e-07, "loss": 0.9722, "step": 6410 }, { "epoch": 0.15450013246309097, "grad_norm": 1.5499253273010254, "learning_rate": 8.900466484129399e-07, "loss": 0.9327, "step": 6415 }, { "epoch": 0.15462055345487827, "grad_norm": 1.539252519607544, "learning_rate": 8.899198864212554e-07, "loss": 0.9854, "step": 6420 }, { "epoch": 0.15474097444666554, "grad_norm": 1.5881555080413818, "learning_rate": 8.89793124429571e-07, "loss": 1.0085, "step": 6425 }, { "epoch": 0.15486139543845284, "grad_norm": 1.6186283826828003, "learning_rate": 8.896663624378866e-07, "loss": 0.9865, "step": 6430 }, { "epoch": 0.15498181643024012, "grad_norm": 1.7926065921783447, "learning_rate": 8.895396004462021e-07, "loss": 1.0098, "step": 6435 }, { "epoch": 0.15510223742202742, "grad_norm": 1.7826013565063477, "learning_rate": 8.894128384545178e-07, "loss": 0.9576, "step": 6440 }, { "epoch": 0.1552226584138147, "grad_norm": 1.736943006515503, "learning_rate": 8.892860764628334e-07, "loss": 0.9785, "step": 6445 }, { "epoch": 0.155343079405602, "grad_norm": 1.822184681892395, "learning_rate": 8.891593144711488e-07, "loss": 0.9646, "step": 6450 }, { "epoch": 0.15546350039738926, "grad_norm": 1.5065011978149414, "learning_rate": 8.890325524794645e-07, "loss": 0.9547, "step": 6455 }, { "epoch": 0.15558392138917657, "grad_norm": 1.5549193620681763, "learning_rate": 8.889057904877801e-07, "loss": 0.9702, "step": 6460 }, { "epoch": 0.15570434238096384, "grad_norm": 1.4823460578918457, "learning_rate": 8.887790284960957e-07, "loss": 0.9393, "step": 6465 }, { "epoch": 0.15582476337275114, "grad_norm": 1.5357725620269775, "learning_rate": 8.886522665044113e-07, "loss": 1.0052, "step": 6470 }, { "epoch": 0.15594518436453841, "grad_norm": 1.395545244216919, "learning_rate": 8.885255045127269e-07, "loss": 0.927, "step": 6475 }, { "epoch": 0.15606560535632572, "grad_norm": 1.5250643491744995, "learning_rate": 8.883987425210424e-07, "loss": 0.984, "step": 6480 }, { "epoch": 0.15618602634811302, "grad_norm": 1.5090745687484741, "learning_rate": 8.88271980529358e-07, "loss": 0.9602, "step": 6485 }, { "epoch": 0.1563064473399003, "grad_norm": 1.5585459470748901, "learning_rate": 8.881452185376737e-07, "loss": 0.9441, "step": 6490 }, { "epoch": 0.1564268683316876, "grad_norm": 1.7480950355529785, "learning_rate": 8.880184565459891e-07, "loss": 0.9616, "step": 6495 }, { "epoch": 0.15654728932347486, "grad_norm": 1.503509759902954, "learning_rate": 8.878916945543048e-07, "loss": 0.9515, "step": 6500 }, { "epoch": 0.15666771031526217, "grad_norm": 1.5634033679962158, "learning_rate": 8.877649325626204e-07, "loss": 1.0264, "step": 6505 }, { "epoch": 0.15678813130704944, "grad_norm": 1.530869960784912, "learning_rate": 8.876381705709359e-07, "loss": 0.984, "step": 6510 }, { "epoch": 0.15690855229883674, "grad_norm": 1.5853240489959717, "learning_rate": 8.875114085792516e-07, "loss": 1.0086, "step": 6515 }, { "epoch": 0.157028973290624, "grad_norm": 1.508705496788025, "learning_rate": 8.873846465875671e-07, "loss": 0.9516, "step": 6520 }, { "epoch": 0.15714939428241131, "grad_norm": 1.5178998708724976, "learning_rate": 8.872578845958827e-07, "loss": 0.9062, "step": 6525 }, { "epoch": 0.1572698152741986, "grad_norm": 1.529740810394287, "learning_rate": 8.871311226041983e-07, "loss": 0.9721, "step": 6530 }, { "epoch": 0.1573902362659859, "grad_norm": 1.5887424945831299, "learning_rate": 8.87004360612514e-07, "loss": 0.9015, "step": 6535 }, { "epoch": 0.15751065725777316, "grad_norm": 2.0315704345703125, "learning_rate": 8.868775986208295e-07, "loss": 0.9583, "step": 6540 }, { "epoch": 0.15763107824956046, "grad_norm": 1.5251991748809814, "learning_rate": 8.86750836629145e-07, "loss": 0.9607, "step": 6545 }, { "epoch": 0.15775149924134776, "grad_norm": 1.5378426313400269, "learning_rate": 8.866240746374607e-07, "loss": 1.0108, "step": 6550 }, { "epoch": 0.15787192023313504, "grad_norm": 1.4061293601989746, "learning_rate": 8.864973126457762e-07, "loss": 0.8955, "step": 6555 }, { "epoch": 0.15799234122492234, "grad_norm": 1.381935477256775, "learning_rate": 8.863705506540919e-07, "loss": 0.9266, "step": 6560 }, { "epoch": 0.1581127622167096, "grad_norm": 1.6147645711898804, "learning_rate": 8.862437886624074e-07, "loss": 0.9526, "step": 6565 }, { "epoch": 0.1582331832084969, "grad_norm": 1.4142693281173706, "learning_rate": 8.861170266707229e-07, "loss": 0.9508, "step": 6570 }, { "epoch": 0.1583536042002842, "grad_norm": 1.6666898727416992, "learning_rate": 8.859902646790386e-07, "loss": 0.9025, "step": 6575 }, { "epoch": 0.1584740251920715, "grad_norm": 1.609081745147705, "learning_rate": 8.858635026873542e-07, "loss": 0.9337, "step": 6580 }, { "epoch": 0.15859444618385876, "grad_norm": 1.6501929759979248, "learning_rate": 8.857367406956698e-07, "loss": 0.8925, "step": 6585 }, { "epoch": 0.15871486717564606, "grad_norm": 1.5381691455841064, "learning_rate": 8.856099787039853e-07, "loss": 0.9307, "step": 6590 }, { "epoch": 0.15883528816743334, "grad_norm": 1.5835388898849487, "learning_rate": 8.85483216712301e-07, "loss": 0.9817, "step": 6595 }, { "epoch": 0.15895570915922064, "grad_norm": 3.076845407485962, "learning_rate": 8.853564547206165e-07, "loss": 0.9501, "step": 6600 }, { "epoch": 0.1590761301510079, "grad_norm": 1.6130303144454956, "learning_rate": 8.852296927289321e-07, "loss": 0.9755, "step": 6605 }, { "epoch": 0.1591965511427952, "grad_norm": 1.55154287815094, "learning_rate": 8.851029307372478e-07, "loss": 0.9794, "step": 6610 }, { "epoch": 0.1593169721345825, "grad_norm": 1.5113273859024048, "learning_rate": 8.849761687455632e-07, "loss": 0.9276, "step": 6615 }, { "epoch": 0.1594373931263698, "grad_norm": 1.5491209030151367, "learning_rate": 8.848494067538789e-07, "loss": 1.0023, "step": 6620 }, { "epoch": 0.1595578141181571, "grad_norm": 1.6405673027038574, "learning_rate": 8.847226447621945e-07, "loss": 0.9533, "step": 6625 }, { "epoch": 0.15967823510994436, "grad_norm": 1.6780866384506226, "learning_rate": 8.8459588277051e-07, "loss": 0.959, "step": 6630 }, { "epoch": 0.15979865610173166, "grad_norm": 1.3100441694259644, "learning_rate": 8.844691207788256e-07, "loss": 1.0109, "step": 6635 }, { "epoch": 0.15991907709351894, "grad_norm": 1.7152460813522339, "learning_rate": 8.843423587871412e-07, "loss": 0.9075, "step": 6640 }, { "epoch": 0.16003949808530624, "grad_norm": 1.6447054147720337, "learning_rate": 8.842155967954568e-07, "loss": 0.937, "step": 6645 }, { "epoch": 0.1601599190770935, "grad_norm": 1.7059388160705566, "learning_rate": 8.840888348037724e-07, "loss": 0.9517, "step": 6650 }, { "epoch": 0.1602803400688808, "grad_norm": 1.547312617301941, "learning_rate": 8.839620728120881e-07, "loss": 0.8843, "step": 6655 }, { "epoch": 0.16040076106066808, "grad_norm": 1.666043758392334, "learning_rate": 8.838353108204035e-07, "loss": 0.9763, "step": 6660 }, { "epoch": 0.16052118205245539, "grad_norm": 1.5424047708511353, "learning_rate": 8.837085488287191e-07, "loss": 0.9772, "step": 6665 }, { "epoch": 0.16064160304424266, "grad_norm": 1.7461053133010864, "learning_rate": 8.835817868370348e-07, "loss": 0.9384, "step": 6670 }, { "epoch": 0.16076202403602996, "grad_norm": 1.5185924768447876, "learning_rate": 8.834550248453503e-07, "loss": 0.9413, "step": 6675 }, { "epoch": 0.16088244502781726, "grad_norm": 1.5015673637390137, "learning_rate": 8.833282628536659e-07, "loss": 0.9503, "step": 6680 }, { "epoch": 0.16100286601960453, "grad_norm": 1.4696515798568726, "learning_rate": 8.832015008619815e-07, "loss": 0.9829, "step": 6685 }, { "epoch": 0.16112328701139184, "grad_norm": 1.4103220701217651, "learning_rate": 8.83074738870297e-07, "loss": 0.9143, "step": 6690 }, { "epoch": 0.1612437080031791, "grad_norm": 1.5549726486206055, "learning_rate": 8.829479768786127e-07, "loss": 0.9381, "step": 6695 }, { "epoch": 0.1613641289949664, "grad_norm": 1.6166776418685913, "learning_rate": 8.828212148869283e-07, "loss": 0.9271, "step": 6700 }, { "epoch": 0.16148454998675368, "grad_norm": 1.652779459953308, "learning_rate": 8.826944528952438e-07, "loss": 0.9363, "step": 6705 }, { "epoch": 0.16160497097854098, "grad_norm": 1.5943504571914673, "learning_rate": 8.825676909035594e-07, "loss": 0.9475, "step": 6710 }, { "epoch": 0.16172539197032826, "grad_norm": 1.6199977397918701, "learning_rate": 8.82440928911875e-07, "loss": 0.9631, "step": 6715 }, { "epoch": 0.16184581296211556, "grad_norm": 1.6312978267669678, "learning_rate": 8.823141669201906e-07, "loss": 0.9328, "step": 6720 }, { "epoch": 0.16196623395390283, "grad_norm": 1.6854356527328491, "learning_rate": 8.821874049285062e-07, "loss": 1.0102, "step": 6725 }, { "epoch": 0.16208665494569013, "grad_norm": 1.5627775192260742, "learning_rate": 8.820606429368218e-07, "loss": 1.0122, "step": 6730 }, { "epoch": 0.1622070759374774, "grad_norm": 1.4499491453170776, "learning_rate": 8.819338809451373e-07, "loss": 0.9514, "step": 6735 }, { "epoch": 0.1623274969292647, "grad_norm": 1.5392065048217773, "learning_rate": 8.81807118953453e-07, "loss": 0.9317, "step": 6740 }, { "epoch": 0.162447917921052, "grad_norm": 1.5162497758865356, "learning_rate": 8.816803569617686e-07, "loss": 0.9393, "step": 6745 }, { "epoch": 0.16256833891283928, "grad_norm": 1.8237184286117554, "learning_rate": 8.81553594970084e-07, "loss": 0.9112, "step": 6750 }, { "epoch": 0.16268875990462658, "grad_norm": 1.511694073677063, "learning_rate": 8.814268329783997e-07, "loss": 0.9365, "step": 6755 }, { "epoch": 0.16280918089641386, "grad_norm": 1.4274612665176392, "learning_rate": 8.813000709867153e-07, "loss": 0.9378, "step": 6760 }, { "epoch": 0.16292960188820116, "grad_norm": 1.5141350030899048, "learning_rate": 8.811733089950309e-07, "loss": 0.9561, "step": 6765 }, { "epoch": 0.16305002287998843, "grad_norm": 1.6848194599151611, "learning_rate": 8.810465470033465e-07, "loss": 0.9606, "step": 6770 }, { "epoch": 0.16317044387177573, "grad_norm": 1.487057089805603, "learning_rate": 8.80919785011662e-07, "loss": 0.9161, "step": 6775 }, { "epoch": 0.163290864863563, "grad_norm": 1.4446853399276733, "learning_rate": 8.807930230199776e-07, "loss": 0.9692, "step": 6780 }, { "epoch": 0.1634112858553503, "grad_norm": 2.0542047023773193, "learning_rate": 8.806662610282932e-07, "loss": 0.9392, "step": 6785 }, { "epoch": 0.16353170684713758, "grad_norm": 1.7039285898208618, "learning_rate": 8.805394990366089e-07, "loss": 0.9278, "step": 6790 }, { "epoch": 0.16365212783892488, "grad_norm": 1.584222435951233, "learning_rate": 8.804127370449243e-07, "loss": 0.932, "step": 6795 }, { "epoch": 0.16377254883071218, "grad_norm": 1.6305924654006958, "learning_rate": 8.8028597505324e-07, "loss": 0.9655, "step": 6800 }, { "epoch": 0.16389296982249946, "grad_norm": 1.6200332641601562, "learning_rate": 8.801592130615556e-07, "loss": 0.96, "step": 6805 }, { "epoch": 0.16401339081428676, "grad_norm": 1.4420777559280396, "learning_rate": 8.800324510698711e-07, "loss": 0.9659, "step": 6810 }, { "epoch": 0.16413381180607403, "grad_norm": 1.7470130920410156, "learning_rate": 8.799056890781868e-07, "loss": 0.924, "step": 6815 }, { "epoch": 0.16425423279786133, "grad_norm": 1.611938714981079, "learning_rate": 8.797789270865023e-07, "loss": 0.981, "step": 6820 }, { "epoch": 0.1643746537896486, "grad_norm": 1.7520917654037476, "learning_rate": 8.796521650948179e-07, "loss": 0.9833, "step": 6825 }, { "epoch": 0.1644950747814359, "grad_norm": 1.7744758129119873, "learning_rate": 8.795254031031335e-07, "loss": 0.9853, "step": 6830 }, { "epoch": 0.16461549577322318, "grad_norm": 1.465274691581726, "learning_rate": 8.793986411114491e-07, "loss": 0.9643, "step": 6835 }, { "epoch": 0.16473591676501048, "grad_norm": 1.4554154872894287, "learning_rate": 8.792718791197647e-07, "loss": 0.9115, "step": 6840 }, { "epoch": 0.16485633775679776, "grad_norm": 1.8319108486175537, "learning_rate": 8.791451171280802e-07, "loss": 0.9758, "step": 6845 }, { "epoch": 0.16497675874858506, "grad_norm": 1.3700926303863525, "learning_rate": 8.790183551363959e-07, "loss": 0.987, "step": 6850 }, { "epoch": 0.16509717974037233, "grad_norm": 1.4417375326156616, "learning_rate": 8.788915931447114e-07, "loss": 0.9503, "step": 6855 }, { "epoch": 0.16521760073215963, "grad_norm": 1.8359936475753784, "learning_rate": 8.787648311530271e-07, "loss": 0.939, "step": 6860 }, { "epoch": 0.16533802172394693, "grad_norm": 1.694645643234253, "learning_rate": 8.786380691613426e-07, "loss": 0.9382, "step": 6865 }, { "epoch": 0.1654584427157342, "grad_norm": 1.778012752532959, "learning_rate": 8.785113071696581e-07, "loss": 0.9851, "step": 6870 }, { "epoch": 0.1655788637075215, "grad_norm": 1.6528106927871704, "learning_rate": 8.783845451779738e-07, "loss": 0.9749, "step": 6875 }, { "epoch": 0.16569928469930878, "grad_norm": 1.4038870334625244, "learning_rate": 8.782577831862894e-07, "loss": 0.9657, "step": 6880 }, { "epoch": 0.16581970569109608, "grad_norm": 1.967231035232544, "learning_rate": 8.78131021194605e-07, "loss": 0.9531, "step": 6885 }, { "epoch": 0.16594012668288335, "grad_norm": 1.5113967657089233, "learning_rate": 8.780042592029205e-07, "loss": 0.9842, "step": 6890 }, { "epoch": 0.16606054767467066, "grad_norm": 1.4383405447006226, "learning_rate": 8.778774972112361e-07, "loss": 0.9339, "step": 6895 }, { "epoch": 0.16618096866645793, "grad_norm": 1.5188686847686768, "learning_rate": 8.777507352195517e-07, "loss": 0.994, "step": 6900 }, { "epoch": 0.16630138965824523, "grad_norm": 1.4522814750671387, "learning_rate": 8.776239732278673e-07, "loss": 0.9889, "step": 6905 }, { "epoch": 0.1664218106500325, "grad_norm": 1.40193510055542, "learning_rate": 8.77497211236183e-07, "loss": 0.9585, "step": 6910 }, { "epoch": 0.1665422316418198, "grad_norm": 1.6679961681365967, "learning_rate": 8.773704492444985e-07, "loss": 0.9333, "step": 6915 }, { "epoch": 0.16666265263360708, "grad_norm": 1.6849275827407837, "learning_rate": 8.77243687252814e-07, "loss": 0.9663, "step": 6920 }, { "epoch": 0.16678307362539438, "grad_norm": 1.5537941455841064, "learning_rate": 8.771169252611297e-07, "loss": 0.969, "step": 6925 }, { "epoch": 0.16690349461718168, "grad_norm": 1.6917616128921509, "learning_rate": 8.769901632694453e-07, "loss": 0.9102, "step": 6930 }, { "epoch": 0.16702391560896895, "grad_norm": 1.4796494245529175, "learning_rate": 8.768634012777608e-07, "loss": 0.9696, "step": 6935 }, { "epoch": 0.16714433660075625, "grad_norm": 1.639575481414795, "learning_rate": 8.767366392860764e-07, "loss": 0.9919, "step": 6940 }, { "epoch": 0.16726475759254353, "grad_norm": 1.4652765989303589, "learning_rate": 8.766098772943921e-07, "loss": 0.9667, "step": 6945 }, { "epoch": 0.16738517858433083, "grad_norm": 1.4565825462341309, "learning_rate": 8.764831153027076e-07, "loss": 0.9219, "step": 6950 }, { "epoch": 0.1675055995761181, "grad_norm": 1.7221384048461914, "learning_rate": 8.763563533110232e-07, "loss": 0.9487, "step": 6955 }, { "epoch": 0.1676260205679054, "grad_norm": 1.753455638885498, "learning_rate": 8.762295913193388e-07, "loss": 0.91, "step": 6960 }, { "epoch": 0.16774644155969268, "grad_norm": 1.534593939781189, "learning_rate": 8.761028293276543e-07, "loss": 0.9343, "step": 6965 }, { "epoch": 0.16786686255147998, "grad_norm": 1.6360231637954712, "learning_rate": 8.7597606733597e-07, "loss": 0.9446, "step": 6970 }, { "epoch": 0.16798728354326725, "grad_norm": 1.6140836477279663, "learning_rate": 8.758493053442856e-07, "loss": 0.9353, "step": 6975 }, { "epoch": 0.16810770453505455, "grad_norm": 1.7073837518692017, "learning_rate": 8.75722543352601e-07, "loss": 0.9966, "step": 6980 }, { "epoch": 0.16822812552684183, "grad_norm": 1.6094011068344116, "learning_rate": 8.755957813609167e-07, "loss": 0.9944, "step": 6985 }, { "epoch": 0.16834854651862913, "grad_norm": 1.502415418624878, "learning_rate": 8.754690193692323e-07, "loss": 0.9864, "step": 6990 }, { "epoch": 0.16846896751041643, "grad_norm": 1.5931711196899414, "learning_rate": 8.753422573775479e-07, "loss": 0.8975, "step": 6995 }, { "epoch": 0.1685893885022037, "grad_norm": 1.5415033102035522, "learning_rate": 8.752154953858635e-07, "loss": 0.9557, "step": 7000 }, { "epoch": 0.168709809493991, "grad_norm": 1.4606413841247559, "learning_rate": 8.750887333941791e-07, "loss": 0.9077, "step": 7005 }, { "epoch": 0.16883023048577828, "grad_norm": 1.534355640411377, "learning_rate": 8.749619714024946e-07, "loss": 0.9229, "step": 7010 }, { "epoch": 0.16895065147756558, "grad_norm": 1.5767483711242676, "learning_rate": 8.748352094108102e-07, "loss": 1.0249, "step": 7015 }, { "epoch": 0.16907107246935285, "grad_norm": 1.371443748474121, "learning_rate": 8.747084474191259e-07, "loss": 0.9365, "step": 7020 }, { "epoch": 0.16919149346114015, "grad_norm": 1.5228232145309448, "learning_rate": 8.745816854274414e-07, "loss": 0.9641, "step": 7025 }, { "epoch": 0.16931191445292743, "grad_norm": 1.520755648612976, "learning_rate": 8.74454923435757e-07, "loss": 0.9721, "step": 7030 }, { "epoch": 0.16943233544471473, "grad_norm": 1.4273567199707031, "learning_rate": 8.743281614440726e-07, "loss": 0.902, "step": 7035 }, { "epoch": 0.169552756436502, "grad_norm": 1.4603309631347656, "learning_rate": 8.742013994523881e-07, "loss": 0.9388, "step": 7040 }, { "epoch": 0.1696731774282893, "grad_norm": 3.162548303604126, "learning_rate": 8.740746374607038e-07, "loss": 0.9625, "step": 7045 }, { "epoch": 0.16979359842007657, "grad_norm": 1.5650300979614258, "learning_rate": 8.739478754690193e-07, "loss": 0.9908, "step": 7050 }, { "epoch": 0.16991401941186388, "grad_norm": 1.647933006286621, "learning_rate": 8.738211134773349e-07, "loss": 0.9675, "step": 7055 }, { "epoch": 0.17003444040365118, "grad_norm": 1.657855749130249, "learning_rate": 8.736943514856505e-07, "loss": 0.9611, "step": 7060 }, { "epoch": 0.17015486139543845, "grad_norm": 1.6878466606140137, "learning_rate": 8.735675894939662e-07, "loss": 0.9491, "step": 7065 }, { "epoch": 0.17027528238722575, "grad_norm": 1.745612621307373, "learning_rate": 8.734408275022817e-07, "loss": 0.9741, "step": 7070 }, { "epoch": 0.17039570337901302, "grad_norm": 1.6257210969924927, "learning_rate": 8.733140655105972e-07, "loss": 0.9407, "step": 7075 }, { "epoch": 0.17051612437080033, "grad_norm": 1.6022166013717651, "learning_rate": 8.731873035189129e-07, "loss": 0.8677, "step": 7080 }, { "epoch": 0.1706365453625876, "grad_norm": 1.5448520183563232, "learning_rate": 8.730605415272284e-07, "loss": 0.985, "step": 7085 }, { "epoch": 0.1707569663543749, "grad_norm": 1.5920443534851074, "learning_rate": 8.729337795355441e-07, "loss": 0.9544, "step": 7090 }, { "epoch": 0.17087738734616217, "grad_norm": 1.6187429428100586, "learning_rate": 8.728070175438597e-07, "loss": 0.961, "step": 7095 }, { "epoch": 0.17099780833794948, "grad_norm": 1.5467497110366821, "learning_rate": 8.726802555521751e-07, "loss": 0.9439, "step": 7100 }, { "epoch": 0.17111822932973675, "grad_norm": 1.6352099180221558, "learning_rate": 8.725534935604908e-07, "loss": 0.945, "step": 7105 }, { "epoch": 0.17123865032152405, "grad_norm": 1.3417030572891235, "learning_rate": 8.724267315688064e-07, "loss": 0.9185, "step": 7110 }, { "epoch": 0.17135907131331132, "grad_norm": 1.594738245010376, "learning_rate": 8.72299969577122e-07, "loss": 0.9201, "step": 7115 }, { "epoch": 0.17147949230509862, "grad_norm": 1.7222914695739746, "learning_rate": 8.721732075854375e-07, "loss": 0.927, "step": 7120 }, { "epoch": 0.17159991329688593, "grad_norm": 2.0674643516540527, "learning_rate": 8.720464455937532e-07, "loss": 0.9905, "step": 7125 }, { "epoch": 0.1717203342886732, "grad_norm": 1.612656831741333, "learning_rate": 8.719196836020687e-07, "loss": 0.9274, "step": 7130 }, { "epoch": 0.1718407552804605, "grad_norm": 1.4258354902267456, "learning_rate": 8.717929216103843e-07, "loss": 0.971, "step": 7135 }, { "epoch": 0.17196117627224777, "grad_norm": 1.4199203252792358, "learning_rate": 8.716661596187e-07, "loss": 0.9387, "step": 7140 }, { "epoch": 0.17208159726403507, "grad_norm": 1.5075029134750366, "learning_rate": 8.715393976270154e-07, "loss": 0.9702, "step": 7145 }, { "epoch": 0.17220201825582235, "grad_norm": 1.5064207315444946, "learning_rate": 8.714126356353311e-07, "loss": 0.8867, "step": 7150 }, { "epoch": 0.17232243924760965, "grad_norm": 1.5721375942230225, "learning_rate": 8.712858736436467e-07, "loss": 0.956, "step": 7155 }, { "epoch": 0.17244286023939692, "grad_norm": 1.6020773649215698, "learning_rate": 8.711591116519622e-07, "loss": 0.94, "step": 7160 }, { "epoch": 0.17256328123118422, "grad_norm": 1.4800697565078735, "learning_rate": 8.710323496602778e-07, "loss": 0.9859, "step": 7165 }, { "epoch": 0.1726837022229715, "grad_norm": 1.772230625152588, "learning_rate": 8.709055876685934e-07, "loss": 0.9597, "step": 7170 }, { "epoch": 0.1728041232147588, "grad_norm": 1.602545142173767, "learning_rate": 8.70778825676909e-07, "loss": 0.9412, "step": 7175 }, { "epoch": 0.17292454420654607, "grad_norm": 1.5732465982437134, "learning_rate": 8.706520636852246e-07, "loss": 0.9165, "step": 7180 }, { "epoch": 0.17304496519833337, "grad_norm": 1.5865917205810547, "learning_rate": 8.705253016935403e-07, "loss": 0.9101, "step": 7185 }, { "epoch": 0.17316538619012067, "grad_norm": 1.4857765436172485, "learning_rate": 8.703985397018557e-07, "loss": 0.9447, "step": 7190 }, { "epoch": 0.17328580718190795, "grad_norm": 1.5675077438354492, "learning_rate": 8.702717777101713e-07, "loss": 0.9187, "step": 7195 }, { "epoch": 0.17340622817369525, "grad_norm": 1.5084619522094727, "learning_rate": 8.70145015718487e-07, "loss": 0.966, "step": 7200 }, { "epoch": 0.17352664916548252, "grad_norm": 1.5520141124725342, "learning_rate": 8.700182537268025e-07, "loss": 0.9421, "step": 7205 }, { "epoch": 0.17364707015726982, "grad_norm": 1.9939416646957397, "learning_rate": 8.698914917351182e-07, "loss": 0.9815, "step": 7210 }, { "epoch": 0.1737674911490571, "grad_norm": 1.4251264333724976, "learning_rate": 8.697647297434337e-07, "loss": 0.9374, "step": 7215 }, { "epoch": 0.1738879121408444, "grad_norm": 1.5417747497558594, "learning_rate": 8.696379677517492e-07, "loss": 0.9117, "step": 7220 }, { "epoch": 0.17400833313263167, "grad_norm": 1.4149068593978882, "learning_rate": 8.695112057600649e-07, "loss": 0.897, "step": 7225 }, { "epoch": 0.17412875412441897, "grad_norm": 1.4917889833450317, "learning_rate": 8.693844437683805e-07, "loss": 0.9394, "step": 7230 }, { "epoch": 0.17424917511620625, "grad_norm": 1.6822994947433472, "learning_rate": 8.69257681776696e-07, "loss": 0.8761, "step": 7235 }, { "epoch": 0.17436959610799355, "grad_norm": 1.4337525367736816, "learning_rate": 8.691309197850116e-07, "loss": 0.8911, "step": 7240 }, { "epoch": 0.17449001709978085, "grad_norm": 1.5922350883483887, "learning_rate": 8.690041577933273e-07, "loss": 0.9328, "step": 7245 }, { "epoch": 0.17461043809156812, "grad_norm": 1.6190274953842163, "learning_rate": 8.688773958016428e-07, "loss": 0.952, "step": 7250 }, { "epoch": 0.17473085908335542, "grad_norm": 1.870335578918457, "learning_rate": 8.687506338099584e-07, "loss": 0.9857, "step": 7255 }, { "epoch": 0.1748512800751427, "grad_norm": 1.4689422845840454, "learning_rate": 8.68623871818274e-07, "loss": 0.8954, "step": 7260 }, { "epoch": 0.17497170106693, "grad_norm": 1.566745400428772, "learning_rate": 8.684971098265895e-07, "loss": 0.9357, "step": 7265 }, { "epoch": 0.17509212205871727, "grad_norm": 1.6396536827087402, "learning_rate": 8.683703478349052e-07, "loss": 0.9431, "step": 7270 }, { "epoch": 0.17521254305050457, "grad_norm": 1.8179210424423218, "learning_rate": 8.682435858432208e-07, "loss": 0.9463, "step": 7275 }, { "epoch": 0.17533296404229184, "grad_norm": 2.6301631927490234, "learning_rate": 8.681168238515362e-07, "loss": 0.9387, "step": 7280 }, { "epoch": 0.17545338503407915, "grad_norm": 1.613966941833496, "learning_rate": 8.679900618598519e-07, "loss": 0.9599, "step": 7285 }, { "epoch": 0.17557380602586642, "grad_norm": 1.6236636638641357, "learning_rate": 8.678632998681675e-07, "loss": 0.9078, "step": 7290 }, { "epoch": 0.17569422701765372, "grad_norm": 1.417366862297058, "learning_rate": 8.677365378764831e-07, "loss": 0.9107, "step": 7295 }, { "epoch": 0.175814648009441, "grad_norm": 1.6419947147369385, "learning_rate": 8.676097758847987e-07, "loss": 0.9224, "step": 7300 }, { "epoch": 0.1759350690012283, "grad_norm": 1.635498285293579, "learning_rate": 8.674830138931142e-07, "loss": 1.0329, "step": 7305 }, { "epoch": 0.1760554899930156, "grad_norm": 1.461501121520996, "learning_rate": 8.673562519014298e-07, "loss": 0.9704, "step": 7310 }, { "epoch": 0.17617591098480287, "grad_norm": 2.0301854610443115, "learning_rate": 8.672294899097454e-07, "loss": 0.957, "step": 7315 }, { "epoch": 0.17629633197659017, "grad_norm": 1.6653895378112793, "learning_rate": 8.671027279180611e-07, "loss": 1.0023, "step": 7320 }, { "epoch": 0.17641675296837744, "grad_norm": 1.5719858407974243, "learning_rate": 8.669759659263766e-07, "loss": 0.9648, "step": 7325 }, { "epoch": 0.17653717396016474, "grad_norm": 1.5789759159088135, "learning_rate": 8.668492039346922e-07, "loss": 0.9642, "step": 7330 }, { "epoch": 0.17665759495195202, "grad_norm": 1.499147891998291, "learning_rate": 8.667224419430078e-07, "loss": 0.9344, "step": 7335 }, { "epoch": 0.17677801594373932, "grad_norm": 1.4714581966400146, "learning_rate": 8.665956799513233e-07, "loss": 0.9036, "step": 7340 }, { "epoch": 0.1768984369355266, "grad_norm": 1.5490918159484863, "learning_rate": 8.66468917959639e-07, "loss": 1.0156, "step": 7345 }, { "epoch": 0.1770188579273139, "grad_norm": 1.5557063817977905, "learning_rate": 8.663421559679545e-07, "loss": 0.9306, "step": 7350 }, { "epoch": 0.17713927891910117, "grad_norm": 1.6066994667053223, "learning_rate": 8.662153939762701e-07, "loss": 0.9709, "step": 7355 }, { "epoch": 0.17725969991088847, "grad_norm": 1.4699970483779907, "learning_rate": 8.660886319845857e-07, "loss": 0.9337, "step": 7360 }, { "epoch": 0.17738012090267574, "grad_norm": 1.4439870119094849, "learning_rate": 8.659618699929013e-07, "loss": 0.9177, "step": 7365 }, { "epoch": 0.17750054189446304, "grad_norm": 1.50325608253479, "learning_rate": 8.658351080012169e-07, "loss": 0.9252, "step": 7370 }, { "epoch": 0.17762096288625034, "grad_norm": 1.4546009302139282, "learning_rate": 8.657083460095324e-07, "loss": 0.9483, "step": 7375 }, { "epoch": 0.17774138387803762, "grad_norm": 1.5476328134536743, "learning_rate": 8.655815840178481e-07, "loss": 0.9219, "step": 7380 }, { "epoch": 0.17786180486982492, "grad_norm": 1.4623545408248901, "learning_rate": 8.654548220261636e-07, "loss": 0.9562, "step": 7385 }, { "epoch": 0.1779822258616122, "grad_norm": 1.5667556524276733, "learning_rate": 8.653280600344793e-07, "loss": 0.9165, "step": 7390 }, { "epoch": 0.1781026468533995, "grad_norm": 1.402410626411438, "learning_rate": 8.652012980427949e-07, "loss": 0.9527, "step": 7395 }, { "epoch": 0.17822306784518677, "grad_norm": 1.6034663915634155, "learning_rate": 8.650745360511103e-07, "loss": 0.9819, "step": 7400 }, { "epoch": 0.17834348883697407, "grad_norm": 1.5540601015090942, "learning_rate": 8.64947774059426e-07, "loss": 0.9365, "step": 7405 }, { "epoch": 0.17846390982876134, "grad_norm": 1.5132516622543335, "learning_rate": 8.648210120677416e-07, "loss": 0.9918, "step": 7410 }, { "epoch": 0.17858433082054864, "grad_norm": 1.5507590770721436, "learning_rate": 8.646942500760572e-07, "loss": 0.9234, "step": 7415 }, { "epoch": 0.17870475181233592, "grad_norm": 1.5630336999893188, "learning_rate": 8.645674880843727e-07, "loss": 0.94, "step": 7420 }, { "epoch": 0.17882517280412322, "grad_norm": 1.7237917184829712, "learning_rate": 8.644407260926883e-07, "loss": 0.9653, "step": 7425 }, { "epoch": 0.1789455937959105, "grad_norm": 1.4693753719329834, "learning_rate": 8.643139641010039e-07, "loss": 0.9563, "step": 7430 }, { "epoch": 0.1790660147876978, "grad_norm": 1.636465072631836, "learning_rate": 8.641872021093195e-07, "loss": 0.9552, "step": 7435 }, { "epoch": 0.1791864357794851, "grad_norm": 1.3864063024520874, "learning_rate": 8.640604401176352e-07, "loss": 0.9553, "step": 7440 }, { "epoch": 0.17930685677127237, "grad_norm": 1.6570584774017334, "learning_rate": 8.639336781259506e-07, "loss": 0.9229, "step": 7445 }, { "epoch": 0.17942727776305967, "grad_norm": 1.4525080919265747, "learning_rate": 8.638069161342662e-07, "loss": 0.9361, "step": 7450 }, { "epoch": 0.17954769875484694, "grad_norm": 1.5122401714324951, "learning_rate": 8.636801541425819e-07, "loss": 0.9189, "step": 7455 }, { "epoch": 0.17966811974663424, "grad_norm": 1.5133171081542969, "learning_rate": 8.635533921508974e-07, "loss": 0.9435, "step": 7460 }, { "epoch": 0.17978854073842152, "grad_norm": 1.5368260145187378, "learning_rate": 8.63426630159213e-07, "loss": 0.9743, "step": 7465 }, { "epoch": 0.17990896173020882, "grad_norm": 1.7313311100006104, "learning_rate": 8.632998681675286e-07, "loss": 0.916, "step": 7470 }, { "epoch": 0.1800293827219961, "grad_norm": 1.5005427598953247, "learning_rate": 8.631731061758442e-07, "loss": 0.9366, "step": 7475 }, { "epoch": 0.1801498037137834, "grad_norm": 1.3697391748428345, "learning_rate": 8.630463441841598e-07, "loss": 0.9251, "step": 7480 }, { "epoch": 0.18027022470557066, "grad_norm": 1.6520494222640991, "learning_rate": 8.629195821924754e-07, "loss": 0.9587, "step": 7485 }, { "epoch": 0.18039064569735797, "grad_norm": 1.641788363456726, "learning_rate": 8.627928202007909e-07, "loss": 0.9348, "step": 7490 }, { "epoch": 0.18051106668914524, "grad_norm": 1.6810346841812134, "learning_rate": 8.626660582091065e-07, "loss": 0.9282, "step": 7495 }, { "epoch": 0.18063148768093254, "grad_norm": 1.5090734958648682, "learning_rate": 8.625392962174222e-07, "loss": 0.9644, "step": 7500 }, { "epoch": 0.18075190867271984, "grad_norm": 1.4409652948379517, "learning_rate": 8.624125342257377e-07, "loss": 0.8875, "step": 7505 }, { "epoch": 0.18087232966450711, "grad_norm": 1.6661747694015503, "learning_rate": 8.622857722340534e-07, "loss": 0.9867, "step": 7510 }, { "epoch": 0.18099275065629442, "grad_norm": 1.784266471862793, "learning_rate": 8.621590102423689e-07, "loss": 0.9067, "step": 7515 }, { "epoch": 0.1811131716480817, "grad_norm": 1.497228741645813, "learning_rate": 8.620322482506844e-07, "loss": 0.9663, "step": 7520 }, { "epoch": 0.181233592639869, "grad_norm": 1.8661032915115356, "learning_rate": 8.619054862590001e-07, "loss": 0.9239, "step": 7525 }, { "epoch": 0.18135401363165626, "grad_norm": 1.5789985656738281, "learning_rate": 8.617787242673157e-07, "loss": 1.0048, "step": 7530 }, { "epoch": 0.18147443462344356, "grad_norm": 1.6937061548233032, "learning_rate": 8.616519622756312e-07, "loss": 0.9206, "step": 7535 }, { "epoch": 0.18159485561523084, "grad_norm": 1.5689666271209717, "learning_rate": 8.615252002839468e-07, "loss": 0.9583, "step": 7540 }, { "epoch": 0.18171527660701814, "grad_norm": 1.628058671951294, "learning_rate": 8.613984382922624e-07, "loss": 0.9669, "step": 7545 }, { "epoch": 0.1818356975988054, "grad_norm": 1.5606167316436768, "learning_rate": 8.61271676300578e-07, "loss": 0.9188, "step": 7550 }, { "epoch": 0.1819561185905927, "grad_norm": 1.4785312414169312, "learning_rate": 8.611449143088936e-07, "loss": 0.9194, "step": 7555 }, { "epoch": 0.18207653958238, "grad_norm": 1.4869170188903809, "learning_rate": 8.610181523172092e-07, "loss": 0.9779, "step": 7560 }, { "epoch": 0.1821969605741673, "grad_norm": 1.5507047176361084, "learning_rate": 8.608913903255247e-07, "loss": 0.9377, "step": 7565 }, { "epoch": 0.1823173815659546, "grad_norm": 1.3956730365753174, "learning_rate": 8.607646283338403e-07, "loss": 0.9261, "step": 7570 }, { "epoch": 0.18243780255774186, "grad_norm": 1.5166481733322144, "learning_rate": 8.60637866342156e-07, "loss": 0.9878, "step": 7575 }, { "epoch": 0.18255822354952916, "grad_norm": 1.4868241548538208, "learning_rate": 8.605111043504714e-07, "loss": 0.8817, "step": 7580 }, { "epoch": 0.18267864454131644, "grad_norm": 1.660776138305664, "learning_rate": 8.603843423587871e-07, "loss": 0.9763, "step": 7585 }, { "epoch": 0.18279906553310374, "grad_norm": 1.8276817798614502, "learning_rate": 8.602575803671027e-07, "loss": 0.9639, "step": 7590 }, { "epoch": 0.182919486524891, "grad_norm": 1.818725347518921, "learning_rate": 8.601308183754183e-07, "loss": 0.9588, "step": 7595 }, { "epoch": 0.1830399075166783, "grad_norm": 1.521095871925354, "learning_rate": 8.600040563837339e-07, "loss": 0.9631, "step": 7600 }, { "epoch": 0.1831603285084656, "grad_norm": 1.6158627271652222, "learning_rate": 8.598772943920494e-07, "loss": 0.9082, "step": 7605 }, { "epoch": 0.1832807495002529, "grad_norm": 1.6289167404174805, "learning_rate": 8.59750532400365e-07, "loss": 0.9901, "step": 7610 }, { "epoch": 0.18340117049204016, "grad_norm": 1.6304922103881836, "learning_rate": 8.596237704086806e-07, "loss": 0.9861, "step": 7615 }, { "epoch": 0.18352159148382746, "grad_norm": 1.5539021492004395, "learning_rate": 8.594970084169963e-07, "loss": 0.9694, "step": 7620 }, { "epoch": 0.18364201247561474, "grad_norm": 1.6585330963134766, "learning_rate": 8.593702464253118e-07, "loss": 0.9466, "step": 7625 }, { "epoch": 0.18376243346740204, "grad_norm": 1.7435808181762695, "learning_rate": 8.592434844336273e-07, "loss": 0.9547, "step": 7630 }, { "epoch": 0.18388285445918934, "grad_norm": 1.4508938789367676, "learning_rate": 8.59116722441943e-07, "loss": 0.9705, "step": 7635 }, { "epoch": 0.1840032754509766, "grad_norm": 1.6086461544036865, "learning_rate": 8.589899604502585e-07, "loss": 0.8958, "step": 7640 }, { "epoch": 0.1841236964427639, "grad_norm": 1.4307724237442017, "learning_rate": 8.588631984585742e-07, "loss": 0.9562, "step": 7645 }, { "epoch": 0.18424411743455119, "grad_norm": 1.552018642425537, "learning_rate": 8.587364364668897e-07, "loss": 0.933, "step": 7650 }, { "epoch": 0.1843645384263385, "grad_norm": 1.5186680555343628, "learning_rate": 8.586096744752052e-07, "loss": 0.9275, "step": 7655 }, { "epoch": 0.18448495941812576, "grad_norm": 1.5889519453048706, "learning_rate": 8.584829124835209e-07, "loss": 0.9499, "step": 7660 }, { "epoch": 0.18460538040991306, "grad_norm": 1.6468712091445923, "learning_rate": 8.583561504918365e-07, "loss": 0.9632, "step": 7665 }, { "epoch": 0.18472580140170033, "grad_norm": 1.4589017629623413, "learning_rate": 8.582293885001521e-07, "loss": 0.9375, "step": 7670 }, { "epoch": 0.18484622239348764, "grad_norm": 1.484344482421875, "learning_rate": 8.581026265084676e-07, "loss": 0.9317, "step": 7675 }, { "epoch": 0.1849666433852749, "grad_norm": 1.5107464790344238, "learning_rate": 8.579758645167833e-07, "loss": 0.9496, "step": 7680 }, { "epoch": 0.1850870643770622, "grad_norm": 1.4735205173492432, "learning_rate": 8.578491025250988e-07, "loss": 0.9515, "step": 7685 }, { "epoch": 0.1852074853688495, "grad_norm": 1.405005931854248, "learning_rate": 8.577223405334144e-07, "loss": 0.9233, "step": 7690 }, { "epoch": 0.18532790636063678, "grad_norm": 1.4967010021209717, "learning_rate": 8.575955785417301e-07, "loss": 0.9095, "step": 7695 }, { "epoch": 0.18544832735242409, "grad_norm": 1.5843182802200317, "learning_rate": 8.574688165500455e-07, "loss": 0.9234, "step": 7700 }, { "epoch": 0.18556874834421136, "grad_norm": 1.563092827796936, "learning_rate": 8.573420545583612e-07, "loss": 0.9395, "step": 7705 }, { "epoch": 0.18568916933599866, "grad_norm": 1.7785170078277588, "learning_rate": 8.572152925666768e-07, "loss": 0.9331, "step": 7710 }, { "epoch": 0.18580959032778593, "grad_norm": 1.48356294631958, "learning_rate": 8.570885305749924e-07, "loss": 0.9174, "step": 7715 }, { "epoch": 0.18593001131957324, "grad_norm": 1.706422209739685, "learning_rate": 8.569617685833079e-07, "loss": 0.9301, "step": 7720 }, { "epoch": 0.1860504323113605, "grad_norm": 1.7067279815673828, "learning_rate": 8.568350065916235e-07, "loss": 0.9196, "step": 7725 }, { "epoch": 0.1861708533031478, "grad_norm": 1.6843154430389404, "learning_rate": 8.567082445999391e-07, "loss": 0.9272, "step": 7730 }, { "epoch": 0.18629127429493508, "grad_norm": 1.6786079406738281, "learning_rate": 8.565814826082547e-07, "loss": 0.9272, "step": 7735 }, { "epoch": 0.18641169528672238, "grad_norm": 1.850604772567749, "learning_rate": 8.564547206165704e-07, "loss": 0.9236, "step": 7740 }, { "epoch": 0.18653211627850966, "grad_norm": 1.6428070068359375, "learning_rate": 8.563279586248858e-07, "loss": 0.9401, "step": 7745 }, { "epoch": 0.18665253727029696, "grad_norm": 1.5928632020950317, "learning_rate": 8.562011966332014e-07, "loss": 0.9904, "step": 7750 }, { "epoch": 0.18677295826208426, "grad_norm": 1.7247638702392578, "learning_rate": 8.560744346415171e-07, "loss": 0.9355, "step": 7755 }, { "epoch": 0.18689337925387153, "grad_norm": 1.6918985843658447, "learning_rate": 8.559476726498326e-07, "loss": 0.9348, "step": 7760 }, { "epoch": 0.18701380024565883, "grad_norm": 1.6758086681365967, "learning_rate": 8.558209106581482e-07, "loss": 0.9397, "step": 7765 }, { "epoch": 0.1871342212374461, "grad_norm": 1.3978140354156494, "learning_rate": 8.556941486664638e-07, "loss": 0.9691, "step": 7770 }, { "epoch": 0.1872546422292334, "grad_norm": 1.5308520793914795, "learning_rate": 8.555673866747793e-07, "loss": 0.9205, "step": 7775 }, { "epoch": 0.18737506322102068, "grad_norm": 1.3909852504730225, "learning_rate": 8.55440624683095e-07, "loss": 0.9421, "step": 7780 }, { "epoch": 0.18749548421280798, "grad_norm": 1.5220245122909546, "learning_rate": 8.553138626914106e-07, "loss": 0.9511, "step": 7785 }, { "epoch": 0.18761590520459526, "grad_norm": 1.8534610271453857, "learning_rate": 8.551871006997261e-07, "loss": 0.9701, "step": 7790 }, { "epoch": 0.18773632619638256, "grad_norm": 1.5921791791915894, "learning_rate": 8.550603387080417e-07, "loss": 0.9832, "step": 7795 }, { "epoch": 0.18785674718816983, "grad_norm": 1.5496329069137573, "learning_rate": 8.549335767163574e-07, "loss": 0.9646, "step": 7800 }, { "epoch": 0.18797716817995713, "grad_norm": 1.608414888381958, "learning_rate": 8.548068147246729e-07, "loss": 0.9486, "step": 7805 }, { "epoch": 0.1880975891717444, "grad_norm": 1.4896926879882812, "learning_rate": 8.546800527329884e-07, "loss": 0.9662, "step": 7810 }, { "epoch": 0.1882180101635317, "grad_norm": 1.7178910970687866, "learning_rate": 8.545532907413041e-07, "loss": 0.949, "step": 7815 }, { "epoch": 0.188338431155319, "grad_norm": 1.6171796321868896, "learning_rate": 8.544265287496196e-07, "loss": 0.9203, "step": 7820 }, { "epoch": 0.18845885214710628, "grad_norm": 1.8074618577957153, "learning_rate": 8.542997667579353e-07, "loss": 0.925, "step": 7825 }, { "epoch": 0.18857927313889358, "grad_norm": 1.5316404104232788, "learning_rate": 8.541730047662509e-07, "loss": 0.9347, "step": 7830 }, { "epoch": 0.18869969413068086, "grad_norm": 1.57062566280365, "learning_rate": 8.540462427745663e-07, "loss": 0.9009, "step": 7835 }, { "epoch": 0.18882011512246816, "grad_norm": 1.4911035299301147, "learning_rate": 8.53919480782882e-07, "loss": 0.9158, "step": 7840 }, { "epoch": 0.18894053611425543, "grad_norm": 1.526751160621643, "learning_rate": 8.537927187911976e-07, "loss": 0.9323, "step": 7845 }, { "epoch": 0.18906095710604273, "grad_norm": 1.5808849334716797, "learning_rate": 8.536659567995132e-07, "loss": 0.9654, "step": 7850 }, { "epoch": 0.18918137809783, "grad_norm": 1.5629630088806152, "learning_rate": 8.535391948078288e-07, "loss": 0.9316, "step": 7855 }, { "epoch": 0.1893017990896173, "grad_norm": 1.66767156124115, "learning_rate": 8.534124328161444e-07, "loss": 0.9788, "step": 7860 }, { "epoch": 0.18942222008140458, "grad_norm": 1.495460033416748, "learning_rate": 8.532856708244599e-07, "loss": 0.8883, "step": 7865 }, { "epoch": 0.18954264107319188, "grad_norm": 1.5910550355911255, "learning_rate": 8.531589088327755e-07, "loss": 0.9311, "step": 7870 }, { "epoch": 0.18966306206497915, "grad_norm": 1.4263969659805298, "learning_rate": 8.530321468410912e-07, "loss": 0.9181, "step": 7875 }, { "epoch": 0.18978348305676646, "grad_norm": 1.524699330329895, "learning_rate": 8.529053848494066e-07, "loss": 0.9138, "step": 7880 }, { "epoch": 0.18990390404855376, "grad_norm": 1.612310767173767, "learning_rate": 8.527786228577223e-07, "loss": 0.9042, "step": 7885 }, { "epoch": 0.19002432504034103, "grad_norm": 1.527094841003418, "learning_rate": 8.526518608660379e-07, "loss": 0.9604, "step": 7890 }, { "epoch": 0.19014474603212833, "grad_norm": 1.7105774879455566, "learning_rate": 8.525250988743534e-07, "loss": 0.975, "step": 7895 }, { "epoch": 0.1902651670239156, "grad_norm": 1.6466574668884277, "learning_rate": 8.523983368826691e-07, "loss": 0.944, "step": 7900 }, { "epoch": 0.1903855880157029, "grad_norm": 1.4922170639038086, "learning_rate": 8.522715748909846e-07, "loss": 0.9308, "step": 7905 }, { "epoch": 0.19050600900749018, "grad_norm": 1.6497938632965088, "learning_rate": 8.521448128993002e-07, "loss": 0.9541, "step": 7910 }, { "epoch": 0.19062642999927748, "grad_norm": 1.5365254878997803, "learning_rate": 8.520180509076158e-07, "loss": 0.9478, "step": 7915 }, { "epoch": 0.19074685099106475, "grad_norm": 1.4882341623306274, "learning_rate": 8.518912889159315e-07, "loss": 0.984, "step": 7920 }, { "epoch": 0.19086727198285205, "grad_norm": 1.5878196954727173, "learning_rate": 8.517645269242469e-07, "loss": 0.9844, "step": 7925 }, { "epoch": 0.19098769297463933, "grad_norm": 1.6832592487335205, "learning_rate": 8.516377649325625e-07, "loss": 0.9382, "step": 7930 }, { "epoch": 0.19110811396642663, "grad_norm": 1.73088800907135, "learning_rate": 8.515110029408782e-07, "loss": 0.9062, "step": 7935 }, { "epoch": 0.1912285349582139, "grad_norm": 1.6562163829803467, "learning_rate": 8.513842409491938e-07, "loss": 0.9283, "step": 7940 }, { "epoch": 0.1913489559500012, "grad_norm": 1.4478557109832764, "learning_rate": 8.512574789575094e-07, "loss": 0.9725, "step": 7945 }, { "epoch": 0.1914693769417885, "grad_norm": 1.61317777633667, "learning_rate": 8.511307169658249e-07, "loss": 0.9291, "step": 7950 }, { "epoch": 0.19158979793357578, "grad_norm": 1.579496145248413, "learning_rate": 8.510039549741405e-07, "loss": 0.9744, "step": 7955 }, { "epoch": 0.19171021892536308, "grad_norm": 1.7063086032867432, "learning_rate": 8.508771929824561e-07, "loss": 0.9523, "step": 7960 }, { "epoch": 0.19183063991715035, "grad_norm": 1.4805172681808472, "learning_rate": 8.507504309907717e-07, "loss": 0.9667, "step": 7965 }, { "epoch": 0.19195106090893765, "grad_norm": 1.4771111011505127, "learning_rate": 8.506236689990874e-07, "loss": 0.9738, "step": 7970 }, { "epoch": 0.19207148190072493, "grad_norm": 1.4734350442886353, "learning_rate": 8.504969070074028e-07, "loss": 0.9327, "step": 7975 }, { "epoch": 0.19219190289251223, "grad_norm": 1.5960177183151245, "learning_rate": 8.503701450157185e-07, "loss": 0.9299, "step": 7980 }, { "epoch": 0.1923123238842995, "grad_norm": 1.7017934322357178, "learning_rate": 8.502433830240341e-07, "loss": 0.9444, "step": 7985 }, { "epoch": 0.1924327448760868, "grad_norm": 1.5495920181274414, "learning_rate": 8.501166210323496e-07, "loss": 0.9296, "step": 7990 }, { "epoch": 0.19255316586787408, "grad_norm": 1.8156256675720215, "learning_rate": 8.499898590406652e-07, "loss": 0.9251, "step": 7995 }, { "epoch": 0.19267358685966138, "grad_norm": 1.6993896961212158, "learning_rate": 8.498630970489808e-07, "loss": 0.9591, "step": 8000 }, { "epoch": 0.19279400785144865, "grad_norm": 1.6826540231704712, "learning_rate": 8.497363350572964e-07, "loss": 0.9337, "step": 8005 }, { "epoch": 0.19291442884323595, "grad_norm": 1.5303553342819214, "learning_rate": 8.49609573065612e-07, "loss": 0.9317, "step": 8010 }, { "epoch": 0.19303484983502325, "grad_norm": 1.7423923015594482, "learning_rate": 8.494828110739276e-07, "loss": 0.9019, "step": 8015 }, { "epoch": 0.19315527082681053, "grad_norm": 1.838249921798706, "learning_rate": 8.493560490822431e-07, "loss": 0.988, "step": 8020 }, { "epoch": 0.19327569181859783, "grad_norm": 1.5801361799240112, "learning_rate": 8.492292870905587e-07, "loss": 0.9737, "step": 8025 }, { "epoch": 0.1933961128103851, "grad_norm": 1.6283152103424072, "learning_rate": 8.491025250988744e-07, "loss": 0.9168, "step": 8030 }, { "epoch": 0.1935165338021724, "grad_norm": 1.3490309715270996, "learning_rate": 8.489757631071899e-07, "loss": 0.9394, "step": 8035 }, { "epoch": 0.19363695479395968, "grad_norm": 1.5663366317749023, "learning_rate": 8.488490011155056e-07, "loss": 0.906, "step": 8040 }, { "epoch": 0.19375737578574698, "grad_norm": 1.4401100873947144, "learning_rate": 8.487222391238211e-07, "loss": 0.9805, "step": 8045 }, { "epoch": 0.19387779677753425, "grad_norm": 1.521753191947937, "learning_rate": 8.485954771321366e-07, "loss": 0.9311, "step": 8050 }, { "epoch": 0.19399821776932155, "grad_norm": 1.6854158639907837, "learning_rate": 8.484687151404523e-07, "loss": 0.9865, "step": 8055 }, { "epoch": 0.19411863876110882, "grad_norm": 1.4435174465179443, "learning_rate": 8.483419531487679e-07, "loss": 0.9494, "step": 8060 }, { "epoch": 0.19423905975289613, "grad_norm": 1.5255597829818726, "learning_rate": 8.482151911570834e-07, "loss": 0.9435, "step": 8065 }, { "epoch": 0.1943594807446834, "grad_norm": 1.3990375995635986, "learning_rate": 8.48088429165399e-07, "loss": 0.9662, "step": 8070 }, { "epoch": 0.1944799017364707, "grad_norm": 1.5630102157592773, "learning_rate": 8.479616671737146e-07, "loss": 0.9869, "step": 8075 }, { "epoch": 0.194600322728258, "grad_norm": 1.6120853424072266, "learning_rate": 8.478349051820302e-07, "loss": 0.9512, "step": 8080 }, { "epoch": 0.19472074372004528, "grad_norm": 1.422527551651001, "learning_rate": 8.477081431903458e-07, "loss": 0.9422, "step": 8085 }, { "epoch": 0.19484116471183258, "grad_norm": 1.715245246887207, "learning_rate": 8.475813811986614e-07, "loss": 0.9268, "step": 8090 }, { "epoch": 0.19496158570361985, "grad_norm": 1.4960979223251343, "learning_rate": 8.474546192069769e-07, "loss": 0.9693, "step": 8095 }, { "epoch": 0.19508200669540715, "grad_norm": 1.811741828918457, "learning_rate": 8.473278572152925e-07, "loss": 0.9967, "step": 8100 }, { "epoch": 0.19520242768719442, "grad_norm": 1.8520315885543823, "learning_rate": 8.472010952236082e-07, "loss": 0.9514, "step": 8105 }, { "epoch": 0.19532284867898173, "grad_norm": 1.4953160285949707, "learning_rate": 8.470743332319236e-07, "loss": 0.9414, "step": 8110 }, { "epoch": 0.195443269670769, "grad_norm": 1.5047308206558228, "learning_rate": 8.469475712402393e-07, "loss": 0.9273, "step": 8115 }, { "epoch": 0.1955636906625563, "grad_norm": 1.5107359886169434, "learning_rate": 8.468208092485549e-07, "loss": 0.9351, "step": 8120 }, { "epoch": 0.19568411165434357, "grad_norm": 1.3560688495635986, "learning_rate": 8.466940472568705e-07, "loss": 0.9283, "step": 8125 }, { "epoch": 0.19580453264613087, "grad_norm": 2.0308735370635986, "learning_rate": 8.465672852651861e-07, "loss": 0.9829, "step": 8130 }, { "epoch": 0.19592495363791818, "grad_norm": 1.367697834968567, "learning_rate": 8.464405232735016e-07, "loss": 0.9557, "step": 8135 }, { "epoch": 0.19604537462970545, "grad_norm": 1.5483380556106567, "learning_rate": 8.463137612818172e-07, "loss": 0.9502, "step": 8140 }, { "epoch": 0.19616579562149275, "grad_norm": 1.533666729927063, "learning_rate": 8.461869992901328e-07, "loss": 0.9107, "step": 8145 }, { "epoch": 0.19628621661328002, "grad_norm": 1.575505018234253, "learning_rate": 8.460602372984485e-07, "loss": 0.9449, "step": 8150 }, { "epoch": 0.19640663760506732, "grad_norm": 1.605459213256836, "learning_rate": 8.45933475306764e-07, "loss": 0.9894, "step": 8155 }, { "epoch": 0.1965270585968546, "grad_norm": 1.5962629318237305, "learning_rate": 8.458067133150795e-07, "loss": 0.8916, "step": 8160 }, { "epoch": 0.1966474795886419, "grad_norm": 1.4973393678665161, "learning_rate": 8.456799513233952e-07, "loss": 0.9511, "step": 8165 }, { "epoch": 0.19676790058042917, "grad_norm": 1.5095032453536987, "learning_rate": 8.455531893317107e-07, "loss": 0.9788, "step": 8170 }, { "epoch": 0.19688832157221647, "grad_norm": 1.900133490562439, "learning_rate": 8.454264273400264e-07, "loss": 0.9538, "step": 8175 }, { "epoch": 0.19700874256400375, "grad_norm": 1.530402421951294, "learning_rate": 8.452996653483419e-07, "loss": 0.9698, "step": 8180 }, { "epoch": 0.19712916355579105, "grad_norm": 1.551681637763977, "learning_rate": 8.451729033566574e-07, "loss": 0.8823, "step": 8185 }, { "epoch": 0.19724958454757832, "grad_norm": 1.61061429977417, "learning_rate": 8.450461413649731e-07, "loss": 0.9481, "step": 8190 }, { "epoch": 0.19737000553936562, "grad_norm": 1.6920380592346191, "learning_rate": 8.449193793732887e-07, "loss": 0.9234, "step": 8195 }, { "epoch": 0.19749042653115292, "grad_norm": 1.539293885231018, "learning_rate": 8.447926173816043e-07, "loss": 0.9832, "step": 8200 }, { "epoch": 0.1976108475229402, "grad_norm": 1.5610792636871338, "learning_rate": 8.446658553899198e-07, "loss": 0.9322, "step": 8205 }, { "epoch": 0.1977312685147275, "grad_norm": 1.4349342584609985, "learning_rate": 8.445390933982355e-07, "loss": 0.9551, "step": 8210 }, { "epoch": 0.19785168950651477, "grad_norm": 1.6934919357299805, "learning_rate": 8.44412331406551e-07, "loss": 0.9032, "step": 8215 }, { "epoch": 0.19797211049830207, "grad_norm": 1.657364845275879, "learning_rate": 8.442855694148666e-07, "loss": 0.9226, "step": 8220 }, { "epoch": 0.19809253149008935, "grad_norm": 1.3018603324890137, "learning_rate": 8.441588074231823e-07, "loss": 0.9307, "step": 8225 }, { "epoch": 0.19821295248187665, "grad_norm": 1.4067305326461792, "learning_rate": 8.440320454314977e-07, "loss": 0.9819, "step": 8230 }, { "epoch": 0.19833337347366392, "grad_norm": 1.7177774906158447, "learning_rate": 8.439052834398134e-07, "loss": 0.9877, "step": 8235 }, { "epoch": 0.19845379446545122, "grad_norm": 1.5552080869674683, "learning_rate": 8.43778521448129e-07, "loss": 0.9211, "step": 8240 }, { "epoch": 0.1985742154572385, "grad_norm": 1.6212966442108154, "learning_rate": 8.436517594564446e-07, "loss": 0.882, "step": 8245 }, { "epoch": 0.1986946364490258, "grad_norm": 1.4554401636123657, "learning_rate": 8.435249974647601e-07, "loss": 0.9842, "step": 8250 }, { "epoch": 0.19881505744081307, "grad_norm": 1.6188297271728516, "learning_rate": 8.433982354730757e-07, "loss": 0.9164, "step": 8255 }, { "epoch": 0.19893547843260037, "grad_norm": 1.503028154373169, "learning_rate": 8.432714734813913e-07, "loss": 0.9838, "step": 8260 }, { "epoch": 0.19905589942438767, "grad_norm": 1.3847625255584717, "learning_rate": 8.431447114897069e-07, "loss": 0.9386, "step": 8265 }, { "epoch": 0.19917632041617495, "grad_norm": 1.653942346572876, "learning_rate": 8.430179494980226e-07, "loss": 0.9559, "step": 8270 }, { "epoch": 0.19929674140796225, "grad_norm": 1.5789453983306885, "learning_rate": 8.42891187506338e-07, "loss": 0.9565, "step": 8275 }, { "epoch": 0.19941716239974952, "grad_norm": 1.4539376497268677, "learning_rate": 8.427644255146536e-07, "loss": 0.9697, "step": 8280 }, { "epoch": 0.19953758339153682, "grad_norm": 1.746583342552185, "learning_rate": 8.426376635229693e-07, "loss": 1.0047, "step": 8285 }, { "epoch": 0.1996580043833241, "grad_norm": 2.238571882247925, "learning_rate": 8.425109015312848e-07, "loss": 0.9372, "step": 8290 }, { "epoch": 0.1997784253751114, "grad_norm": 1.5720535516738892, "learning_rate": 8.423841395396004e-07, "loss": 0.8936, "step": 8295 }, { "epoch": 0.19989884636689867, "grad_norm": 1.8832333087921143, "learning_rate": 8.42257377547916e-07, "loss": 1.0061, "step": 8300 }, { "epoch": 0.20001926735868597, "grad_norm": 1.635061264038086, "learning_rate": 8.421306155562315e-07, "loss": 0.98, "step": 8305 }, { "epoch": 0.20013968835047324, "grad_norm": 1.7162737846374512, "learning_rate": 8.420038535645472e-07, "loss": 0.9042, "step": 8310 }, { "epoch": 0.20026010934226054, "grad_norm": 1.5249680280685425, "learning_rate": 8.418770915728628e-07, "loss": 0.9422, "step": 8315 }, { "epoch": 0.20038053033404782, "grad_norm": 1.5159083604812622, "learning_rate": 8.417503295811783e-07, "loss": 0.9585, "step": 8320 }, { "epoch": 0.20050095132583512, "grad_norm": 1.547275424003601, "learning_rate": 8.416235675894939e-07, "loss": 0.9712, "step": 8325 }, { "epoch": 0.20062137231762242, "grad_norm": 1.4447563886642456, "learning_rate": 8.414968055978096e-07, "loss": 0.9383, "step": 8330 }, { "epoch": 0.2007417933094097, "grad_norm": 1.4032526016235352, "learning_rate": 8.413700436061251e-07, "loss": 0.987, "step": 8335 }, { "epoch": 0.200862214301197, "grad_norm": 1.5580028295516968, "learning_rate": 8.412432816144407e-07, "loss": 0.8971, "step": 8340 }, { "epoch": 0.20098263529298427, "grad_norm": 1.5366116762161255, "learning_rate": 8.411165196227563e-07, "loss": 0.9864, "step": 8345 }, { "epoch": 0.20110305628477157, "grad_norm": 1.590053677558899, "learning_rate": 8.409897576310718e-07, "loss": 0.9343, "step": 8350 }, { "epoch": 0.20122347727655884, "grad_norm": 1.5261359214782715, "learning_rate": 8.408629956393875e-07, "loss": 0.9035, "step": 8355 }, { "epoch": 0.20134389826834614, "grad_norm": 1.4733154773712158, "learning_rate": 8.407362336477031e-07, "loss": 0.9255, "step": 8360 }, { "epoch": 0.20146431926013342, "grad_norm": 1.9009312391281128, "learning_rate": 8.406094716560185e-07, "loss": 0.979, "step": 8365 }, { "epoch": 0.20158474025192072, "grad_norm": 1.560454249382019, "learning_rate": 8.404827096643342e-07, "loss": 0.9264, "step": 8370 }, { "epoch": 0.201705161243708, "grad_norm": 1.6878447532653809, "learning_rate": 8.403559476726498e-07, "loss": 0.9107, "step": 8375 }, { "epoch": 0.2018255822354953, "grad_norm": 1.3901766538619995, "learning_rate": 8.402291856809654e-07, "loss": 0.9724, "step": 8380 }, { "epoch": 0.20194600322728257, "grad_norm": 1.402201771736145, "learning_rate": 8.40102423689281e-07, "loss": 0.9326, "step": 8385 }, { "epoch": 0.20206642421906987, "grad_norm": 1.4999525547027588, "learning_rate": 8.399756616975966e-07, "loss": 0.983, "step": 8390 }, { "epoch": 0.20218684521085717, "grad_norm": 1.554654598236084, "learning_rate": 8.398488997059121e-07, "loss": 0.9594, "step": 8395 }, { "epoch": 0.20230726620264444, "grad_norm": 1.4805643558502197, "learning_rate": 8.397221377142277e-07, "loss": 0.9716, "step": 8400 }, { "epoch": 0.20242768719443174, "grad_norm": 1.4133025407791138, "learning_rate": 8.395953757225434e-07, "loss": 0.9541, "step": 8405 }, { "epoch": 0.20254810818621902, "grad_norm": 1.8007771968841553, "learning_rate": 8.394686137308588e-07, "loss": 0.9827, "step": 8410 }, { "epoch": 0.20266852917800632, "grad_norm": 1.5795798301696777, "learning_rate": 8.393418517391745e-07, "loss": 0.9792, "step": 8415 }, { "epoch": 0.2027889501697936, "grad_norm": 1.633016586303711, "learning_rate": 8.392150897474901e-07, "loss": 0.9379, "step": 8420 }, { "epoch": 0.2029093711615809, "grad_norm": 1.6277592182159424, "learning_rate": 8.390883277558056e-07, "loss": 0.9478, "step": 8425 }, { "epoch": 0.20302979215336817, "grad_norm": 1.718751311302185, "learning_rate": 8.389615657641213e-07, "loss": 0.944, "step": 8430 }, { "epoch": 0.20315021314515547, "grad_norm": 1.5914260149002075, "learning_rate": 8.388348037724368e-07, "loss": 0.9741, "step": 8435 }, { "epoch": 0.20327063413694274, "grad_norm": 1.4470100402832031, "learning_rate": 8.387080417807524e-07, "loss": 0.9075, "step": 8440 }, { "epoch": 0.20339105512873004, "grad_norm": 1.3547005653381348, "learning_rate": 8.38581279789068e-07, "loss": 0.9711, "step": 8445 }, { "epoch": 0.20351147612051732, "grad_norm": 1.6130958795547485, "learning_rate": 8.384545177973837e-07, "loss": 0.8619, "step": 8450 }, { "epoch": 0.20363189711230462, "grad_norm": 1.448756456375122, "learning_rate": 8.383277558056992e-07, "loss": 0.9311, "step": 8455 }, { "epoch": 0.20375231810409192, "grad_norm": 1.7305628061294556, "learning_rate": 8.382009938140147e-07, "loss": 0.9222, "step": 8460 }, { "epoch": 0.2038727390958792, "grad_norm": 1.4965406656265259, "learning_rate": 8.380742318223304e-07, "loss": 0.956, "step": 8465 }, { "epoch": 0.2039931600876665, "grad_norm": 1.5525379180908203, "learning_rate": 8.379474698306459e-07, "loss": 0.9343, "step": 8470 }, { "epoch": 0.20411358107945377, "grad_norm": 1.8234738111495972, "learning_rate": 8.378207078389616e-07, "loss": 0.9319, "step": 8475 }, { "epoch": 0.20423400207124107, "grad_norm": 1.8297779560089111, "learning_rate": 8.376939458472771e-07, "loss": 0.9049, "step": 8480 }, { "epoch": 0.20435442306302834, "grad_norm": 1.5790259838104248, "learning_rate": 8.375671838555926e-07, "loss": 0.9588, "step": 8485 }, { "epoch": 0.20447484405481564, "grad_norm": 1.7919198274612427, "learning_rate": 8.374404218639083e-07, "loss": 0.9558, "step": 8490 }, { "epoch": 0.20459526504660291, "grad_norm": 1.4658918380737305, "learning_rate": 8.373136598722239e-07, "loss": 0.9135, "step": 8495 }, { "epoch": 0.20471568603839022, "grad_norm": 1.581298828125, "learning_rate": 8.371868978805395e-07, "loss": 0.931, "step": 8500 }, { "epoch": 0.2048361070301775, "grad_norm": 1.5665123462677002, "learning_rate": 8.37060135888855e-07, "loss": 0.9686, "step": 8505 }, { "epoch": 0.2049565280219648, "grad_norm": 1.6349343061447144, "learning_rate": 8.369333738971707e-07, "loss": 0.9041, "step": 8510 }, { "epoch": 0.20507694901375206, "grad_norm": 1.7548354864120483, "learning_rate": 8.368066119054862e-07, "loss": 0.9142, "step": 8515 }, { "epoch": 0.20519737000553936, "grad_norm": 1.7395234107971191, "learning_rate": 8.366798499138018e-07, "loss": 0.8966, "step": 8520 }, { "epoch": 0.20531779099732667, "grad_norm": 1.6489882469177246, "learning_rate": 8.365530879221175e-07, "loss": 0.9579, "step": 8525 }, { "epoch": 0.20543821198911394, "grad_norm": 1.679075837135315, "learning_rate": 8.364263259304329e-07, "loss": 0.9257, "step": 8530 }, { "epoch": 0.20555863298090124, "grad_norm": 1.3792853355407715, "learning_rate": 8.362995639387486e-07, "loss": 0.9361, "step": 8535 }, { "epoch": 0.2056790539726885, "grad_norm": 1.6504968404769897, "learning_rate": 8.361728019470642e-07, "loss": 0.9498, "step": 8540 }, { "epoch": 0.20579947496447581, "grad_norm": 1.645012378692627, "learning_rate": 8.360460399553797e-07, "loss": 0.9963, "step": 8545 }, { "epoch": 0.2059198959562631, "grad_norm": 1.3971391916275024, "learning_rate": 8.359192779636953e-07, "loss": 0.9941, "step": 8550 }, { "epoch": 0.2060403169480504, "grad_norm": 1.675577998161316, "learning_rate": 8.357925159720109e-07, "loss": 0.9227, "step": 8555 }, { "epoch": 0.20616073793983766, "grad_norm": 1.5664169788360596, "learning_rate": 8.356657539803265e-07, "loss": 0.9335, "step": 8560 }, { "epoch": 0.20628115893162496, "grad_norm": 1.7027219533920288, "learning_rate": 8.355389919886421e-07, "loss": 0.961, "step": 8565 }, { "epoch": 0.20640157992341224, "grad_norm": 1.5615417957305908, "learning_rate": 8.354122299969578e-07, "loss": 0.9518, "step": 8570 }, { "epoch": 0.20652200091519954, "grad_norm": 1.8420103788375854, "learning_rate": 8.352854680052732e-07, "loss": 0.9987, "step": 8575 }, { "epoch": 0.20664242190698684, "grad_norm": 1.625141978263855, "learning_rate": 8.351587060135888e-07, "loss": 0.8968, "step": 8580 }, { "epoch": 0.2067628428987741, "grad_norm": 1.6060346364974976, "learning_rate": 8.350319440219045e-07, "loss": 0.9154, "step": 8585 }, { "epoch": 0.20688326389056141, "grad_norm": 1.5125768184661865, "learning_rate": 8.3490518203022e-07, "loss": 0.9727, "step": 8590 }, { "epoch": 0.2070036848823487, "grad_norm": 1.5166090726852417, "learning_rate": 8.347784200385356e-07, "loss": 0.9022, "step": 8595 }, { "epoch": 0.207124105874136, "grad_norm": 1.4007935523986816, "learning_rate": 8.346516580468512e-07, "loss": 0.948, "step": 8600 }, { "epoch": 0.20724452686592326, "grad_norm": 1.7391657829284668, "learning_rate": 8.345248960551667e-07, "loss": 0.8898, "step": 8605 }, { "epoch": 0.20736494785771056, "grad_norm": 1.3972952365875244, "learning_rate": 8.343981340634824e-07, "loss": 0.8994, "step": 8610 }, { "epoch": 0.20748536884949784, "grad_norm": 1.6144566535949707, "learning_rate": 8.34271372071798e-07, "loss": 0.9528, "step": 8615 }, { "epoch": 0.20760578984128514, "grad_norm": 1.5001790523529053, "learning_rate": 8.341446100801135e-07, "loss": 0.9638, "step": 8620 }, { "epoch": 0.2077262108330724, "grad_norm": 1.884637713432312, "learning_rate": 8.340178480884291e-07, "loss": 0.9093, "step": 8625 }, { "epoch": 0.2078466318248597, "grad_norm": 1.5699843168258667, "learning_rate": 8.338910860967447e-07, "loss": 0.973, "step": 8630 }, { "epoch": 0.20796705281664699, "grad_norm": 1.5710140466690063, "learning_rate": 8.337643241050603e-07, "loss": 0.9331, "step": 8635 }, { "epoch": 0.2080874738084343, "grad_norm": 1.4831452369689941, "learning_rate": 8.336375621133759e-07, "loss": 0.9384, "step": 8640 }, { "epoch": 0.2082078948002216, "grad_norm": 1.5688238143920898, "learning_rate": 8.335108001216915e-07, "loss": 0.9716, "step": 8645 }, { "epoch": 0.20832831579200886, "grad_norm": 1.4611543416976929, "learning_rate": 8.33384038130007e-07, "loss": 0.8993, "step": 8650 }, { "epoch": 0.20844873678379616, "grad_norm": 1.4965218305587769, "learning_rate": 8.332572761383227e-07, "loss": 0.9188, "step": 8655 }, { "epoch": 0.20856915777558344, "grad_norm": 1.3796385526657104, "learning_rate": 8.331305141466383e-07, "loss": 0.8892, "step": 8660 }, { "epoch": 0.20868957876737074, "grad_norm": 1.4881573915481567, "learning_rate": 8.330037521549537e-07, "loss": 0.8978, "step": 8665 }, { "epoch": 0.208809999759158, "grad_norm": 1.588682770729065, "learning_rate": 8.328769901632694e-07, "loss": 0.9164, "step": 8670 }, { "epoch": 0.2089304207509453, "grad_norm": 1.5392111539840698, "learning_rate": 8.32750228171585e-07, "loss": 0.9245, "step": 8675 }, { "epoch": 0.20905084174273258, "grad_norm": 2.1696298122406006, "learning_rate": 8.326234661799006e-07, "loss": 0.9343, "step": 8680 }, { "epoch": 0.20917126273451989, "grad_norm": 1.838498830795288, "learning_rate": 8.324967041882162e-07, "loss": 0.9351, "step": 8685 }, { "epoch": 0.20929168372630716, "grad_norm": 1.9327776432037354, "learning_rate": 8.323699421965317e-07, "loss": 0.9452, "step": 8690 }, { "epoch": 0.20941210471809446, "grad_norm": 1.5329734086990356, "learning_rate": 8.322431802048473e-07, "loss": 0.9778, "step": 8695 }, { "epoch": 0.20953252570988173, "grad_norm": 1.5288251638412476, "learning_rate": 8.321164182131629e-07, "loss": 0.9253, "step": 8700 }, { "epoch": 0.20965294670166904, "grad_norm": 1.4873708486557007, "learning_rate": 8.319896562214786e-07, "loss": 0.9109, "step": 8705 }, { "epoch": 0.20977336769345634, "grad_norm": 1.5548856258392334, "learning_rate": 8.31862894229794e-07, "loss": 0.9195, "step": 8710 }, { "epoch": 0.2098937886852436, "grad_norm": 1.4828455448150635, "learning_rate": 8.317361322381096e-07, "loss": 0.881, "step": 8715 }, { "epoch": 0.2100142096770309, "grad_norm": 1.46074378490448, "learning_rate": 8.316093702464253e-07, "loss": 0.9704, "step": 8720 }, { "epoch": 0.21013463066881818, "grad_norm": 1.7580933570861816, "learning_rate": 8.314826082547408e-07, "loss": 0.9254, "step": 8725 }, { "epoch": 0.21025505166060549, "grad_norm": 1.614861011505127, "learning_rate": 8.313558462630565e-07, "loss": 0.9465, "step": 8730 }, { "epoch": 0.21037547265239276, "grad_norm": 1.5930465459823608, "learning_rate": 8.31229084271372e-07, "loss": 0.9391, "step": 8735 }, { "epoch": 0.21049589364418006, "grad_norm": 1.4870792627334595, "learning_rate": 8.311023222796876e-07, "loss": 0.8878, "step": 8740 }, { "epoch": 0.21061631463596733, "grad_norm": 1.4152858257293701, "learning_rate": 8.309755602880032e-07, "loss": 0.9364, "step": 8745 }, { "epoch": 0.21073673562775463, "grad_norm": 1.6814806461334229, "learning_rate": 8.308487982963188e-07, "loss": 0.92, "step": 8750 }, { "epoch": 0.2108571566195419, "grad_norm": 1.6177480220794678, "learning_rate": 8.307220363046344e-07, "loss": 0.9439, "step": 8755 }, { "epoch": 0.2109775776113292, "grad_norm": 1.4960063695907593, "learning_rate": 8.305952743129499e-07, "loss": 0.8827, "step": 8760 }, { "epoch": 0.21109799860311648, "grad_norm": 1.6099573373794556, "learning_rate": 8.304685123212656e-07, "loss": 0.961, "step": 8765 }, { "epoch": 0.21121841959490378, "grad_norm": 1.7052769660949707, "learning_rate": 8.303417503295811e-07, "loss": 0.9841, "step": 8770 }, { "epoch": 0.21133884058669108, "grad_norm": 1.85787034034729, "learning_rate": 8.302149883378968e-07, "loss": 0.93, "step": 8775 }, { "epoch": 0.21145926157847836, "grad_norm": 1.7271051406860352, "learning_rate": 8.300882263462123e-07, "loss": 0.9026, "step": 8780 }, { "epoch": 0.21157968257026566, "grad_norm": 1.6201260089874268, "learning_rate": 8.299614643545278e-07, "loss": 0.9182, "step": 8785 }, { "epoch": 0.21170010356205293, "grad_norm": 2.0843045711517334, "learning_rate": 8.298347023628435e-07, "loss": 1.0306, "step": 8790 }, { "epoch": 0.21182052455384023, "grad_norm": 1.541614055633545, "learning_rate": 8.297079403711591e-07, "loss": 0.8982, "step": 8795 }, { "epoch": 0.2119409455456275, "grad_norm": 1.7830393314361572, "learning_rate": 8.295811783794747e-07, "loss": 0.9132, "step": 8800 }, { "epoch": 0.2120613665374148, "grad_norm": 1.6531665325164795, "learning_rate": 8.294544163877902e-07, "loss": 0.9826, "step": 8805 }, { "epoch": 0.21218178752920208, "grad_norm": 1.539405107498169, "learning_rate": 8.293276543961058e-07, "loss": 0.9498, "step": 8810 }, { "epoch": 0.21230220852098938, "grad_norm": 1.5371087789535522, "learning_rate": 8.292008924044214e-07, "loss": 0.8884, "step": 8815 }, { "epoch": 0.21242262951277666, "grad_norm": 1.4828464984893799, "learning_rate": 8.29074130412737e-07, "loss": 0.9002, "step": 8820 }, { "epoch": 0.21254305050456396, "grad_norm": 1.4968286752700806, "learning_rate": 8.289473684210527e-07, "loss": 0.9189, "step": 8825 }, { "epoch": 0.21266347149635123, "grad_norm": 1.496974229812622, "learning_rate": 8.288206064293681e-07, "loss": 0.951, "step": 8830 }, { "epoch": 0.21278389248813853, "grad_norm": 1.4990928173065186, "learning_rate": 8.286938444376837e-07, "loss": 0.977, "step": 8835 }, { "epoch": 0.21290431347992583, "grad_norm": 1.6481170654296875, "learning_rate": 8.285670824459994e-07, "loss": 0.9582, "step": 8840 }, { "epoch": 0.2130247344717131, "grad_norm": 1.5998990535736084, "learning_rate": 8.284403204543149e-07, "loss": 0.8715, "step": 8845 }, { "epoch": 0.2131451554635004, "grad_norm": 1.5779908895492554, "learning_rate": 8.283135584626305e-07, "loss": 0.9167, "step": 8850 }, { "epoch": 0.21326557645528768, "grad_norm": 1.4358510971069336, "learning_rate": 8.281867964709461e-07, "loss": 0.9238, "step": 8855 }, { "epoch": 0.21338599744707498, "grad_norm": 1.5531116724014282, "learning_rate": 8.280600344792617e-07, "loss": 0.9415, "step": 8860 }, { "epoch": 0.21350641843886226, "grad_norm": 1.5850262641906738, "learning_rate": 8.279332724875773e-07, "loss": 0.9163, "step": 8865 }, { "epoch": 0.21362683943064956, "grad_norm": 1.3992431163787842, "learning_rate": 8.278065104958929e-07, "loss": 0.9199, "step": 8870 }, { "epoch": 0.21374726042243683, "grad_norm": 1.2640268802642822, "learning_rate": 8.276797485042084e-07, "loss": 0.874, "step": 8875 }, { "epoch": 0.21386768141422413, "grad_norm": 1.5796748399734497, "learning_rate": 8.27552986512524e-07, "loss": 0.8909, "step": 8880 }, { "epoch": 0.2139881024060114, "grad_norm": 1.6204979419708252, "learning_rate": 8.274262245208397e-07, "loss": 0.8746, "step": 8885 }, { "epoch": 0.2141085233977987, "grad_norm": 1.566311001777649, "learning_rate": 8.272994625291552e-07, "loss": 0.9035, "step": 8890 }, { "epoch": 0.21422894438958598, "grad_norm": 1.482667326927185, "learning_rate": 8.271727005374707e-07, "loss": 0.9214, "step": 8895 }, { "epoch": 0.21434936538137328, "grad_norm": 1.520044207572937, "learning_rate": 8.270459385457864e-07, "loss": 0.9096, "step": 8900 }, { "epoch": 0.21446978637316058, "grad_norm": 1.5712969303131104, "learning_rate": 8.269191765541019e-07, "loss": 0.9333, "step": 8905 }, { "epoch": 0.21459020736494785, "grad_norm": 1.6662484407424927, "learning_rate": 8.267924145624176e-07, "loss": 0.9577, "step": 8910 }, { "epoch": 0.21471062835673516, "grad_norm": 1.487890362739563, "learning_rate": 8.266656525707332e-07, "loss": 0.9055, "step": 8915 }, { "epoch": 0.21483104934852243, "grad_norm": 1.7255772352218628, "learning_rate": 8.265388905790486e-07, "loss": 0.9385, "step": 8920 }, { "epoch": 0.21495147034030973, "grad_norm": 1.6064256429672241, "learning_rate": 8.264121285873643e-07, "loss": 0.9491, "step": 8925 }, { "epoch": 0.215071891332097, "grad_norm": 1.5699553489685059, "learning_rate": 8.262853665956799e-07, "loss": 0.9202, "step": 8930 }, { "epoch": 0.2151923123238843, "grad_norm": 1.543715238571167, "learning_rate": 8.261586046039955e-07, "loss": 0.9249, "step": 8935 }, { "epoch": 0.21531273331567158, "grad_norm": 1.4051569700241089, "learning_rate": 8.260318426123111e-07, "loss": 0.9502, "step": 8940 }, { "epoch": 0.21543315430745888, "grad_norm": 1.690550446510315, "learning_rate": 8.259050806206267e-07, "loss": 0.8807, "step": 8945 }, { "epoch": 0.21555357529924615, "grad_norm": 1.3593227863311768, "learning_rate": 8.257783186289422e-07, "loss": 0.9861, "step": 8950 }, { "epoch": 0.21567399629103345, "grad_norm": 1.652435064315796, "learning_rate": 8.256515566372578e-07, "loss": 0.9384, "step": 8955 }, { "epoch": 0.21579441728282076, "grad_norm": 1.7051680088043213, "learning_rate": 8.255247946455735e-07, "loss": 0.9455, "step": 8960 }, { "epoch": 0.21591483827460803, "grad_norm": 1.766378402709961, "learning_rate": 8.25398032653889e-07, "loss": 0.9533, "step": 8965 }, { "epoch": 0.21603525926639533, "grad_norm": 1.6049004793167114, "learning_rate": 8.252712706622046e-07, "loss": 0.9435, "step": 8970 }, { "epoch": 0.2161556802581826, "grad_norm": 1.7170352935791016, "learning_rate": 8.251445086705202e-07, "loss": 0.9371, "step": 8975 }, { "epoch": 0.2162761012499699, "grad_norm": 1.506013035774231, "learning_rate": 8.250177466788359e-07, "loss": 0.8851, "step": 8980 }, { "epoch": 0.21639652224175718, "grad_norm": 1.8347394466400146, "learning_rate": 8.248909846871514e-07, "loss": 0.9822, "step": 8985 }, { "epoch": 0.21651694323354448, "grad_norm": 1.7963132858276367, "learning_rate": 8.247642226954669e-07, "loss": 0.9288, "step": 8990 }, { "epoch": 0.21663736422533175, "grad_norm": 1.444070816040039, "learning_rate": 8.246374607037826e-07, "loss": 0.9085, "step": 8995 }, { "epoch": 0.21675778521711905, "grad_norm": 1.5620007514953613, "learning_rate": 8.245106987120981e-07, "loss": 0.9667, "step": 9000 }, { "epoch": 0.21687820620890633, "grad_norm": 1.4637254476547241, "learning_rate": 8.243839367204138e-07, "loss": 0.9519, "step": 9005 }, { "epoch": 0.21699862720069363, "grad_norm": 1.7866486310958862, "learning_rate": 8.242571747287294e-07, "loss": 0.9099, "step": 9010 }, { "epoch": 0.2171190481924809, "grad_norm": 1.5120880603790283, "learning_rate": 8.241304127370448e-07, "loss": 0.889, "step": 9015 }, { "epoch": 0.2172394691842682, "grad_norm": 1.47304105758667, "learning_rate": 8.240036507453605e-07, "loss": 0.9659, "step": 9020 }, { "epoch": 0.2173598901760555, "grad_norm": 1.4226914644241333, "learning_rate": 8.238768887536761e-07, "loss": 0.9371, "step": 9025 }, { "epoch": 0.21748031116784278, "grad_norm": 1.4126009941101074, "learning_rate": 8.237501267619917e-07, "loss": 0.9464, "step": 9030 }, { "epoch": 0.21760073215963008, "grad_norm": 1.5451222658157349, "learning_rate": 8.236233647703072e-07, "loss": 0.8829, "step": 9035 }, { "epoch": 0.21772115315141735, "grad_norm": 1.4735143184661865, "learning_rate": 8.234966027786229e-07, "loss": 0.9358, "step": 9040 }, { "epoch": 0.21784157414320465, "grad_norm": 1.7014250755310059, "learning_rate": 8.233698407869384e-07, "loss": 0.9346, "step": 9045 }, { "epoch": 0.21796199513499193, "grad_norm": 1.54008150100708, "learning_rate": 8.23243078795254e-07, "loss": 0.9538, "step": 9050 }, { "epoch": 0.21808241612677923, "grad_norm": 1.5994731187820435, "learning_rate": 8.231163168035697e-07, "loss": 0.9334, "step": 9055 }, { "epoch": 0.2182028371185665, "grad_norm": 1.5312918424606323, "learning_rate": 8.229895548118851e-07, "loss": 0.9295, "step": 9060 }, { "epoch": 0.2183232581103538, "grad_norm": 1.4426707029342651, "learning_rate": 8.228627928202008e-07, "loss": 0.9179, "step": 9065 }, { "epoch": 0.21844367910214108, "grad_norm": 1.4576219320297241, "learning_rate": 8.227360308285164e-07, "loss": 0.9565, "step": 9070 }, { "epoch": 0.21856410009392838, "grad_norm": 1.8166043758392334, "learning_rate": 8.226092688368319e-07, "loss": 0.9101, "step": 9075 }, { "epoch": 0.21868452108571565, "grad_norm": 1.6293615102767944, "learning_rate": 8.224825068451475e-07, "loss": 0.8985, "step": 9080 }, { "epoch": 0.21880494207750295, "grad_norm": 1.313899278640747, "learning_rate": 8.223557448534631e-07, "loss": 0.9632, "step": 9085 }, { "epoch": 0.21892536306929025, "grad_norm": 1.5151069164276123, "learning_rate": 8.222289828617787e-07, "loss": 0.9532, "step": 9090 }, { "epoch": 0.21904578406107753, "grad_norm": 1.794693946838379, "learning_rate": 8.221022208700943e-07, "loss": 0.9792, "step": 9095 }, { "epoch": 0.21916620505286483, "grad_norm": 1.683070421218872, "learning_rate": 8.2197545887841e-07, "loss": 0.9168, "step": 9100 }, { "epoch": 0.2192866260446521, "grad_norm": 1.560676097869873, "learning_rate": 8.218486968867254e-07, "loss": 0.9712, "step": 9105 }, { "epoch": 0.2194070470364394, "grad_norm": 1.6398364305496216, "learning_rate": 8.21721934895041e-07, "loss": 0.9973, "step": 9110 }, { "epoch": 0.21952746802822667, "grad_norm": 1.6339504718780518, "learning_rate": 8.215951729033567e-07, "loss": 0.876, "step": 9115 }, { "epoch": 0.21964788902001398, "grad_norm": 2.2558040618896484, "learning_rate": 8.214684109116722e-07, "loss": 0.985, "step": 9120 }, { "epoch": 0.21976831001180125, "grad_norm": 1.5891695022583008, "learning_rate": 8.213416489199878e-07, "loss": 0.9204, "step": 9125 }, { "epoch": 0.21988873100358855, "grad_norm": 1.626607894897461, "learning_rate": 8.212148869283034e-07, "loss": 0.9663, "step": 9130 }, { "epoch": 0.22000915199537582, "grad_norm": 1.8692668676376343, "learning_rate": 8.210881249366189e-07, "loss": 0.9775, "step": 9135 }, { "epoch": 0.22012957298716312, "grad_norm": 1.653274416923523, "learning_rate": 8.209613629449346e-07, "loss": 0.9183, "step": 9140 }, { "epoch": 0.2202499939789504, "grad_norm": 1.647321105003357, "learning_rate": 8.208346009532502e-07, "loss": 0.9363, "step": 9145 }, { "epoch": 0.2203704149707377, "grad_norm": 1.404857873916626, "learning_rate": 8.207078389615657e-07, "loss": 0.8812, "step": 9150 }, { "epoch": 0.220490835962525, "grad_norm": 1.4598122835159302, "learning_rate": 8.205810769698813e-07, "loss": 0.9517, "step": 9155 }, { "epoch": 0.22061125695431227, "grad_norm": 1.5059850215911865, "learning_rate": 8.20454314978197e-07, "loss": 0.886, "step": 9160 }, { "epoch": 0.22073167794609957, "grad_norm": 1.731574535369873, "learning_rate": 8.203275529865125e-07, "loss": 0.9316, "step": 9165 }, { "epoch": 0.22085209893788685, "grad_norm": 1.4054409265518188, "learning_rate": 8.202007909948281e-07, "loss": 0.9281, "step": 9170 }, { "epoch": 0.22097251992967415, "grad_norm": 1.78800630569458, "learning_rate": 8.200740290031437e-07, "loss": 0.954, "step": 9175 }, { "epoch": 0.22109294092146142, "grad_norm": 1.5038881301879883, "learning_rate": 8.199472670114592e-07, "loss": 0.9794, "step": 9180 }, { "epoch": 0.22121336191324872, "grad_norm": 1.5534361600875854, "learning_rate": 8.198205050197749e-07, "loss": 0.9235, "step": 9185 }, { "epoch": 0.221333782905036, "grad_norm": 1.6785494089126587, "learning_rate": 8.196937430280905e-07, "loss": 0.9028, "step": 9190 }, { "epoch": 0.2214542038968233, "grad_norm": 1.5974268913269043, "learning_rate": 8.195669810364059e-07, "loss": 0.9303, "step": 9195 }, { "epoch": 0.22157462488861057, "grad_norm": 1.4408422708511353, "learning_rate": 8.194402190447216e-07, "loss": 0.8687, "step": 9200 }, { "epoch": 0.22169504588039787, "grad_norm": 1.5255826711654663, "learning_rate": 8.193134570530372e-07, "loss": 0.9982, "step": 9205 }, { "epoch": 0.22181546687218515, "grad_norm": 1.4327969551086426, "learning_rate": 8.191866950613528e-07, "loss": 0.9542, "step": 9210 }, { "epoch": 0.22193588786397245, "grad_norm": 1.6425248384475708, "learning_rate": 8.190599330696684e-07, "loss": 0.8825, "step": 9215 }, { "epoch": 0.22205630885575975, "grad_norm": 1.54902184009552, "learning_rate": 8.189331710779839e-07, "loss": 0.9572, "step": 9220 }, { "epoch": 0.22217672984754702, "grad_norm": 1.4737131595611572, "learning_rate": 8.188064090862995e-07, "loss": 0.9314, "step": 9225 }, { "epoch": 0.22229715083933432, "grad_norm": 1.696393609046936, "learning_rate": 8.186796470946151e-07, "loss": 0.9247, "step": 9230 }, { "epoch": 0.2224175718311216, "grad_norm": 1.816698431968689, "learning_rate": 8.185528851029308e-07, "loss": 0.9583, "step": 9235 }, { "epoch": 0.2225379928229089, "grad_norm": 1.5856852531433105, "learning_rate": 8.184261231112462e-07, "loss": 0.9045, "step": 9240 }, { "epoch": 0.22265841381469617, "grad_norm": 1.691108226776123, "learning_rate": 8.182993611195619e-07, "loss": 0.9046, "step": 9245 }, { "epoch": 0.22277883480648347, "grad_norm": 1.5667667388916016, "learning_rate": 8.181725991278775e-07, "loss": 0.959, "step": 9250 }, { "epoch": 0.22289925579827075, "grad_norm": 1.6249210834503174, "learning_rate": 8.18045837136193e-07, "loss": 0.982, "step": 9255 }, { "epoch": 0.22301967679005805, "grad_norm": 1.4573348760604858, "learning_rate": 8.179190751445087e-07, "loss": 0.9418, "step": 9260 }, { "epoch": 0.22314009778184532, "grad_norm": 1.4732943773269653, "learning_rate": 8.177923131528242e-07, "loss": 0.9852, "step": 9265 }, { "epoch": 0.22326051877363262, "grad_norm": 1.6084719896316528, "learning_rate": 8.176655511611398e-07, "loss": 0.9526, "step": 9270 }, { "epoch": 0.2233809397654199, "grad_norm": 1.8238649368286133, "learning_rate": 8.175387891694554e-07, "loss": 0.8735, "step": 9275 }, { "epoch": 0.2235013607572072, "grad_norm": 1.6604654788970947, "learning_rate": 8.17412027177771e-07, "loss": 0.9414, "step": 9280 }, { "epoch": 0.2236217817489945, "grad_norm": 1.5664421319961548, "learning_rate": 8.172852651860866e-07, "loss": 0.9376, "step": 9285 }, { "epoch": 0.22374220274078177, "grad_norm": 1.5238959789276123, "learning_rate": 8.171585031944021e-07, "loss": 0.9551, "step": 9290 }, { "epoch": 0.22386262373256907, "grad_norm": 1.7010811567306519, "learning_rate": 8.170317412027178e-07, "loss": 0.9277, "step": 9295 }, { "epoch": 0.22398304472435634, "grad_norm": 1.8692560195922852, "learning_rate": 8.169049792110333e-07, "loss": 0.9838, "step": 9300 }, { "epoch": 0.22410346571614365, "grad_norm": 1.478436827659607, "learning_rate": 8.16778217219349e-07, "loss": 0.9273, "step": 9305 }, { "epoch": 0.22422388670793092, "grad_norm": 1.5085464715957642, "learning_rate": 8.166514552276645e-07, "loss": 0.9042, "step": 9310 }, { "epoch": 0.22434430769971822, "grad_norm": 1.468819260597229, "learning_rate": 8.1652469323598e-07, "loss": 0.9537, "step": 9315 }, { "epoch": 0.2244647286915055, "grad_norm": 1.4283561706542969, "learning_rate": 8.163979312442957e-07, "loss": 0.8855, "step": 9320 }, { "epoch": 0.2245851496832928, "grad_norm": 1.5676054954528809, "learning_rate": 8.162711692526113e-07, "loss": 0.9456, "step": 9325 }, { "epoch": 0.22470557067508007, "grad_norm": 1.615062952041626, "learning_rate": 8.161444072609269e-07, "loss": 0.9407, "step": 9330 }, { "epoch": 0.22482599166686737, "grad_norm": 1.4833568334579468, "learning_rate": 8.160176452692424e-07, "loss": 0.9134, "step": 9335 }, { "epoch": 0.22494641265865464, "grad_norm": 1.451080083847046, "learning_rate": 8.15890883277558e-07, "loss": 0.887, "step": 9340 }, { "epoch": 0.22506683365044194, "grad_norm": 1.5666835308074951, "learning_rate": 8.157641212858736e-07, "loss": 0.958, "step": 9345 }, { "epoch": 0.22518725464222925, "grad_norm": 1.4987585544586182, "learning_rate": 8.156373592941892e-07, "loss": 0.8869, "step": 9350 }, { "epoch": 0.22530767563401652, "grad_norm": 1.6401864290237427, "learning_rate": 8.155105973025049e-07, "loss": 0.9254, "step": 9355 }, { "epoch": 0.22542809662580382, "grad_norm": 3.9416544437408447, "learning_rate": 8.153838353108203e-07, "loss": 0.8688, "step": 9360 }, { "epoch": 0.2255485176175911, "grad_norm": 3.244033098220825, "learning_rate": 8.15257073319136e-07, "loss": 0.8505, "step": 9365 }, { "epoch": 0.2256689386093784, "grad_norm": 1.533593773841858, "learning_rate": 8.151303113274516e-07, "loss": 0.9816, "step": 9370 }, { "epoch": 0.22578935960116567, "grad_norm": 1.6429873704910278, "learning_rate": 8.150035493357671e-07, "loss": 0.992, "step": 9375 }, { "epoch": 0.22590978059295297, "grad_norm": 1.38799250125885, "learning_rate": 8.148767873440827e-07, "loss": 0.9864, "step": 9380 }, { "epoch": 0.22603020158474024, "grad_norm": 1.5810966491699219, "learning_rate": 8.147500253523983e-07, "loss": 0.9026, "step": 9385 }, { "epoch": 0.22615062257652754, "grad_norm": 1.738532543182373, "learning_rate": 8.146232633607139e-07, "loss": 0.9626, "step": 9390 }, { "epoch": 0.22627104356831482, "grad_norm": 1.605966329574585, "learning_rate": 8.144965013690295e-07, "loss": 0.8974, "step": 9395 }, { "epoch": 0.22639146456010212, "grad_norm": 1.7122306823730469, "learning_rate": 8.143697393773451e-07, "loss": 0.9531, "step": 9400 }, { "epoch": 0.22651188555188942, "grad_norm": 1.4698325395584106, "learning_rate": 8.142429773856606e-07, "loss": 0.9264, "step": 9405 }, { "epoch": 0.2266323065436767, "grad_norm": 1.5234861373901367, "learning_rate": 8.141162153939762e-07, "loss": 0.9472, "step": 9410 }, { "epoch": 0.226752727535464, "grad_norm": 1.6981227397918701, "learning_rate": 8.139894534022919e-07, "loss": 0.9419, "step": 9415 }, { "epoch": 0.22687314852725127, "grad_norm": 1.6163617372512817, "learning_rate": 8.138626914106074e-07, "loss": 0.9259, "step": 9420 }, { "epoch": 0.22699356951903857, "grad_norm": 1.4136073589324951, "learning_rate": 8.137359294189229e-07, "loss": 0.9281, "step": 9425 }, { "epoch": 0.22711399051082584, "grad_norm": 1.4144577980041504, "learning_rate": 8.136091674272386e-07, "loss": 0.9359, "step": 9430 }, { "epoch": 0.22723441150261314, "grad_norm": 1.7528362274169922, "learning_rate": 8.134824054355541e-07, "loss": 0.9475, "step": 9435 }, { "epoch": 0.22735483249440042, "grad_norm": 1.4299169778823853, "learning_rate": 8.133556434438698e-07, "loss": 0.9244, "step": 9440 }, { "epoch": 0.22747525348618772, "grad_norm": 1.508216381072998, "learning_rate": 8.132288814521854e-07, "loss": 0.9444, "step": 9445 }, { "epoch": 0.227595674477975, "grad_norm": 1.595263957977295, "learning_rate": 8.131021194605008e-07, "loss": 0.9829, "step": 9450 }, { "epoch": 0.2277160954697623, "grad_norm": 1.7147514820098877, "learning_rate": 8.129753574688165e-07, "loss": 0.9648, "step": 9455 }, { "epoch": 0.22783651646154957, "grad_norm": 1.4773589372634888, "learning_rate": 8.128485954771321e-07, "loss": 0.9089, "step": 9460 }, { "epoch": 0.22795693745333687, "grad_norm": 1.7763646841049194, "learning_rate": 8.127218334854477e-07, "loss": 0.8933, "step": 9465 }, { "epoch": 0.22807735844512417, "grad_norm": 1.7241098880767822, "learning_rate": 8.125950714937633e-07, "loss": 0.9405, "step": 9470 }, { "epoch": 0.22819777943691144, "grad_norm": 1.6956807374954224, "learning_rate": 8.124683095020789e-07, "loss": 0.9229, "step": 9475 }, { "epoch": 0.22831820042869874, "grad_norm": 1.601848840713501, "learning_rate": 8.123415475103944e-07, "loss": 0.9073, "step": 9480 }, { "epoch": 0.22843862142048602, "grad_norm": 1.5310746431350708, "learning_rate": 8.1221478551871e-07, "loss": 0.9413, "step": 9485 }, { "epoch": 0.22855904241227332, "grad_norm": 1.630176305770874, "learning_rate": 8.120880235270257e-07, "loss": 0.9737, "step": 9490 }, { "epoch": 0.2286794634040606, "grad_norm": 1.371692180633545, "learning_rate": 8.119612615353411e-07, "loss": 0.9324, "step": 9495 }, { "epoch": 0.2287998843958479, "grad_norm": 1.6516149044036865, "learning_rate": 8.118344995436568e-07, "loss": 0.9379, "step": 9500 }, { "epoch": 0.22892030538763516, "grad_norm": 1.6909898519515991, "learning_rate": 8.117077375519724e-07, "loss": 0.9307, "step": 9505 }, { "epoch": 0.22904072637942247, "grad_norm": 1.8337432146072388, "learning_rate": 8.11580975560288e-07, "loss": 0.9343, "step": 9510 }, { "epoch": 0.22916114737120974, "grad_norm": 1.6035581827163696, "learning_rate": 8.114542135686036e-07, "loss": 0.9654, "step": 9515 }, { "epoch": 0.22928156836299704, "grad_norm": 1.5184273719787598, "learning_rate": 8.113274515769191e-07, "loss": 0.8989, "step": 9520 }, { "epoch": 0.2294019893547843, "grad_norm": 1.56526780128479, "learning_rate": 8.112006895852347e-07, "loss": 0.9954, "step": 9525 }, { "epoch": 0.22952241034657161, "grad_norm": 1.6989341974258423, "learning_rate": 8.110739275935503e-07, "loss": 0.9337, "step": 9530 }, { "epoch": 0.22964283133835892, "grad_norm": 1.5808727741241455, "learning_rate": 8.10947165601866e-07, "loss": 0.936, "step": 9535 }, { "epoch": 0.2297632523301462, "grad_norm": 1.6222801208496094, "learning_rate": 8.108204036101814e-07, "loss": 0.9086, "step": 9540 }, { "epoch": 0.2298836733219335, "grad_norm": 1.6468894481658936, "learning_rate": 8.10693641618497e-07, "loss": 0.904, "step": 9545 }, { "epoch": 0.23000409431372076, "grad_norm": 1.5979857444763184, "learning_rate": 8.105668796268127e-07, "loss": 0.902, "step": 9550 }, { "epoch": 0.23012451530550806, "grad_norm": 1.7124053239822388, "learning_rate": 8.104401176351282e-07, "loss": 0.9355, "step": 9555 }, { "epoch": 0.23024493629729534, "grad_norm": 1.5121243000030518, "learning_rate": 8.103133556434439e-07, "loss": 0.96, "step": 9560 }, { "epoch": 0.23036535728908264, "grad_norm": 1.6419447660446167, "learning_rate": 8.101865936517594e-07, "loss": 0.913, "step": 9565 }, { "epoch": 0.2304857782808699, "grad_norm": 1.5802937746047974, "learning_rate": 8.100598316600749e-07, "loss": 0.9036, "step": 9570 }, { "epoch": 0.23060619927265721, "grad_norm": 1.6120250225067139, "learning_rate": 8.099330696683906e-07, "loss": 1.0045, "step": 9575 }, { "epoch": 0.2307266202644445, "grad_norm": 1.6308773756027222, "learning_rate": 8.098063076767062e-07, "loss": 0.9348, "step": 9580 }, { "epoch": 0.2308470412562318, "grad_norm": 1.5396705865859985, "learning_rate": 8.096795456850218e-07, "loss": 0.903, "step": 9585 }, { "epoch": 0.23096746224801906, "grad_norm": 1.4627436399459839, "learning_rate": 8.095527836933373e-07, "loss": 0.9531, "step": 9590 }, { "epoch": 0.23108788323980636, "grad_norm": 1.521868109703064, "learning_rate": 8.09426021701653e-07, "loss": 0.8989, "step": 9595 }, { "epoch": 0.23120830423159366, "grad_norm": 1.6635419130325317, "learning_rate": 8.092992597099685e-07, "loss": 0.9897, "step": 9600 }, { "epoch": 0.23132872522338094, "grad_norm": 1.906615972518921, "learning_rate": 8.091724977182841e-07, "loss": 0.9235, "step": 9605 }, { "epoch": 0.23144914621516824, "grad_norm": 1.4365830421447754, "learning_rate": 8.090457357265997e-07, "loss": 0.9304, "step": 9610 }, { "epoch": 0.2315695672069555, "grad_norm": 1.482728362083435, "learning_rate": 8.089189737349152e-07, "loss": 0.9914, "step": 9615 }, { "epoch": 0.2316899881987428, "grad_norm": 1.4797344207763672, "learning_rate": 8.087922117432309e-07, "loss": 0.9271, "step": 9620 }, { "epoch": 0.2318104091905301, "grad_norm": 1.562041163444519, "learning_rate": 8.086654497515465e-07, "loss": 0.9269, "step": 9625 }, { "epoch": 0.2319308301823174, "grad_norm": 1.515610694885254, "learning_rate": 8.08538687759862e-07, "loss": 0.9559, "step": 9630 }, { "epoch": 0.23205125117410466, "grad_norm": 1.6904336214065552, "learning_rate": 8.084119257681776e-07, "loss": 0.9103, "step": 9635 }, { "epoch": 0.23217167216589196, "grad_norm": 1.5302565097808838, "learning_rate": 8.082851637764932e-07, "loss": 0.9336, "step": 9640 }, { "epoch": 0.23229209315767924, "grad_norm": 1.7606114149093628, "learning_rate": 8.081584017848088e-07, "loss": 0.8815, "step": 9645 }, { "epoch": 0.23241251414946654, "grad_norm": 1.400367259979248, "learning_rate": 8.080316397931244e-07, "loss": 0.9513, "step": 9650 }, { "epoch": 0.2325329351412538, "grad_norm": 1.7276761531829834, "learning_rate": 8.079048778014401e-07, "loss": 0.9295, "step": 9655 }, { "epoch": 0.2326533561330411, "grad_norm": 1.7191526889801025, "learning_rate": 8.077781158097555e-07, "loss": 0.8973, "step": 9660 }, { "epoch": 0.2327737771248284, "grad_norm": 1.4676998853683472, "learning_rate": 8.076513538180711e-07, "loss": 0.8989, "step": 9665 }, { "epoch": 0.23289419811661569, "grad_norm": 1.5539352893829346, "learning_rate": 8.075245918263868e-07, "loss": 0.9475, "step": 9670 }, { "epoch": 0.233014619108403, "grad_norm": 1.6929291486740112, "learning_rate": 8.073978298347023e-07, "loss": 0.943, "step": 9675 }, { "epoch": 0.23313504010019026, "grad_norm": 1.5347380638122559, "learning_rate": 8.072710678430179e-07, "loss": 0.9108, "step": 9680 }, { "epoch": 0.23325546109197756, "grad_norm": 1.5121805667877197, "learning_rate": 8.071443058513335e-07, "loss": 0.9501, "step": 9685 }, { "epoch": 0.23337588208376484, "grad_norm": 1.4723517894744873, "learning_rate": 8.07017543859649e-07, "loss": 0.9007, "step": 9690 }, { "epoch": 0.23349630307555214, "grad_norm": 1.4446206092834473, "learning_rate": 8.068907818679647e-07, "loss": 0.95, "step": 9695 }, { "epoch": 0.2336167240673394, "grad_norm": 1.6316677331924438, "learning_rate": 8.067640198762803e-07, "loss": 0.9776, "step": 9700 }, { "epoch": 0.2337371450591267, "grad_norm": 1.6081719398498535, "learning_rate": 8.066372578845958e-07, "loss": 0.9209, "step": 9705 }, { "epoch": 0.23385756605091398, "grad_norm": 1.6020768880844116, "learning_rate": 8.065104958929114e-07, "loss": 0.9266, "step": 9710 }, { "epoch": 0.23397798704270129, "grad_norm": 1.450314998626709, "learning_rate": 8.063837339012271e-07, "loss": 0.9394, "step": 9715 }, { "epoch": 0.23409840803448856, "grad_norm": 1.5475916862487793, "learning_rate": 8.062569719095426e-07, "loss": 0.8923, "step": 9720 }, { "epoch": 0.23421882902627586, "grad_norm": 1.7144578695297241, "learning_rate": 8.061302099178581e-07, "loss": 0.9136, "step": 9725 }, { "epoch": 0.23433925001806316, "grad_norm": 1.5156981945037842, "learning_rate": 8.060034479261738e-07, "loss": 0.9153, "step": 9730 }, { "epoch": 0.23445967100985043, "grad_norm": 1.4762510061264038, "learning_rate": 8.058766859344893e-07, "loss": 0.9274, "step": 9735 }, { "epoch": 0.23458009200163774, "grad_norm": 1.5367364883422852, "learning_rate": 8.05749923942805e-07, "loss": 0.9553, "step": 9740 }, { "epoch": 0.234700512993425, "grad_norm": 1.4681675434112549, "learning_rate": 8.056231619511206e-07, "loss": 0.9167, "step": 9745 }, { "epoch": 0.2348209339852123, "grad_norm": 1.4918434619903564, "learning_rate": 8.05496399959436e-07, "loss": 0.963, "step": 9750 }, { "epoch": 0.23494135497699958, "grad_norm": 1.7512080669403076, "learning_rate": 8.053696379677517e-07, "loss": 0.9161, "step": 9755 }, { "epoch": 0.23506177596878688, "grad_norm": 1.8454680442810059, "learning_rate": 8.052428759760673e-07, "loss": 0.9616, "step": 9760 }, { "epoch": 0.23518219696057416, "grad_norm": 1.601298213005066, "learning_rate": 8.051161139843829e-07, "loss": 0.9286, "step": 9765 }, { "epoch": 0.23530261795236146, "grad_norm": 1.5492949485778809, "learning_rate": 8.049893519926985e-07, "loss": 0.9517, "step": 9770 }, { "epoch": 0.23542303894414873, "grad_norm": 2.002368688583374, "learning_rate": 8.04862590001014e-07, "loss": 0.9469, "step": 9775 }, { "epoch": 0.23554345993593603, "grad_norm": 1.533035159111023, "learning_rate": 8.047358280093296e-07, "loss": 0.9728, "step": 9780 }, { "epoch": 0.2356638809277233, "grad_norm": 1.7091615200042725, "learning_rate": 8.046090660176452e-07, "loss": 0.915, "step": 9785 }, { "epoch": 0.2357843019195106, "grad_norm": 1.8280829191207886, "learning_rate": 8.044823040259609e-07, "loss": 0.9154, "step": 9790 }, { "epoch": 0.2359047229112979, "grad_norm": 1.37148916721344, "learning_rate": 8.043555420342763e-07, "loss": 0.9121, "step": 9795 }, { "epoch": 0.23602514390308518, "grad_norm": 1.4839376211166382, "learning_rate": 8.04228780042592e-07, "loss": 0.8989, "step": 9800 }, { "epoch": 0.23614556489487248, "grad_norm": 1.599485993385315, "learning_rate": 8.041020180509076e-07, "loss": 0.9382, "step": 9805 }, { "epoch": 0.23626598588665976, "grad_norm": 1.343124270439148, "learning_rate": 8.039752560592231e-07, "loss": 0.9264, "step": 9810 }, { "epoch": 0.23638640687844706, "grad_norm": 1.5841120481491089, "learning_rate": 8.038484940675388e-07, "loss": 0.9461, "step": 9815 }, { "epoch": 0.23650682787023433, "grad_norm": 1.5181254148483276, "learning_rate": 8.037217320758543e-07, "loss": 0.9149, "step": 9820 }, { "epoch": 0.23662724886202163, "grad_norm": 1.5661225318908691, "learning_rate": 8.035949700841699e-07, "loss": 0.9626, "step": 9825 }, { "epoch": 0.2367476698538089, "grad_norm": 1.4676679372787476, "learning_rate": 8.034682080924855e-07, "loss": 0.968, "step": 9830 }, { "epoch": 0.2368680908455962, "grad_norm": 1.5806342363357544, "learning_rate": 8.033414461008012e-07, "loss": 0.9607, "step": 9835 }, { "epoch": 0.23698851183738348, "grad_norm": 1.432308316230774, "learning_rate": 8.032146841091166e-07, "loss": 0.9342, "step": 9840 }, { "epoch": 0.23710893282917078, "grad_norm": 1.5676426887512207, "learning_rate": 8.030879221174322e-07, "loss": 0.96, "step": 9845 }, { "epoch": 0.23722935382095808, "grad_norm": 1.8210023641586304, "learning_rate": 8.029611601257479e-07, "loss": 0.9001, "step": 9850 }, { "epoch": 0.23734977481274536, "grad_norm": 1.5412278175354004, "learning_rate": 8.028343981340634e-07, "loss": 0.931, "step": 9855 }, { "epoch": 0.23747019580453266, "grad_norm": 1.582090139389038, "learning_rate": 8.027076361423791e-07, "loss": 0.893, "step": 9860 }, { "epoch": 0.23759061679631993, "grad_norm": 1.480709433555603, "learning_rate": 8.025808741506946e-07, "loss": 0.8721, "step": 9865 }, { "epoch": 0.23771103778810723, "grad_norm": 1.7517716884613037, "learning_rate": 8.024541121590101e-07, "loss": 0.8898, "step": 9870 }, { "epoch": 0.2378314587798945, "grad_norm": 1.5809065103530884, "learning_rate": 8.023273501673258e-07, "loss": 0.9582, "step": 9875 }, { "epoch": 0.2379518797716818, "grad_norm": 1.604315161705017, "learning_rate": 8.022005881756414e-07, "loss": 0.9441, "step": 9880 }, { "epoch": 0.23807230076346908, "grad_norm": 1.5863564014434814, "learning_rate": 8.02073826183957e-07, "loss": 0.9584, "step": 9885 }, { "epoch": 0.23819272175525638, "grad_norm": 1.621246337890625, "learning_rate": 8.019470641922725e-07, "loss": 0.9417, "step": 9890 }, { "epoch": 0.23831314274704365, "grad_norm": 2.1980340480804443, "learning_rate": 8.018203022005881e-07, "loss": 0.9585, "step": 9895 }, { "epoch": 0.23843356373883096, "grad_norm": 1.7182064056396484, "learning_rate": 8.016935402089037e-07, "loss": 0.9086, "step": 9900 }, { "epoch": 0.23855398473061823, "grad_norm": 1.6237313747406006, "learning_rate": 8.015667782172193e-07, "loss": 0.9415, "step": 9905 }, { "epoch": 0.23867440572240553, "grad_norm": 1.5629863739013672, "learning_rate": 8.014400162255349e-07, "loss": 0.8983, "step": 9910 }, { "epoch": 0.23879482671419283, "grad_norm": 1.561966896057129, "learning_rate": 8.013132542338504e-07, "loss": 0.9293, "step": 9915 }, { "epoch": 0.2389152477059801, "grad_norm": 1.643320083618164, "learning_rate": 8.011864922421661e-07, "loss": 0.9716, "step": 9920 }, { "epoch": 0.2390356686977674, "grad_norm": 1.4515066146850586, "learning_rate": 8.010597302504817e-07, "loss": 0.9052, "step": 9925 }, { "epoch": 0.23915608968955468, "grad_norm": 1.4326725006103516, "learning_rate": 8.009329682587972e-07, "loss": 0.9318, "step": 9930 }, { "epoch": 0.23927651068134198, "grad_norm": 1.5089689493179321, "learning_rate": 8.008062062671128e-07, "loss": 0.9239, "step": 9935 }, { "epoch": 0.23939693167312925, "grad_norm": 1.687284231185913, "learning_rate": 8.006794442754284e-07, "loss": 0.9622, "step": 9940 }, { "epoch": 0.23951735266491656, "grad_norm": 1.4424015283584595, "learning_rate": 8.00552682283744e-07, "loss": 0.928, "step": 9945 }, { "epoch": 0.23963777365670383, "grad_norm": 1.5909708738327026, "learning_rate": 8.004259202920596e-07, "loss": 0.9228, "step": 9950 }, { "epoch": 0.23975819464849113, "grad_norm": 1.5277528762817383, "learning_rate": 8.002991583003752e-07, "loss": 0.9105, "step": 9955 }, { "epoch": 0.2398786156402784, "grad_norm": 1.580113172531128, "learning_rate": 8.001723963086907e-07, "loss": 0.9575, "step": 9960 }, { "epoch": 0.2399990366320657, "grad_norm": 1.5940556526184082, "learning_rate": 8.000456343170063e-07, "loss": 0.9305, "step": 9965 }, { "epoch": 0.24011945762385298, "grad_norm": 1.5910226106643677, "learning_rate": 7.99918872325322e-07, "loss": 1.0218, "step": 9970 }, { "epoch": 0.24023987861564028, "grad_norm": 1.6788511276245117, "learning_rate": 7.997921103336375e-07, "loss": 0.9411, "step": 9975 }, { "epoch": 0.24036029960742758, "grad_norm": 1.4881618022918701, "learning_rate": 7.99665348341953e-07, "loss": 0.8998, "step": 9980 }, { "epoch": 0.24048072059921485, "grad_norm": 2.371474266052246, "learning_rate": 7.995385863502687e-07, "loss": 0.8955, "step": 9985 }, { "epoch": 0.24060114159100215, "grad_norm": 1.70774245262146, "learning_rate": 7.994118243585843e-07, "loss": 0.987, "step": 9990 }, { "epoch": 0.24072156258278943, "grad_norm": 1.4561922550201416, "learning_rate": 7.992850623668999e-07, "loss": 0.8907, "step": 9995 }, { "epoch": 0.24084198357457673, "grad_norm": 2.2680671215057373, "learning_rate": 7.991583003752155e-07, "loss": 0.9029, "step": 10000 }, { "epoch": 0.240962404566364, "grad_norm": 1.4901586771011353, "learning_rate": 7.990315383835311e-07, "loss": 0.9067, "step": 10005 }, { "epoch": 0.2410828255581513, "grad_norm": 1.5713847875595093, "learning_rate": 7.989047763918466e-07, "loss": 0.9342, "step": 10010 }, { "epoch": 0.24120324654993858, "grad_norm": 1.7380549907684326, "learning_rate": 7.987780144001622e-07, "loss": 0.9784, "step": 10015 }, { "epoch": 0.24132366754172588, "grad_norm": 1.6125047206878662, "learning_rate": 7.986512524084779e-07, "loss": 0.9062, "step": 10020 }, { "epoch": 0.24144408853351315, "grad_norm": 1.498434066772461, "learning_rate": 7.985244904167933e-07, "loss": 0.9183, "step": 10025 }, { "epoch": 0.24156450952530045, "grad_norm": 1.5576609373092651, "learning_rate": 7.98397728425109e-07, "loss": 0.9168, "step": 10030 }, { "epoch": 0.24168493051708773, "grad_norm": 1.649990439414978, "learning_rate": 7.982709664334246e-07, "loss": 0.972, "step": 10035 }, { "epoch": 0.24180535150887503, "grad_norm": 1.5411405563354492, "learning_rate": 7.981442044417402e-07, "loss": 0.9177, "step": 10040 }, { "epoch": 0.24192577250066233, "grad_norm": 1.6077088117599487, "learning_rate": 7.980174424500558e-07, "loss": 0.8901, "step": 10045 }, { "epoch": 0.2420461934924496, "grad_norm": 1.8830534219741821, "learning_rate": 7.978906804583713e-07, "loss": 0.9117, "step": 10050 }, { "epoch": 0.2421666144842369, "grad_norm": 1.7061887979507446, "learning_rate": 7.977639184666869e-07, "loss": 0.886, "step": 10055 }, { "epoch": 0.24228703547602418, "grad_norm": 1.4365276098251343, "learning_rate": 7.976371564750025e-07, "loss": 0.8753, "step": 10060 }, { "epoch": 0.24240745646781148, "grad_norm": 1.485558032989502, "learning_rate": 7.975103944833182e-07, "loss": 0.9064, "step": 10065 }, { "epoch": 0.24252787745959875, "grad_norm": 1.5110009908676147, "learning_rate": 7.973836324916337e-07, "loss": 0.9667, "step": 10070 }, { "epoch": 0.24264829845138605, "grad_norm": 1.6972064971923828, "learning_rate": 7.972568704999492e-07, "loss": 0.9475, "step": 10075 }, { "epoch": 0.24276871944317333, "grad_norm": 2.102038621902466, "learning_rate": 7.971301085082649e-07, "loss": 0.9135, "step": 10080 }, { "epoch": 0.24288914043496063, "grad_norm": 1.4368513822555542, "learning_rate": 7.970033465165804e-07, "loss": 0.9098, "step": 10085 }, { "epoch": 0.2430095614267479, "grad_norm": 1.52772057056427, "learning_rate": 7.968765845248961e-07, "loss": 0.8988, "step": 10090 }, { "epoch": 0.2431299824185352, "grad_norm": 1.4237383604049683, "learning_rate": 7.967498225332116e-07, "loss": 0.9366, "step": 10095 }, { "epoch": 0.24325040341032247, "grad_norm": 1.4499160051345825, "learning_rate": 7.966230605415271e-07, "loss": 0.9305, "step": 10100 }, { "epoch": 0.24337082440210978, "grad_norm": 1.6376252174377441, "learning_rate": 7.964962985498428e-07, "loss": 0.9434, "step": 10105 }, { "epoch": 0.24349124539389708, "grad_norm": 1.7884248495101929, "learning_rate": 7.963695365581584e-07, "loss": 0.9995, "step": 10110 }, { "epoch": 0.24361166638568435, "grad_norm": 1.5183720588684082, "learning_rate": 7.96242774566474e-07, "loss": 0.9227, "step": 10115 }, { "epoch": 0.24373208737747165, "grad_norm": 1.5874552726745605, "learning_rate": 7.961160125747895e-07, "loss": 0.9153, "step": 10120 }, { "epoch": 0.24385250836925892, "grad_norm": 1.6614394187927246, "learning_rate": 7.959892505831052e-07, "loss": 0.978, "step": 10125 }, { "epoch": 0.24397292936104623, "grad_norm": 1.417792558670044, "learning_rate": 7.958624885914207e-07, "loss": 0.9653, "step": 10130 }, { "epoch": 0.2440933503528335, "grad_norm": 1.610689401626587, "learning_rate": 7.957357265997363e-07, "loss": 0.9511, "step": 10135 }, { "epoch": 0.2442137713446208, "grad_norm": 1.515844702720642, "learning_rate": 7.95608964608052e-07, "loss": 0.9546, "step": 10140 }, { "epoch": 0.24433419233640807, "grad_norm": 1.5553603172302246, "learning_rate": 7.954822026163674e-07, "loss": 0.9197, "step": 10145 }, { "epoch": 0.24445461332819537, "grad_norm": 1.635810375213623, "learning_rate": 7.953554406246831e-07, "loss": 0.8908, "step": 10150 }, { "epoch": 0.24457503431998265, "grad_norm": 1.8756006956100464, "learning_rate": 7.952286786329987e-07, "loss": 0.9145, "step": 10155 }, { "epoch": 0.24469545531176995, "grad_norm": 1.562914490699768, "learning_rate": 7.951019166413142e-07, "loss": 0.9254, "step": 10160 }, { "epoch": 0.24481587630355722, "grad_norm": 1.4989361763000488, "learning_rate": 7.949751546496298e-07, "loss": 0.9285, "step": 10165 }, { "epoch": 0.24493629729534452, "grad_norm": 1.4218214750289917, "learning_rate": 7.948483926579454e-07, "loss": 0.9399, "step": 10170 }, { "epoch": 0.24505671828713183, "grad_norm": 1.5706713199615479, "learning_rate": 7.94721630666261e-07, "loss": 0.8914, "step": 10175 }, { "epoch": 0.2451771392789191, "grad_norm": 1.4235652685165405, "learning_rate": 7.945948686745766e-07, "loss": 0.8733, "step": 10180 }, { "epoch": 0.2452975602707064, "grad_norm": 1.735122561454773, "learning_rate": 7.944681066828923e-07, "loss": 0.9394, "step": 10185 }, { "epoch": 0.24541798126249367, "grad_norm": 1.5530369281768799, "learning_rate": 7.943413446912077e-07, "loss": 0.9979, "step": 10190 }, { "epoch": 0.24553840225428097, "grad_norm": 1.6291742324829102, "learning_rate": 7.942145826995233e-07, "loss": 0.9256, "step": 10195 }, { "epoch": 0.24565882324606825, "grad_norm": 1.5617204904556274, "learning_rate": 7.94087820707839e-07, "loss": 0.9474, "step": 10200 }, { "epoch": 0.24577924423785555, "grad_norm": 1.5999999046325684, "learning_rate": 7.939610587161545e-07, "loss": 0.9339, "step": 10205 }, { "epoch": 0.24589966522964282, "grad_norm": 1.3905305862426758, "learning_rate": 7.938342967244701e-07, "loss": 0.8518, "step": 10210 }, { "epoch": 0.24602008622143012, "grad_norm": 1.569653034210205, "learning_rate": 7.937075347327857e-07, "loss": 0.9487, "step": 10215 }, { "epoch": 0.2461405072132174, "grad_norm": 1.578831434249878, "learning_rate": 7.935807727411012e-07, "loss": 0.9662, "step": 10220 }, { "epoch": 0.2462609282050047, "grad_norm": 1.528179407119751, "learning_rate": 7.934540107494169e-07, "loss": 0.9369, "step": 10225 }, { "epoch": 0.24638134919679197, "grad_norm": 1.5490789413452148, "learning_rate": 7.933272487577325e-07, "loss": 0.9889, "step": 10230 }, { "epoch": 0.24650177018857927, "grad_norm": 1.6813278198242188, "learning_rate": 7.93200486766048e-07, "loss": 0.9001, "step": 10235 }, { "epoch": 0.24662219118036657, "grad_norm": 1.4886800050735474, "learning_rate": 7.930737247743636e-07, "loss": 0.923, "step": 10240 }, { "epoch": 0.24674261217215385, "grad_norm": 1.586333990097046, "learning_rate": 7.929469627826793e-07, "loss": 0.951, "step": 10245 }, { "epoch": 0.24686303316394115, "grad_norm": 1.4950419664382935, "learning_rate": 7.928202007909948e-07, "loss": 0.9323, "step": 10250 }, { "epoch": 0.24698345415572842, "grad_norm": 1.5455451011657715, "learning_rate": 7.926934387993104e-07, "loss": 0.9356, "step": 10255 }, { "epoch": 0.24710387514751572, "grad_norm": 1.5218327045440674, "learning_rate": 7.92566676807626e-07, "loss": 0.9828, "step": 10260 }, { "epoch": 0.247224296139303, "grad_norm": 1.6908859014511108, "learning_rate": 7.924399148159415e-07, "loss": 0.9144, "step": 10265 }, { "epoch": 0.2473447171310903, "grad_norm": 1.6348868608474731, "learning_rate": 7.923131528242572e-07, "loss": 0.9528, "step": 10270 }, { "epoch": 0.24746513812287757, "grad_norm": 1.3725874423980713, "learning_rate": 7.921863908325728e-07, "loss": 0.8838, "step": 10275 }, { "epoch": 0.24758555911466487, "grad_norm": 1.5570913553237915, "learning_rate": 7.920596288408882e-07, "loss": 0.9295, "step": 10280 }, { "epoch": 0.24770598010645214, "grad_norm": 1.600831151008606, "learning_rate": 7.919328668492039e-07, "loss": 0.9013, "step": 10285 }, { "epoch": 0.24782640109823945, "grad_norm": 1.507856011390686, "learning_rate": 7.918061048575195e-07, "loss": 0.9489, "step": 10290 }, { "epoch": 0.24794682209002675, "grad_norm": 1.6118507385253906, "learning_rate": 7.916793428658351e-07, "loss": 0.9631, "step": 10295 }, { "epoch": 0.24806724308181402, "grad_norm": 1.5403980016708374, "learning_rate": 7.915525808741507e-07, "loss": 0.8982, "step": 10300 }, { "epoch": 0.24818766407360132, "grad_norm": 1.4400405883789062, "learning_rate": 7.914258188824663e-07, "loss": 0.8841, "step": 10305 }, { "epoch": 0.2483080850653886, "grad_norm": 1.6470706462860107, "learning_rate": 7.912990568907818e-07, "loss": 0.895, "step": 10310 }, { "epoch": 0.2484285060571759, "grad_norm": 1.5725401639938354, "learning_rate": 7.911722948990974e-07, "loss": 0.9149, "step": 10315 }, { "epoch": 0.24854892704896317, "grad_norm": 1.4755620956420898, "learning_rate": 7.910455329074131e-07, "loss": 0.9526, "step": 10320 }, { "epoch": 0.24866934804075047, "grad_norm": 1.624035120010376, "learning_rate": 7.909187709157285e-07, "loss": 0.9191, "step": 10325 }, { "epoch": 0.24878976903253774, "grad_norm": 1.6740853786468506, "learning_rate": 7.907920089240442e-07, "loss": 0.8927, "step": 10330 }, { "epoch": 0.24891019002432505, "grad_norm": 1.940212607383728, "learning_rate": 7.906652469323598e-07, "loss": 0.9221, "step": 10335 }, { "epoch": 0.24903061101611232, "grad_norm": 1.5053337812423706, "learning_rate": 7.905384849406753e-07, "loss": 0.8715, "step": 10340 }, { "epoch": 0.24915103200789962, "grad_norm": 1.6379318237304688, "learning_rate": 7.90411722948991e-07, "loss": 0.9527, "step": 10345 }, { "epoch": 0.2492714529996869, "grad_norm": 1.5555362701416016, "learning_rate": 7.902849609573065e-07, "loss": 0.9096, "step": 10350 }, { "epoch": 0.2493918739914742, "grad_norm": 1.6231192350387573, "learning_rate": 7.901581989656221e-07, "loss": 0.9344, "step": 10355 }, { "epoch": 0.2495122949832615, "grad_norm": 2.2351765632629395, "learning_rate": 7.900314369739377e-07, "loss": 0.9563, "step": 10360 }, { "epoch": 0.24963271597504877, "grad_norm": 1.3904240131378174, "learning_rate": 7.899046749822534e-07, "loss": 0.9499, "step": 10365 }, { "epoch": 0.24975313696683607, "grad_norm": 2.1326162815093994, "learning_rate": 7.897779129905689e-07, "loss": 0.922, "step": 10370 }, { "epoch": 0.24987355795862334, "grad_norm": 1.416149377822876, "learning_rate": 7.896511509988844e-07, "loss": 0.9077, "step": 10375 }, { "epoch": 0.24999397895041064, "grad_norm": 1.636558175086975, "learning_rate": 7.895243890072001e-07, "loss": 0.9489, "step": 10380 }, { "epoch": 0.2501143999421979, "grad_norm": 1.5372464656829834, "learning_rate": 7.893976270155156e-07, "loss": 0.9633, "step": 10385 }, { "epoch": 0.2502348209339852, "grad_norm": 1.4980571269989014, "learning_rate": 7.892708650238313e-07, "loss": 0.9452, "step": 10390 }, { "epoch": 0.2503552419257725, "grad_norm": 1.3831257820129395, "learning_rate": 7.891441030321468e-07, "loss": 0.9252, "step": 10395 }, { "epoch": 0.2504756629175598, "grad_norm": 1.4749293327331543, "learning_rate": 7.890173410404623e-07, "loss": 0.9759, "step": 10400 }, { "epoch": 0.25059608390934707, "grad_norm": 1.7515690326690674, "learning_rate": 7.88890579048778e-07, "loss": 1.0089, "step": 10405 }, { "epoch": 0.25071650490113434, "grad_norm": 1.518198013305664, "learning_rate": 7.887638170570936e-07, "loss": 0.9502, "step": 10410 }, { "epoch": 0.25083692589292167, "grad_norm": 1.4665954113006592, "learning_rate": 7.886370550654092e-07, "loss": 0.9368, "step": 10415 }, { "epoch": 0.25095734688470894, "grad_norm": 1.6156924962997437, "learning_rate": 7.885102930737247e-07, "loss": 0.9023, "step": 10420 }, { "epoch": 0.2510777678764962, "grad_norm": 1.5096485614776611, "learning_rate": 7.883835310820403e-07, "loss": 0.9287, "step": 10425 }, { "epoch": 0.25119818886828355, "grad_norm": 1.555070400238037, "learning_rate": 7.882567690903559e-07, "loss": 0.9033, "step": 10430 }, { "epoch": 0.2513186098600708, "grad_norm": 1.5974732637405396, "learning_rate": 7.881300070986715e-07, "loss": 0.9553, "step": 10435 }, { "epoch": 0.2514390308518581, "grad_norm": 1.684948205947876, "learning_rate": 7.880032451069872e-07, "loss": 0.9587, "step": 10440 }, { "epoch": 0.25155945184364537, "grad_norm": 1.6145825386047363, "learning_rate": 7.878764831153026e-07, "loss": 0.9151, "step": 10445 }, { "epoch": 0.2516798728354327, "grad_norm": 1.5612765550613403, "learning_rate": 7.877497211236183e-07, "loss": 0.9207, "step": 10450 }, { "epoch": 0.25180029382721997, "grad_norm": 1.639469027519226, "learning_rate": 7.876229591319339e-07, "loss": 0.9319, "step": 10455 }, { "epoch": 0.25192071481900724, "grad_norm": 1.57554292678833, "learning_rate": 7.874961971402494e-07, "loss": 0.8741, "step": 10460 }, { "epoch": 0.2520411358107945, "grad_norm": 1.6669059991836548, "learning_rate": 7.87369435148565e-07, "loss": 0.9171, "step": 10465 }, { "epoch": 0.25216155680258184, "grad_norm": 1.6609137058258057, "learning_rate": 7.872426731568806e-07, "loss": 0.947, "step": 10470 }, { "epoch": 0.2522819777943691, "grad_norm": 1.5579173564910889, "learning_rate": 7.871159111651962e-07, "loss": 0.9089, "step": 10475 }, { "epoch": 0.2524023987861564, "grad_norm": 1.6061656475067139, "learning_rate": 7.869891491735118e-07, "loss": 0.9144, "step": 10480 }, { "epoch": 0.25252281977794366, "grad_norm": 1.5641448497772217, "learning_rate": 7.868623871818275e-07, "loss": 0.9066, "step": 10485 }, { "epoch": 0.252643240769731, "grad_norm": 1.3542393445968628, "learning_rate": 7.867356251901429e-07, "loss": 0.9641, "step": 10490 }, { "epoch": 0.25276366176151827, "grad_norm": 1.599341869354248, "learning_rate": 7.866088631984585e-07, "loss": 0.8958, "step": 10495 }, { "epoch": 0.25288408275330554, "grad_norm": 1.5220602750778198, "learning_rate": 7.864821012067742e-07, "loss": 0.9044, "step": 10500 }, { "epoch": 0.25300450374509287, "grad_norm": 1.562246561050415, "learning_rate": 7.863553392150897e-07, "loss": 0.8578, "step": 10505 }, { "epoch": 0.25312492473688014, "grad_norm": 1.6219570636749268, "learning_rate": 7.862285772234053e-07, "loss": 0.9255, "step": 10510 }, { "epoch": 0.2532453457286674, "grad_norm": 1.7990187406539917, "learning_rate": 7.861018152317209e-07, "loss": 0.9217, "step": 10515 }, { "epoch": 0.2533657667204547, "grad_norm": 1.477607250213623, "learning_rate": 7.859750532400364e-07, "loss": 0.9559, "step": 10520 }, { "epoch": 0.253486187712242, "grad_norm": 1.5525449514389038, "learning_rate": 7.858482912483521e-07, "loss": 0.8984, "step": 10525 }, { "epoch": 0.2536066087040293, "grad_norm": 1.4910857677459717, "learning_rate": 7.857215292566677e-07, "loss": 0.8982, "step": 10530 }, { "epoch": 0.25372702969581656, "grad_norm": 1.5159627199172974, "learning_rate": 7.855947672649832e-07, "loss": 0.9465, "step": 10535 }, { "epoch": 0.25384745068760384, "grad_norm": 1.797110676765442, "learning_rate": 7.854680052732988e-07, "loss": 0.9194, "step": 10540 }, { "epoch": 0.25396787167939117, "grad_norm": 1.5768470764160156, "learning_rate": 7.853412432816144e-07, "loss": 0.9684, "step": 10545 }, { "epoch": 0.25408829267117844, "grad_norm": 1.5663419961929321, "learning_rate": 7.8521448128993e-07, "loss": 0.9466, "step": 10550 }, { "epoch": 0.2542087136629657, "grad_norm": 1.6284027099609375, "learning_rate": 7.850877192982455e-07, "loss": 0.9652, "step": 10555 }, { "epoch": 0.25432913465475304, "grad_norm": 1.629702091217041, "learning_rate": 7.849609573065612e-07, "loss": 0.9266, "step": 10560 }, { "epoch": 0.2544495556465403, "grad_norm": 1.8558638095855713, "learning_rate": 7.848341953148767e-07, "loss": 0.9305, "step": 10565 }, { "epoch": 0.2545699766383276, "grad_norm": 1.5336354970932007, "learning_rate": 7.847074333231924e-07, "loss": 0.9098, "step": 10570 }, { "epoch": 0.25469039763011486, "grad_norm": 1.4641679525375366, "learning_rate": 7.84580671331508e-07, "loss": 0.9908, "step": 10575 }, { "epoch": 0.2548108186219022, "grad_norm": 1.5101847648620605, "learning_rate": 7.844539093398234e-07, "loss": 0.9182, "step": 10580 }, { "epoch": 0.25493123961368946, "grad_norm": 1.5551209449768066, "learning_rate": 7.843271473481391e-07, "loss": 0.9541, "step": 10585 }, { "epoch": 0.25505166060547674, "grad_norm": 1.5243884325027466, "learning_rate": 7.842003853564547e-07, "loss": 0.9704, "step": 10590 }, { "epoch": 0.255172081597264, "grad_norm": 1.464455485343933, "learning_rate": 7.840736233647703e-07, "loss": 0.8812, "step": 10595 }, { "epoch": 0.25529250258905134, "grad_norm": 1.4903156757354736, "learning_rate": 7.839468613730859e-07, "loss": 0.8868, "step": 10600 }, { "epoch": 0.2554129235808386, "grad_norm": 1.581946611404419, "learning_rate": 7.838200993814014e-07, "loss": 0.8508, "step": 10605 }, { "epoch": 0.2555333445726259, "grad_norm": 1.4839967489242554, "learning_rate": 7.83693337389717e-07, "loss": 0.9232, "step": 10610 }, { "epoch": 0.2556537655644132, "grad_norm": 1.7015328407287598, "learning_rate": 7.835665753980326e-07, "loss": 0.9166, "step": 10615 }, { "epoch": 0.2557741865562005, "grad_norm": 1.6252477169036865, "learning_rate": 7.834398134063483e-07, "loss": 0.9375, "step": 10620 }, { "epoch": 0.25589460754798776, "grad_norm": 1.5471183061599731, "learning_rate": 7.833130514146637e-07, "loss": 0.918, "step": 10625 }, { "epoch": 0.25601502853977504, "grad_norm": 1.4048386812210083, "learning_rate": 7.831862894229793e-07, "loss": 0.9263, "step": 10630 }, { "epoch": 0.25613544953156236, "grad_norm": 1.5877938270568848, "learning_rate": 7.83059527431295e-07, "loss": 0.9965, "step": 10635 }, { "epoch": 0.25625587052334964, "grad_norm": 1.8080618381500244, "learning_rate": 7.829327654396105e-07, "loss": 0.9158, "step": 10640 }, { "epoch": 0.2563762915151369, "grad_norm": 1.533905267715454, "learning_rate": 7.828060034479262e-07, "loss": 0.9173, "step": 10645 }, { "epoch": 0.2564967125069242, "grad_norm": 1.73739492893219, "learning_rate": 7.826792414562417e-07, "loss": 0.9389, "step": 10650 }, { "epoch": 0.2566171334987115, "grad_norm": 1.486688494682312, "learning_rate": 7.825524794645573e-07, "loss": 0.959, "step": 10655 }, { "epoch": 0.2567375544904988, "grad_norm": 1.5035080909729004, "learning_rate": 7.824257174728729e-07, "loss": 0.9039, "step": 10660 }, { "epoch": 0.25685797548228606, "grad_norm": 1.618054986000061, "learning_rate": 7.822989554811885e-07, "loss": 0.9412, "step": 10665 }, { "epoch": 0.25697839647407333, "grad_norm": 1.6609909534454346, "learning_rate": 7.82172193489504e-07, "loss": 0.8911, "step": 10670 }, { "epoch": 0.25709881746586066, "grad_norm": 1.6681872606277466, "learning_rate": 7.820454314978196e-07, "loss": 0.9248, "step": 10675 }, { "epoch": 0.25721923845764794, "grad_norm": 1.5016465187072754, "learning_rate": 7.819186695061353e-07, "loss": 0.9441, "step": 10680 }, { "epoch": 0.2573396594494352, "grad_norm": 1.589369297027588, "learning_rate": 7.817919075144508e-07, "loss": 0.9639, "step": 10685 }, { "epoch": 0.25746008044122254, "grad_norm": 1.712123155593872, "learning_rate": 7.816651455227664e-07, "loss": 0.9627, "step": 10690 }, { "epoch": 0.2575805014330098, "grad_norm": 1.4570205211639404, "learning_rate": 7.81538383531082e-07, "loss": 0.8918, "step": 10695 }, { "epoch": 0.2577009224247971, "grad_norm": 1.4842736721038818, "learning_rate": 7.814116215393975e-07, "loss": 0.9268, "step": 10700 }, { "epoch": 0.25782134341658436, "grad_norm": 1.4311498403549194, "learning_rate": 7.812848595477132e-07, "loss": 0.9513, "step": 10705 }, { "epoch": 0.2579417644083717, "grad_norm": 1.791871190071106, "learning_rate": 7.811580975560288e-07, "loss": 0.9687, "step": 10710 }, { "epoch": 0.25806218540015896, "grad_norm": 1.471676230430603, "learning_rate": 7.810313355643444e-07, "loss": 0.9213, "step": 10715 }, { "epoch": 0.25818260639194623, "grad_norm": 1.6426857709884644, "learning_rate": 7.809045735726599e-07, "loss": 0.9445, "step": 10720 }, { "epoch": 0.2583030273837335, "grad_norm": 1.7955909967422485, "learning_rate": 7.807778115809755e-07, "loss": 0.8952, "step": 10725 }, { "epoch": 0.25842344837552084, "grad_norm": 1.5316821336746216, "learning_rate": 7.806510495892911e-07, "loss": 0.8781, "step": 10730 }, { "epoch": 0.2585438693673081, "grad_norm": 1.5494688749313354, "learning_rate": 7.805242875976067e-07, "loss": 0.9542, "step": 10735 }, { "epoch": 0.2586642903590954, "grad_norm": 1.5220109224319458, "learning_rate": 7.803975256059223e-07, "loss": 0.9104, "step": 10740 }, { "epoch": 0.2587847113508827, "grad_norm": 1.5227622985839844, "learning_rate": 7.802707636142378e-07, "loss": 0.9549, "step": 10745 }, { "epoch": 0.25890513234267, "grad_norm": 1.5300692319869995, "learning_rate": 7.801440016225534e-07, "loss": 0.9111, "step": 10750 }, { "epoch": 0.25902555333445726, "grad_norm": 1.4417011737823486, "learning_rate": 7.800172396308691e-07, "loss": 0.8647, "step": 10755 }, { "epoch": 0.25914597432624453, "grad_norm": 1.893277645111084, "learning_rate": 7.798904776391846e-07, "loss": 0.9442, "step": 10760 }, { "epoch": 0.25926639531803186, "grad_norm": 1.6196260452270508, "learning_rate": 7.797637156475002e-07, "loss": 0.9151, "step": 10765 }, { "epoch": 0.25938681630981913, "grad_norm": 1.7305158376693726, "learning_rate": 7.796369536558158e-07, "loss": 0.9412, "step": 10770 }, { "epoch": 0.2595072373016064, "grad_norm": 1.6203160285949707, "learning_rate": 7.795101916641314e-07, "loss": 0.9353, "step": 10775 }, { "epoch": 0.2596276582933937, "grad_norm": 1.3651759624481201, "learning_rate": 7.79383429672447e-07, "loss": 0.9142, "step": 10780 }, { "epoch": 0.259748079285181, "grad_norm": 1.571115493774414, "learning_rate": 7.792566676807626e-07, "loss": 0.9168, "step": 10785 }, { "epoch": 0.2598685002769683, "grad_norm": 1.6559398174285889, "learning_rate": 7.791299056890781e-07, "loss": 0.9628, "step": 10790 }, { "epoch": 0.25998892126875556, "grad_norm": 1.8212372064590454, "learning_rate": 7.790031436973937e-07, "loss": 0.9273, "step": 10795 }, { "epoch": 0.26010934226054283, "grad_norm": 1.6014947891235352, "learning_rate": 7.788763817057094e-07, "loss": 0.9179, "step": 10800 }, { "epoch": 0.26022976325233016, "grad_norm": 2.0242059230804443, "learning_rate": 7.787496197140249e-07, "loss": 0.9219, "step": 10805 }, { "epoch": 0.26035018424411743, "grad_norm": 1.4445858001708984, "learning_rate": 7.786228577223404e-07, "loss": 0.9384, "step": 10810 }, { "epoch": 0.2604706052359047, "grad_norm": 1.4920601844787598, "learning_rate": 7.784960957306561e-07, "loss": 0.9833, "step": 10815 }, { "epoch": 0.26059102622769204, "grad_norm": 1.5067740678787231, "learning_rate": 7.783693337389716e-07, "loss": 0.8776, "step": 10820 }, { "epoch": 0.2607114472194793, "grad_norm": 1.4836264848709106, "learning_rate": 7.782425717472873e-07, "loss": 0.9002, "step": 10825 }, { "epoch": 0.2608318682112666, "grad_norm": 1.4572845697402954, "learning_rate": 7.781158097556029e-07, "loss": 0.9199, "step": 10830 }, { "epoch": 0.26095228920305386, "grad_norm": 1.5106346607208252, "learning_rate": 7.779890477639183e-07, "loss": 0.8979, "step": 10835 }, { "epoch": 0.2610727101948412, "grad_norm": 1.5475988388061523, "learning_rate": 7.77862285772234e-07, "loss": 0.9266, "step": 10840 }, { "epoch": 0.26119313118662846, "grad_norm": 1.5073845386505127, "learning_rate": 7.777355237805496e-07, "loss": 0.9067, "step": 10845 }, { "epoch": 0.26131355217841573, "grad_norm": 1.6924643516540527, "learning_rate": 7.776087617888652e-07, "loss": 0.9067, "step": 10850 }, { "epoch": 0.261433973170203, "grad_norm": 1.3849414587020874, "learning_rate": 7.774819997971807e-07, "loss": 0.9451, "step": 10855 }, { "epoch": 0.26155439416199033, "grad_norm": 1.6200286149978638, "learning_rate": 7.773552378054964e-07, "loss": 0.9167, "step": 10860 }, { "epoch": 0.2616748151537776, "grad_norm": 1.5841484069824219, "learning_rate": 7.772284758138119e-07, "loss": 0.9103, "step": 10865 }, { "epoch": 0.2617952361455649, "grad_norm": 1.4382582902908325, "learning_rate": 7.771017138221275e-07, "loss": 0.9266, "step": 10870 }, { "epoch": 0.2619156571373522, "grad_norm": 1.5426464080810547, "learning_rate": 7.769749518304432e-07, "loss": 0.8977, "step": 10875 }, { "epoch": 0.2620360781291395, "grad_norm": 3.087616443634033, "learning_rate": 7.768481898387586e-07, "loss": 0.9505, "step": 10880 }, { "epoch": 0.26215649912092676, "grad_norm": 1.3389363288879395, "learning_rate": 7.767214278470743e-07, "loss": 0.9173, "step": 10885 }, { "epoch": 0.26227692011271403, "grad_norm": 1.723882794380188, "learning_rate": 7.765946658553899e-07, "loss": 0.9164, "step": 10890 }, { "epoch": 0.26239734110450136, "grad_norm": 1.563045620918274, "learning_rate": 7.764679038637054e-07, "loss": 0.8643, "step": 10895 }, { "epoch": 0.26251776209628863, "grad_norm": 1.4408845901489258, "learning_rate": 7.763411418720211e-07, "loss": 0.9516, "step": 10900 }, { "epoch": 0.2626381830880759, "grad_norm": 1.5494115352630615, "learning_rate": 7.762143798803366e-07, "loss": 0.9362, "step": 10905 }, { "epoch": 0.2627586040798632, "grad_norm": 1.701547384262085, "learning_rate": 7.760876178886522e-07, "loss": 0.9754, "step": 10910 }, { "epoch": 0.2628790250716505, "grad_norm": 1.7622368335723877, "learning_rate": 7.759608558969678e-07, "loss": 0.9356, "step": 10915 }, { "epoch": 0.2629994460634378, "grad_norm": 1.4967761039733887, "learning_rate": 7.758340939052835e-07, "loss": 0.9762, "step": 10920 }, { "epoch": 0.26311986705522505, "grad_norm": 1.4345545768737793, "learning_rate": 7.757073319135989e-07, "loss": 0.9089, "step": 10925 }, { "epoch": 0.2632402880470124, "grad_norm": 1.3745805025100708, "learning_rate": 7.755805699219145e-07, "loss": 0.9402, "step": 10930 }, { "epoch": 0.26336070903879966, "grad_norm": 1.5181835889816284, "learning_rate": 7.754538079302302e-07, "loss": 0.9539, "step": 10935 }, { "epoch": 0.26348113003058693, "grad_norm": 1.359045386314392, "learning_rate": 7.753270459385457e-07, "loss": 0.8787, "step": 10940 }, { "epoch": 0.2636015510223742, "grad_norm": 1.57675302028656, "learning_rate": 7.752002839468614e-07, "loss": 0.9159, "step": 10945 }, { "epoch": 0.26372197201416153, "grad_norm": 1.686819314956665, "learning_rate": 7.750735219551769e-07, "loss": 0.9341, "step": 10950 }, { "epoch": 0.2638423930059488, "grad_norm": 1.331228256225586, "learning_rate": 7.749467599634924e-07, "loss": 0.9419, "step": 10955 }, { "epoch": 0.2639628139977361, "grad_norm": 1.553196907043457, "learning_rate": 7.748199979718081e-07, "loss": 0.9446, "step": 10960 }, { "epoch": 0.26408323498952335, "grad_norm": 1.725075125694275, "learning_rate": 7.746932359801237e-07, "loss": 0.9115, "step": 10965 }, { "epoch": 0.2642036559813107, "grad_norm": 1.7521941661834717, "learning_rate": 7.745664739884392e-07, "loss": 0.9618, "step": 10970 }, { "epoch": 0.26432407697309795, "grad_norm": 1.7503281831741333, "learning_rate": 7.744397119967548e-07, "loss": 0.9227, "step": 10975 }, { "epoch": 0.26444449796488523, "grad_norm": 1.615695595741272, "learning_rate": 7.743129500050705e-07, "loss": 0.9125, "step": 10980 }, { "epoch": 0.2645649189566725, "grad_norm": 1.4781994819641113, "learning_rate": 7.74186188013386e-07, "loss": 0.9136, "step": 10985 }, { "epoch": 0.26468533994845983, "grad_norm": 1.5761228799819946, "learning_rate": 7.740594260217016e-07, "loss": 0.9014, "step": 10990 }, { "epoch": 0.2648057609402471, "grad_norm": 1.7451871633529663, "learning_rate": 7.739326640300172e-07, "loss": 0.9402, "step": 10995 }, { "epoch": 0.2649261819320344, "grad_norm": 1.504990577697754, "learning_rate": 7.738059020383327e-07, "loss": 0.9071, "step": 11000 }, { "epoch": 0.2650466029238217, "grad_norm": 1.5758501291275024, "learning_rate": 7.736791400466484e-07, "loss": 0.9766, "step": 11005 }, { "epoch": 0.265167023915609, "grad_norm": 1.4429376125335693, "learning_rate": 7.73552378054964e-07, "loss": 0.9063, "step": 11010 }, { "epoch": 0.26528744490739625, "grad_norm": 1.5816013813018799, "learning_rate": 7.734256160632797e-07, "loss": 0.9162, "step": 11015 }, { "epoch": 0.2654078658991835, "grad_norm": 1.6807634830474854, "learning_rate": 7.732988540715951e-07, "loss": 0.938, "step": 11020 }, { "epoch": 0.26552828689097085, "grad_norm": 1.447342038154602, "learning_rate": 7.731720920799107e-07, "loss": 0.9361, "step": 11025 }, { "epoch": 0.26564870788275813, "grad_norm": 1.5453587770462036, "learning_rate": 7.730453300882264e-07, "loss": 0.9549, "step": 11030 }, { "epoch": 0.2657691288745454, "grad_norm": 1.658867597579956, "learning_rate": 7.729185680965419e-07, "loss": 0.9225, "step": 11035 }, { "epoch": 0.2658895498663327, "grad_norm": 1.4685916900634766, "learning_rate": 7.727918061048575e-07, "loss": 0.8776, "step": 11040 }, { "epoch": 0.26600997085812, "grad_norm": 1.5075732469558716, "learning_rate": 7.726650441131731e-07, "loss": 0.9686, "step": 11045 }, { "epoch": 0.2661303918499073, "grad_norm": 1.5110318660736084, "learning_rate": 7.725382821214886e-07, "loss": 0.9034, "step": 11050 }, { "epoch": 0.26625081284169455, "grad_norm": 1.489485740661621, "learning_rate": 7.724115201298043e-07, "loss": 0.9587, "step": 11055 }, { "epoch": 0.2663712338334819, "grad_norm": 1.6650078296661377, "learning_rate": 7.722847581381199e-07, "loss": 0.9401, "step": 11060 }, { "epoch": 0.26649165482526915, "grad_norm": 1.5072864294052124, "learning_rate": 7.721579961464354e-07, "loss": 0.8722, "step": 11065 }, { "epoch": 0.2666120758170564, "grad_norm": 1.5710713863372803, "learning_rate": 7.72031234154751e-07, "loss": 0.9457, "step": 11070 }, { "epoch": 0.2667324968088437, "grad_norm": 1.441070795059204, "learning_rate": 7.719044721630666e-07, "loss": 0.9234, "step": 11075 }, { "epoch": 0.26685291780063103, "grad_norm": 1.599685549736023, "learning_rate": 7.717777101713822e-07, "loss": 0.939, "step": 11080 }, { "epoch": 0.2669733387924183, "grad_norm": 1.5153743028640747, "learning_rate": 7.716509481796978e-07, "loss": 0.9616, "step": 11085 }, { "epoch": 0.2670937597842056, "grad_norm": 1.5871284008026123, "learning_rate": 7.715241861880134e-07, "loss": 0.9913, "step": 11090 }, { "epoch": 0.26721418077599285, "grad_norm": 1.5965393781661987, "learning_rate": 7.713974241963289e-07, "loss": 0.9124, "step": 11095 }, { "epoch": 0.2673346017677802, "grad_norm": 1.5728003978729248, "learning_rate": 7.712706622046446e-07, "loss": 0.9236, "step": 11100 }, { "epoch": 0.26745502275956745, "grad_norm": 1.4616875648498535, "learning_rate": 7.711439002129602e-07, "loss": 0.941, "step": 11105 }, { "epoch": 0.2675754437513547, "grad_norm": 1.7237117290496826, "learning_rate": 7.710171382212756e-07, "loss": 0.9377, "step": 11110 }, { "epoch": 0.267695864743142, "grad_norm": 1.513432502746582, "learning_rate": 7.708903762295913e-07, "loss": 0.9646, "step": 11115 }, { "epoch": 0.2678162857349293, "grad_norm": 1.4135527610778809, "learning_rate": 7.707636142379069e-07, "loss": 0.9422, "step": 11120 }, { "epoch": 0.2679367067267166, "grad_norm": 1.4501285552978516, "learning_rate": 7.706368522462225e-07, "loss": 0.9505, "step": 11125 }, { "epoch": 0.2680571277185039, "grad_norm": 1.7850518226623535, "learning_rate": 7.705100902545381e-07, "loss": 0.9199, "step": 11130 }, { "epoch": 0.2681775487102912, "grad_norm": 1.5002425909042358, "learning_rate": 7.703833282628536e-07, "loss": 0.9142, "step": 11135 }, { "epoch": 0.2682979697020785, "grad_norm": 1.6325747966766357, "learning_rate": 7.702565662711692e-07, "loss": 0.9115, "step": 11140 }, { "epoch": 0.26841839069386575, "grad_norm": 1.522478699684143, "learning_rate": 7.701298042794848e-07, "loss": 0.9923, "step": 11145 }, { "epoch": 0.268538811685653, "grad_norm": 1.7870874404907227, "learning_rate": 7.700030422878005e-07, "loss": 0.9462, "step": 11150 }, { "epoch": 0.26865923267744035, "grad_norm": 1.5112918615341187, "learning_rate": 7.698762802961159e-07, "loss": 0.8391, "step": 11155 }, { "epoch": 0.2687796536692276, "grad_norm": 1.603967308998108, "learning_rate": 7.697495183044315e-07, "loss": 0.9103, "step": 11160 }, { "epoch": 0.2689000746610149, "grad_norm": 1.631317377090454, "learning_rate": 7.696227563127472e-07, "loss": 0.9129, "step": 11165 }, { "epoch": 0.26902049565280217, "grad_norm": 1.62999427318573, "learning_rate": 7.694959943210627e-07, "loss": 0.9031, "step": 11170 }, { "epoch": 0.2691409166445895, "grad_norm": 1.3878083229064941, "learning_rate": 7.693692323293784e-07, "loss": 0.919, "step": 11175 }, { "epoch": 0.2692613376363768, "grad_norm": 1.516343355178833, "learning_rate": 7.692424703376939e-07, "loss": 0.9456, "step": 11180 }, { "epoch": 0.26938175862816405, "grad_norm": 1.4829399585723877, "learning_rate": 7.691157083460095e-07, "loss": 0.915, "step": 11185 }, { "epoch": 0.2695021796199514, "grad_norm": 1.9153403043746948, "learning_rate": 7.689889463543251e-07, "loss": 0.9071, "step": 11190 }, { "epoch": 0.26962260061173865, "grad_norm": 1.5353426933288574, "learning_rate": 7.688621843626407e-07, "loss": 0.9368, "step": 11195 }, { "epoch": 0.2697430216035259, "grad_norm": 1.4356238842010498, "learning_rate": 7.687354223709563e-07, "loss": 0.9369, "step": 11200 }, { "epoch": 0.2698634425953132, "grad_norm": 1.9013326168060303, "learning_rate": 7.686086603792718e-07, "loss": 0.9614, "step": 11205 }, { "epoch": 0.2699838635871005, "grad_norm": 1.7938717603683472, "learning_rate": 7.684818983875875e-07, "loss": 0.9126, "step": 11210 }, { "epoch": 0.2701042845788878, "grad_norm": 1.5766910314559937, "learning_rate": 7.68355136395903e-07, "loss": 0.9517, "step": 11215 }, { "epoch": 0.27022470557067507, "grad_norm": 1.5627079010009766, "learning_rate": 7.682283744042186e-07, "loss": 0.9558, "step": 11220 }, { "epoch": 0.27034512656246235, "grad_norm": 1.4922775030136108, "learning_rate": 7.681016124125342e-07, "loss": 0.9098, "step": 11225 }, { "epoch": 0.2704655475542497, "grad_norm": 1.735756278038025, "learning_rate": 7.679748504208497e-07, "loss": 0.9373, "step": 11230 }, { "epoch": 0.27058596854603695, "grad_norm": 1.7026416063308716, "learning_rate": 7.678480884291654e-07, "loss": 0.9146, "step": 11235 }, { "epoch": 0.2707063895378242, "grad_norm": 1.3560659885406494, "learning_rate": 7.67721326437481e-07, "loss": 0.9555, "step": 11240 }, { "epoch": 0.2708268105296115, "grad_norm": 1.432887315750122, "learning_rate": 7.675945644457966e-07, "loss": 0.9551, "step": 11245 }, { "epoch": 0.2709472315213988, "grad_norm": 1.6378475427627563, "learning_rate": 7.674678024541121e-07, "loss": 0.8897, "step": 11250 }, { "epoch": 0.2710676525131861, "grad_norm": 1.5451323986053467, "learning_rate": 7.673410404624277e-07, "loss": 0.9499, "step": 11255 }, { "epoch": 0.27118807350497337, "grad_norm": 1.508756160736084, "learning_rate": 7.672142784707433e-07, "loss": 0.9058, "step": 11260 }, { "epoch": 0.2713084944967607, "grad_norm": 1.5754610300064087, "learning_rate": 7.670875164790589e-07, "loss": 0.9651, "step": 11265 }, { "epoch": 0.271428915488548, "grad_norm": 1.5599875450134277, "learning_rate": 7.669607544873746e-07, "loss": 0.9101, "step": 11270 }, { "epoch": 0.27154933648033525, "grad_norm": 1.5504896640777588, "learning_rate": 7.6683399249569e-07, "loss": 0.9511, "step": 11275 }, { "epoch": 0.2716697574721225, "grad_norm": 1.6675881147384644, "learning_rate": 7.667072305040056e-07, "loss": 0.9192, "step": 11280 }, { "epoch": 0.27179017846390985, "grad_norm": 1.3830749988555908, "learning_rate": 7.665804685123213e-07, "loss": 0.922, "step": 11285 }, { "epoch": 0.2719105994556971, "grad_norm": 1.6686891317367554, "learning_rate": 7.664537065206368e-07, "loss": 0.9337, "step": 11290 }, { "epoch": 0.2720310204474844, "grad_norm": 1.8631843328475952, "learning_rate": 7.663269445289524e-07, "loss": 0.9199, "step": 11295 }, { "epoch": 0.27215144143927167, "grad_norm": 1.4798780679702759, "learning_rate": 7.66200182537268e-07, "loss": 0.958, "step": 11300 }, { "epoch": 0.272271862431059, "grad_norm": 1.3518550395965576, "learning_rate": 7.660734205455836e-07, "loss": 0.8929, "step": 11305 }, { "epoch": 0.27239228342284627, "grad_norm": 1.6929333209991455, "learning_rate": 7.659466585538992e-07, "loss": 0.916, "step": 11310 }, { "epoch": 0.27251270441463354, "grad_norm": 1.4561303853988647, "learning_rate": 7.658198965622148e-07, "loss": 0.9634, "step": 11315 }, { "epoch": 0.2726331254064209, "grad_norm": 1.59073007106781, "learning_rate": 7.656931345705303e-07, "loss": 0.9048, "step": 11320 }, { "epoch": 0.27275354639820815, "grad_norm": 1.526777744293213, "learning_rate": 7.655663725788459e-07, "loss": 0.9254, "step": 11325 }, { "epoch": 0.2728739673899954, "grad_norm": 1.476694107055664, "learning_rate": 7.654396105871616e-07, "loss": 0.9151, "step": 11330 }, { "epoch": 0.2729943883817827, "grad_norm": 1.5337563753128052, "learning_rate": 7.653128485954771e-07, "loss": 0.9102, "step": 11335 }, { "epoch": 0.27311480937357, "grad_norm": 1.5907397270202637, "learning_rate": 7.651860866037926e-07, "loss": 0.9202, "step": 11340 }, { "epoch": 0.2732352303653573, "grad_norm": 1.7442792654037476, "learning_rate": 7.650593246121083e-07, "loss": 0.9326, "step": 11345 }, { "epoch": 0.27335565135714457, "grad_norm": 1.5198930501937866, "learning_rate": 7.649325626204238e-07, "loss": 0.9246, "step": 11350 }, { "epoch": 0.27347607234893184, "grad_norm": 1.7051467895507812, "learning_rate": 7.648058006287395e-07, "loss": 0.9402, "step": 11355 }, { "epoch": 0.27359649334071917, "grad_norm": 1.5837024450302124, "learning_rate": 7.646790386370551e-07, "loss": 0.8917, "step": 11360 }, { "epoch": 0.27371691433250644, "grad_norm": 2.134157419204712, "learning_rate": 7.645522766453705e-07, "loss": 0.9517, "step": 11365 }, { "epoch": 0.2738373353242937, "grad_norm": 1.6315033435821533, "learning_rate": 7.644255146536862e-07, "loss": 0.9348, "step": 11370 }, { "epoch": 0.27395775631608105, "grad_norm": 1.4641317129135132, "learning_rate": 7.642987526620018e-07, "loss": 0.9088, "step": 11375 }, { "epoch": 0.2740781773078683, "grad_norm": 1.6204805374145508, "learning_rate": 7.641719906703174e-07, "loss": 0.9057, "step": 11380 }, { "epoch": 0.2741985982996556, "grad_norm": 1.5819618701934814, "learning_rate": 7.64045228678633e-07, "loss": 0.9122, "step": 11385 }, { "epoch": 0.27431901929144287, "grad_norm": 1.5845000743865967, "learning_rate": 7.639184666869486e-07, "loss": 0.9032, "step": 11390 }, { "epoch": 0.2744394402832302, "grad_norm": 1.9233126640319824, "learning_rate": 7.637917046952641e-07, "loss": 0.9111, "step": 11395 }, { "epoch": 0.27455986127501747, "grad_norm": 1.5648930072784424, "learning_rate": 7.636649427035797e-07, "loss": 0.9242, "step": 11400 }, { "epoch": 0.27468028226680474, "grad_norm": 1.3369144201278687, "learning_rate": 7.635381807118954e-07, "loss": 0.8929, "step": 11405 }, { "epoch": 0.274800703258592, "grad_norm": 1.5446151494979858, "learning_rate": 7.634114187202108e-07, "loss": 0.9359, "step": 11410 }, { "epoch": 0.27492112425037935, "grad_norm": 1.5810915231704712, "learning_rate": 7.632846567285265e-07, "loss": 0.9423, "step": 11415 }, { "epoch": 0.2750415452421666, "grad_norm": 1.6881409883499146, "learning_rate": 7.631578947368421e-07, "loss": 0.9508, "step": 11420 }, { "epoch": 0.2751619662339539, "grad_norm": 1.5262564420700073, "learning_rate": 7.630311327451576e-07, "loss": 0.9414, "step": 11425 }, { "epoch": 0.27528238722574117, "grad_norm": 1.4334099292755127, "learning_rate": 7.629043707534733e-07, "loss": 0.9007, "step": 11430 }, { "epoch": 0.2754028082175285, "grad_norm": 1.3691715002059937, "learning_rate": 7.627776087617888e-07, "loss": 0.8882, "step": 11435 }, { "epoch": 0.27552322920931577, "grad_norm": 1.717207670211792, "learning_rate": 7.626508467701044e-07, "loss": 0.8711, "step": 11440 }, { "epoch": 0.27564365020110304, "grad_norm": 1.5524109601974487, "learning_rate": 7.6252408477842e-07, "loss": 0.8835, "step": 11445 }, { "epoch": 0.27576407119289037, "grad_norm": 1.6486324071884155, "learning_rate": 7.623973227867357e-07, "loss": 0.8512, "step": 11450 }, { "epoch": 0.27588449218467764, "grad_norm": 1.8091490268707275, "learning_rate": 7.622705607950511e-07, "loss": 0.9348, "step": 11455 }, { "epoch": 0.2760049131764649, "grad_norm": 1.6140121221542358, "learning_rate": 7.621437988033667e-07, "loss": 0.9328, "step": 11460 }, { "epoch": 0.2761253341682522, "grad_norm": 1.4895670413970947, "learning_rate": 7.620170368116824e-07, "loss": 0.8896, "step": 11465 }, { "epoch": 0.2762457551600395, "grad_norm": 1.525490403175354, "learning_rate": 7.618902748199979e-07, "loss": 0.969, "step": 11470 }, { "epoch": 0.2763661761518268, "grad_norm": 1.5126816034317017, "learning_rate": 7.617635128283136e-07, "loss": 0.9383, "step": 11475 }, { "epoch": 0.27648659714361407, "grad_norm": 1.466437578201294, "learning_rate": 7.616367508366291e-07, "loss": 0.9023, "step": 11480 }, { "epoch": 0.27660701813540134, "grad_norm": 1.477810263633728, "learning_rate": 7.615099888449446e-07, "loss": 0.9952, "step": 11485 }, { "epoch": 0.27672743912718867, "grad_norm": 1.444549798965454, "learning_rate": 7.613832268532603e-07, "loss": 0.969, "step": 11490 }, { "epoch": 0.27684786011897594, "grad_norm": 1.5766974687576294, "learning_rate": 7.612564648615759e-07, "loss": 0.8708, "step": 11495 }, { "epoch": 0.2769682811107632, "grad_norm": 1.6157164573669434, "learning_rate": 7.611297028698915e-07, "loss": 0.9892, "step": 11500 }, { "epoch": 0.27708870210255054, "grad_norm": 1.6714296340942383, "learning_rate": 7.61002940878207e-07, "loss": 0.8914, "step": 11505 }, { "epoch": 0.2772091230943378, "grad_norm": 1.6638739109039307, "learning_rate": 7.608761788865227e-07, "loss": 0.9758, "step": 11510 }, { "epoch": 0.2773295440861251, "grad_norm": 1.5034291744232178, "learning_rate": 7.607494168948382e-07, "loss": 0.9094, "step": 11515 }, { "epoch": 0.27744996507791236, "grad_norm": 1.6141936779022217, "learning_rate": 7.606226549031538e-07, "loss": 0.973, "step": 11520 }, { "epoch": 0.2775703860696997, "grad_norm": 1.4471684694290161, "learning_rate": 7.604958929114694e-07, "loss": 0.934, "step": 11525 }, { "epoch": 0.27769080706148697, "grad_norm": 1.5534322261810303, "learning_rate": 7.603691309197849e-07, "loss": 0.935, "step": 11530 }, { "epoch": 0.27781122805327424, "grad_norm": 1.5422011613845825, "learning_rate": 7.602423689281006e-07, "loss": 0.8949, "step": 11535 }, { "epoch": 0.2779316490450615, "grad_norm": 1.6572320461273193, "learning_rate": 7.601156069364162e-07, "loss": 0.9403, "step": 11540 }, { "epoch": 0.27805207003684884, "grad_norm": 1.4555169343948364, "learning_rate": 7.599888449447317e-07, "loss": 0.9631, "step": 11545 }, { "epoch": 0.2781724910286361, "grad_norm": 1.4223170280456543, "learning_rate": 7.598620829530473e-07, "loss": 0.9567, "step": 11550 }, { "epoch": 0.2782929120204234, "grad_norm": 1.6159312725067139, "learning_rate": 7.597353209613629e-07, "loss": 0.9088, "step": 11555 }, { "epoch": 0.27841333301221066, "grad_norm": 1.6263511180877686, "learning_rate": 7.596085589696785e-07, "loss": 0.9317, "step": 11560 }, { "epoch": 0.278533754003998, "grad_norm": 1.8005136251449585, "learning_rate": 7.594817969779941e-07, "loss": 0.9596, "step": 11565 }, { "epoch": 0.27865417499578526, "grad_norm": 1.4669032096862793, "learning_rate": 7.593550349863098e-07, "loss": 0.9032, "step": 11570 }, { "epoch": 0.27877459598757254, "grad_norm": 1.738063097000122, "learning_rate": 7.592282729946252e-07, "loss": 0.8592, "step": 11575 }, { "epoch": 0.27889501697935987, "grad_norm": 1.4538829326629639, "learning_rate": 7.591015110029408e-07, "loss": 0.8801, "step": 11580 }, { "epoch": 0.27901543797114714, "grad_norm": 1.568163514137268, "learning_rate": 7.589747490112565e-07, "loss": 0.9436, "step": 11585 }, { "epoch": 0.2791358589629344, "grad_norm": 1.8308765888214111, "learning_rate": 7.58847987019572e-07, "loss": 0.8998, "step": 11590 }, { "epoch": 0.2792562799547217, "grad_norm": 1.699054479598999, "learning_rate": 7.587212250278876e-07, "loss": 0.9425, "step": 11595 }, { "epoch": 0.279376700946509, "grad_norm": 1.5109950304031372, "learning_rate": 7.585944630362032e-07, "loss": 0.9135, "step": 11600 }, { "epoch": 0.2794971219382963, "grad_norm": 1.4689977169036865, "learning_rate": 7.584677010445187e-07, "loss": 0.9104, "step": 11605 }, { "epoch": 0.27961754293008356, "grad_norm": 1.4785994291305542, "learning_rate": 7.583409390528344e-07, "loss": 0.9532, "step": 11610 }, { "epoch": 0.27973796392187084, "grad_norm": 1.7106760740280151, "learning_rate": 7.5821417706115e-07, "loss": 0.9276, "step": 11615 }, { "epoch": 0.27985838491365816, "grad_norm": 1.4372215270996094, "learning_rate": 7.580874150694655e-07, "loss": 0.8959, "step": 11620 }, { "epoch": 0.27997880590544544, "grad_norm": 1.5852820873260498, "learning_rate": 7.579606530777811e-07, "loss": 0.9093, "step": 11625 }, { "epoch": 0.2800992268972327, "grad_norm": 1.6567399501800537, "learning_rate": 7.578338910860968e-07, "loss": 0.8969, "step": 11630 }, { "epoch": 0.28021964788902004, "grad_norm": 1.4735957384109497, "learning_rate": 7.577071290944123e-07, "loss": 0.9294, "step": 11635 }, { "epoch": 0.2803400688808073, "grad_norm": 1.548314094543457, "learning_rate": 7.575803671027278e-07, "loss": 0.9493, "step": 11640 }, { "epoch": 0.2804604898725946, "grad_norm": 1.645434021949768, "learning_rate": 7.574536051110435e-07, "loss": 0.9196, "step": 11645 }, { "epoch": 0.28058091086438186, "grad_norm": 1.3293720483779907, "learning_rate": 7.57326843119359e-07, "loss": 0.9154, "step": 11650 }, { "epoch": 0.2807013318561692, "grad_norm": 1.407025694847107, "learning_rate": 7.572000811276747e-07, "loss": 0.9089, "step": 11655 }, { "epoch": 0.28082175284795646, "grad_norm": 1.4637197256088257, "learning_rate": 7.570733191359903e-07, "loss": 0.9169, "step": 11660 }, { "epoch": 0.28094217383974374, "grad_norm": 1.6624650955200195, "learning_rate": 7.569465571443057e-07, "loss": 0.9073, "step": 11665 }, { "epoch": 0.281062594831531, "grad_norm": 1.5180774927139282, "learning_rate": 7.568197951526214e-07, "loss": 0.9752, "step": 11670 }, { "epoch": 0.28118301582331834, "grad_norm": 1.5979740619659424, "learning_rate": 7.56693033160937e-07, "loss": 0.9037, "step": 11675 }, { "epoch": 0.2813034368151056, "grad_norm": 1.5818020105361938, "learning_rate": 7.565662711692526e-07, "loss": 0.9473, "step": 11680 }, { "epoch": 0.2814238578068929, "grad_norm": 1.7185585498809814, "learning_rate": 7.564395091775682e-07, "loss": 0.9279, "step": 11685 }, { "epoch": 0.28154427879868016, "grad_norm": 1.6393964290618896, "learning_rate": 7.563127471858837e-07, "loss": 0.8911, "step": 11690 }, { "epoch": 0.2816646997904675, "grad_norm": 1.623339056968689, "learning_rate": 7.561859851941993e-07, "loss": 0.9645, "step": 11695 }, { "epoch": 0.28178512078225476, "grad_norm": 1.5230019092559814, "learning_rate": 7.560592232025149e-07, "loss": 0.9363, "step": 11700 }, { "epoch": 0.28190554177404203, "grad_norm": 1.6126532554626465, "learning_rate": 7.559324612108306e-07, "loss": 0.9706, "step": 11705 }, { "epoch": 0.28202596276582936, "grad_norm": 1.8683366775512695, "learning_rate": 7.55805699219146e-07, "loss": 0.9447, "step": 11710 }, { "epoch": 0.28214638375761664, "grad_norm": 1.5551618337631226, "learning_rate": 7.556789372274617e-07, "loss": 0.9209, "step": 11715 }, { "epoch": 0.2822668047494039, "grad_norm": 1.4535892009735107, "learning_rate": 7.555521752357773e-07, "loss": 0.9299, "step": 11720 }, { "epoch": 0.2823872257411912, "grad_norm": 2.7465555667877197, "learning_rate": 7.554254132440928e-07, "loss": 0.9016, "step": 11725 }, { "epoch": 0.2825076467329785, "grad_norm": 1.6401972770690918, "learning_rate": 7.552986512524085e-07, "loss": 0.8983, "step": 11730 }, { "epoch": 0.2826280677247658, "grad_norm": 1.655357003211975, "learning_rate": 7.55171889260724e-07, "loss": 0.9406, "step": 11735 }, { "epoch": 0.28274848871655306, "grad_norm": 1.4839775562286377, "learning_rate": 7.550451272690396e-07, "loss": 0.9209, "step": 11740 }, { "epoch": 0.28286890970834033, "grad_norm": 1.6104551553726196, "learning_rate": 7.549183652773552e-07, "loss": 0.9115, "step": 11745 }, { "epoch": 0.28298933070012766, "grad_norm": 1.6464667320251465, "learning_rate": 7.547916032856709e-07, "loss": 0.9345, "step": 11750 }, { "epoch": 0.28310975169191493, "grad_norm": 1.4681965112686157, "learning_rate": 7.546648412939863e-07, "loss": 0.9639, "step": 11755 }, { "epoch": 0.2832301726837022, "grad_norm": 1.4175541400909424, "learning_rate": 7.545380793023019e-07, "loss": 0.938, "step": 11760 }, { "epoch": 0.28335059367548954, "grad_norm": 1.8389374017715454, "learning_rate": 7.544113173106176e-07, "loss": 0.9199, "step": 11765 }, { "epoch": 0.2834710146672768, "grad_norm": 1.4778343439102173, "learning_rate": 7.542845553189331e-07, "loss": 0.9656, "step": 11770 }, { "epoch": 0.2835914356590641, "grad_norm": 1.595827341079712, "learning_rate": 7.541577933272488e-07, "loss": 0.9093, "step": 11775 }, { "epoch": 0.28371185665085136, "grad_norm": 1.5474449396133423, "learning_rate": 7.540310313355643e-07, "loss": 0.9245, "step": 11780 }, { "epoch": 0.2838322776426387, "grad_norm": 1.8880990743637085, "learning_rate": 7.539042693438798e-07, "loss": 0.9379, "step": 11785 }, { "epoch": 0.28395269863442596, "grad_norm": 1.5933219194412231, "learning_rate": 7.537775073521955e-07, "loss": 0.9089, "step": 11790 }, { "epoch": 0.28407311962621323, "grad_norm": 1.6476610898971558, "learning_rate": 7.536507453605111e-07, "loss": 0.947, "step": 11795 }, { "epoch": 0.2841935406180005, "grad_norm": 1.3580931425094604, "learning_rate": 7.535239833688267e-07, "loss": 0.935, "step": 11800 }, { "epoch": 0.28431396160978784, "grad_norm": 1.3850847482681274, "learning_rate": 7.533972213771422e-07, "loss": 0.9051, "step": 11805 }, { "epoch": 0.2844343826015751, "grad_norm": 1.4011439085006714, "learning_rate": 7.532704593854578e-07, "loss": 0.8799, "step": 11810 }, { "epoch": 0.2845548035933624, "grad_norm": 1.6950008869171143, "learning_rate": 7.531436973937734e-07, "loss": 0.9325, "step": 11815 }, { "epoch": 0.2846752245851497, "grad_norm": 1.5124635696411133, "learning_rate": 7.53016935402089e-07, "loss": 0.9198, "step": 11820 }, { "epoch": 0.284795645576937, "grad_norm": 1.481153964996338, "learning_rate": 7.528901734104046e-07, "loss": 0.9484, "step": 11825 }, { "epoch": 0.28491606656872426, "grad_norm": 1.6342970132827759, "learning_rate": 7.527634114187201e-07, "loss": 0.9259, "step": 11830 }, { "epoch": 0.28503648756051153, "grad_norm": 1.6247309446334839, "learning_rate": 7.526366494270358e-07, "loss": 0.9414, "step": 11835 }, { "epoch": 0.28515690855229886, "grad_norm": 1.7598034143447876, "learning_rate": 7.525098874353514e-07, "loss": 0.9578, "step": 11840 }, { "epoch": 0.28527732954408613, "grad_norm": 1.5112534761428833, "learning_rate": 7.523831254436669e-07, "loss": 0.9242, "step": 11845 }, { "epoch": 0.2853977505358734, "grad_norm": 1.48063063621521, "learning_rate": 7.522563634519825e-07, "loss": 0.9513, "step": 11850 }, { "epoch": 0.2855181715276607, "grad_norm": 1.4742642641067505, "learning_rate": 7.521296014602981e-07, "loss": 0.9896, "step": 11855 }, { "epoch": 0.285638592519448, "grad_norm": 1.7979505062103271, "learning_rate": 7.520028394686137e-07, "loss": 0.9469, "step": 11860 }, { "epoch": 0.2857590135112353, "grad_norm": 1.6058517694473267, "learning_rate": 7.518760774769293e-07, "loss": 0.9196, "step": 11865 }, { "epoch": 0.28587943450302256, "grad_norm": 1.6035789251327515, "learning_rate": 7.51749315485245e-07, "loss": 0.9473, "step": 11870 }, { "epoch": 0.28599985549480983, "grad_norm": 1.6348637342453003, "learning_rate": 7.516225534935604e-07, "loss": 0.9306, "step": 11875 }, { "epoch": 0.28612027648659716, "grad_norm": 1.7751243114471436, "learning_rate": 7.51495791501876e-07, "loss": 0.967, "step": 11880 }, { "epoch": 0.28624069747838443, "grad_norm": 1.681087613105774, "learning_rate": 7.513690295101917e-07, "loss": 0.9591, "step": 11885 }, { "epoch": 0.2863611184701717, "grad_norm": 1.6238495111465454, "learning_rate": 7.512422675185072e-07, "loss": 0.9472, "step": 11890 }, { "epoch": 0.28648153946195903, "grad_norm": 1.5437794923782349, "learning_rate": 7.511155055268227e-07, "loss": 0.928, "step": 11895 }, { "epoch": 0.2866019604537463, "grad_norm": 1.565635323524475, "learning_rate": 7.509887435351384e-07, "loss": 0.9666, "step": 11900 }, { "epoch": 0.2867223814455336, "grad_norm": 1.6704597473144531, "learning_rate": 7.508619815434539e-07, "loss": 0.9399, "step": 11905 }, { "epoch": 0.28684280243732085, "grad_norm": 1.522324800491333, "learning_rate": 7.507352195517696e-07, "loss": 0.9282, "step": 11910 }, { "epoch": 0.2869632234291082, "grad_norm": 1.4775621891021729, "learning_rate": 7.506084575600852e-07, "loss": 0.8827, "step": 11915 }, { "epoch": 0.28708364442089546, "grad_norm": 1.5191771984100342, "learning_rate": 7.504816955684007e-07, "loss": 0.887, "step": 11920 }, { "epoch": 0.28720406541268273, "grad_norm": 1.6595885753631592, "learning_rate": 7.503549335767163e-07, "loss": 0.9344, "step": 11925 }, { "epoch": 0.28732448640447, "grad_norm": 1.7235771417617798, "learning_rate": 7.502281715850319e-07, "loss": 0.967, "step": 11930 }, { "epoch": 0.28744490739625733, "grad_norm": 1.4997406005859375, "learning_rate": 7.501014095933475e-07, "loss": 0.9614, "step": 11935 }, { "epoch": 0.2875653283880446, "grad_norm": 1.6935820579528809, "learning_rate": 7.49974647601663e-07, "loss": 0.9519, "step": 11940 }, { "epoch": 0.2876857493798319, "grad_norm": 1.5619289875030518, "learning_rate": 7.498478856099787e-07, "loss": 0.8883, "step": 11945 }, { "epoch": 0.2878061703716192, "grad_norm": 1.4663188457489014, "learning_rate": 7.497211236182942e-07, "loss": 0.9096, "step": 11950 }, { "epoch": 0.2879265913634065, "grad_norm": 1.4758613109588623, "learning_rate": 7.495943616266098e-07, "loss": 0.9472, "step": 11955 }, { "epoch": 0.28804701235519375, "grad_norm": 1.6588231325149536, "learning_rate": 7.494675996349255e-07, "loss": 0.8967, "step": 11960 }, { "epoch": 0.28816743334698103, "grad_norm": 1.676204800605774, "learning_rate": 7.493408376432409e-07, "loss": 0.9067, "step": 11965 }, { "epoch": 0.28828785433876836, "grad_norm": 1.6175498962402344, "learning_rate": 7.492140756515566e-07, "loss": 0.9315, "step": 11970 }, { "epoch": 0.28840827533055563, "grad_norm": 1.8844404220581055, "learning_rate": 7.490873136598722e-07, "loss": 0.9453, "step": 11975 }, { "epoch": 0.2885286963223429, "grad_norm": 1.6034772396087646, "learning_rate": 7.489605516681878e-07, "loss": 0.9564, "step": 11980 }, { "epoch": 0.2886491173141302, "grad_norm": 1.7410510778427124, "learning_rate": 7.488337896765033e-07, "loss": 0.952, "step": 11985 }, { "epoch": 0.2887695383059175, "grad_norm": 1.5769463777542114, "learning_rate": 7.487070276848189e-07, "loss": 0.9601, "step": 11990 }, { "epoch": 0.2888899592977048, "grad_norm": 1.3784805536270142, "learning_rate": 7.485802656931345e-07, "loss": 0.9725, "step": 11995 }, { "epoch": 0.28901038028949205, "grad_norm": 1.6138496398925781, "learning_rate": 7.484535037014501e-07, "loss": 0.8706, "step": 12000 }, { "epoch": 0.2891308012812793, "grad_norm": 1.6166455745697021, "learning_rate": 7.483267417097658e-07, "loss": 0.9151, "step": 12005 }, { "epoch": 0.28925122227306665, "grad_norm": 1.4987436532974243, "learning_rate": 7.481999797180812e-07, "loss": 0.95, "step": 12010 }, { "epoch": 0.28937164326485393, "grad_norm": 1.3807342052459717, "learning_rate": 7.480732177263968e-07, "loss": 0.8848, "step": 12015 }, { "epoch": 0.2894920642566412, "grad_norm": 1.5243955850601196, "learning_rate": 7.479464557347125e-07, "loss": 0.9122, "step": 12020 }, { "epoch": 0.28961248524842853, "grad_norm": 1.4616668224334717, "learning_rate": 7.47819693743028e-07, "loss": 0.971, "step": 12025 }, { "epoch": 0.2897329062402158, "grad_norm": 1.71182119846344, "learning_rate": 7.476929317513437e-07, "loss": 0.9404, "step": 12030 }, { "epoch": 0.2898533272320031, "grad_norm": 1.7082267999649048, "learning_rate": 7.475661697596592e-07, "loss": 0.8648, "step": 12035 }, { "epoch": 0.28997374822379035, "grad_norm": 1.566318154335022, "learning_rate": 7.474394077679748e-07, "loss": 0.9647, "step": 12040 }, { "epoch": 0.2900941692155777, "grad_norm": 1.5414106845855713, "learning_rate": 7.473126457762904e-07, "loss": 0.927, "step": 12045 }, { "epoch": 0.29021459020736495, "grad_norm": 1.4850926399230957, "learning_rate": 7.47185883784606e-07, "loss": 0.9644, "step": 12050 }, { "epoch": 0.2903350111991522, "grad_norm": 1.467564582824707, "learning_rate": 7.470591217929216e-07, "loss": 0.9193, "step": 12055 }, { "epoch": 0.2904554321909395, "grad_norm": 1.3166462182998657, "learning_rate": 7.469323598012371e-07, "loss": 0.9112, "step": 12060 }, { "epoch": 0.29057585318272683, "grad_norm": 1.6410185098648071, "learning_rate": 7.468055978095528e-07, "loss": 0.9226, "step": 12065 }, { "epoch": 0.2906962741745141, "grad_norm": 1.5618233680725098, "learning_rate": 7.466788358178684e-07, "loss": 0.9354, "step": 12070 }, { "epoch": 0.2908166951663014, "grad_norm": 1.4823178052902222, "learning_rate": 7.465520738261839e-07, "loss": 0.9549, "step": 12075 }, { "epoch": 0.2909371161580887, "grad_norm": 1.6021852493286133, "learning_rate": 7.464253118344995e-07, "loss": 0.9438, "step": 12080 }, { "epoch": 0.291057537149876, "grad_norm": 1.3809022903442383, "learning_rate": 7.462985498428151e-07, "loss": 0.9323, "step": 12085 }, { "epoch": 0.29117795814166325, "grad_norm": 1.5327422618865967, "learning_rate": 7.461717878511307e-07, "loss": 0.9262, "step": 12090 }, { "epoch": 0.2912983791334505, "grad_norm": 1.8754101991653442, "learning_rate": 7.460450258594463e-07, "loss": 0.9813, "step": 12095 }, { "epoch": 0.29141880012523785, "grad_norm": 1.5512574911117554, "learning_rate": 7.45918263867762e-07, "loss": 0.9313, "step": 12100 }, { "epoch": 0.2915392211170251, "grad_norm": 1.3583546876907349, "learning_rate": 7.457915018760774e-07, "loss": 0.9547, "step": 12105 }, { "epoch": 0.2916596421088124, "grad_norm": 1.700033187866211, "learning_rate": 7.45664739884393e-07, "loss": 0.9332, "step": 12110 }, { "epoch": 0.2917800631005997, "grad_norm": 1.4785394668579102, "learning_rate": 7.455379778927087e-07, "loss": 0.9547, "step": 12115 }, { "epoch": 0.291900484092387, "grad_norm": 1.5970126390457153, "learning_rate": 7.454112159010242e-07, "loss": 0.9075, "step": 12120 }, { "epoch": 0.2920209050841743, "grad_norm": 1.6902731657028198, "learning_rate": 7.452844539093398e-07, "loss": 0.8942, "step": 12125 }, { "epoch": 0.29214132607596155, "grad_norm": 1.7784210443496704, "learning_rate": 7.451576919176554e-07, "loss": 0.9149, "step": 12130 }, { "epoch": 0.2922617470677488, "grad_norm": 1.578518033027649, "learning_rate": 7.450309299259709e-07, "loss": 0.8796, "step": 12135 }, { "epoch": 0.29238216805953615, "grad_norm": 1.5227102041244507, "learning_rate": 7.449041679342866e-07, "loss": 0.9589, "step": 12140 }, { "epoch": 0.2925025890513234, "grad_norm": 1.3288702964782715, "learning_rate": 7.447774059426022e-07, "loss": 0.9111, "step": 12145 }, { "epoch": 0.2926230100431107, "grad_norm": 1.8506990671157837, "learning_rate": 7.446506439509177e-07, "loss": 0.8961, "step": 12150 }, { "epoch": 0.292743431034898, "grad_norm": 1.7175493240356445, "learning_rate": 7.445238819592333e-07, "loss": 0.8911, "step": 12155 }, { "epoch": 0.2928638520266853, "grad_norm": 1.4701457023620605, "learning_rate": 7.44397119967549e-07, "loss": 0.9094, "step": 12160 }, { "epoch": 0.2929842730184726, "grad_norm": 1.4599250555038452, "learning_rate": 7.442703579758645e-07, "loss": 0.9085, "step": 12165 }, { "epoch": 0.29310469401025985, "grad_norm": 1.5617862939834595, "learning_rate": 7.4414359598418e-07, "loss": 0.9693, "step": 12170 }, { "epoch": 0.2932251150020472, "grad_norm": 1.5455378293991089, "learning_rate": 7.440168339924957e-07, "loss": 0.9117, "step": 12175 }, { "epoch": 0.29334553599383445, "grad_norm": 1.577228307723999, "learning_rate": 7.438900720008112e-07, "loss": 0.9267, "step": 12180 }, { "epoch": 0.2934659569856217, "grad_norm": 1.498317003250122, "learning_rate": 7.437633100091269e-07, "loss": 0.9736, "step": 12185 }, { "epoch": 0.293586377977409, "grad_norm": 1.5183560848236084, "learning_rate": 7.436365480174425e-07, "loss": 0.9262, "step": 12190 }, { "epoch": 0.2937067989691963, "grad_norm": 1.533486247062683, "learning_rate": 7.435097860257579e-07, "loss": 0.9778, "step": 12195 }, { "epoch": 0.2938272199609836, "grad_norm": 1.602159023284912, "learning_rate": 7.433830240340736e-07, "loss": 0.9087, "step": 12200 }, { "epoch": 0.29394764095277087, "grad_norm": 1.51285719871521, "learning_rate": 7.432562620423892e-07, "loss": 0.9398, "step": 12205 }, { "epoch": 0.2940680619445582, "grad_norm": 1.9584453105926514, "learning_rate": 7.431295000507048e-07, "loss": 0.9404, "step": 12210 }, { "epoch": 0.2941884829363455, "grad_norm": 1.566316843032837, "learning_rate": 7.430027380590204e-07, "loss": 0.8733, "step": 12215 }, { "epoch": 0.29430890392813275, "grad_norm": 1.4096190929412842, "learning_rate": 7.42875976067336e-07, "loss": 0.8925, "step": 12220 }, { "epoch": 0.29442932491992, "grad_norm": 1.6779398918151855, "learning_rate": 7.427492140756515e-07, "loss": 0.9056, "step": 12225 }, { "epoch": 0.29454974591170735, "grad_norm": 1.5147545337677002, "learning_rate": 7.426224520839671e-07, "loss": 0.8572, "step": 12230 }, { "epoch": 0.2946701669034946, "grad_norm": 1.5745984315872192, "learning_rate": 7.424956900922828e-07, "loss": 0.9432, "step": 12235 }, { "epoch": 0.2947905878952819, "grad_norm": 1.381061315536499, "learning_rate": 7.423689281005982e-07, "loss": 0.9103, "step": 12240 }, { "epoch": 0.29491100888706917, "grad_norm": 1.6397510766983032, "learning_rate": 7.422421661089139e-07, "loss": 0.9932, "step": 12245 }, { "epoch": 0.2950314298788565, "grad_norm": 1.387656807899475, "learning_rate": 7.421154041172295e-07, "loss": 0.9609, "step": 12250 }, { "epoch": 0.2951518508706438, "grad_norm": 1.619707465171814, "learning_rate": 7.41988642125545e-07, "loss": 0.9056, "step": 12255 }, { "epoch": 0.29527227186243105, "grad_norm": 1.9158905744552612, "learning_rate": 7.418618801338607e-07, "loss": 0.987, "step": 12260 }, { "epoch": 0.2953926928542184, "grad_norm": 1.4746280908584595, "learning_rate": 7.417351181421762e-07, "loss": 0.9398, "step": 12265 }, { "epoch": 0.29551311384600565, "grad_norm": 1.4023077487945557, "learning_rate": 7.416083561504918e-07, "loss": 0.9538, "step": 12270 }, { "epoch": 0.2956335348377929, "grad_norm": 1.656822681427002, "learning_rate": 7.414815941588074e-07, "loss": 0.9088, "step": 12275 }, { "epoch": 0.2957539558295802, "grad_norm": 1.841209888458252, "learning_rate": 7.41354832167123e-07, "loss": 0.9502, "step": 12280 }, { "epoch": 0.2958743768213675, "grad_norm": 1.6077429056167603, "learning_rate": 7.412280701754385e-07, "loss": 0.8769, "step": 12285 }, { "epoch": 0.2959947978131548, "grad_norm": 1.752013087272644, "learning_rate": 7.411013081837541e-07, "loss": 0.8655, "step": 12290 }, { "epoch": 0.29611521880494207, "grad_norm": 1.6575478315353394, "learning_rate": 7.409745461920698e-07, "loss": 0.9049, "step": 12295 }, { "epoch": 0.29623563979672934, "grad_norm": 1.7278908491134644, "learning_rate": 7.408477842003853e-07, "loss": 0.8964, "step": 12300 }, { "epoch": 0.2963560607885167, "grad_norm": 1.4890385866165161, "learning_rate": 7.40721022208701e-07, "loss": 0.9283, "step": 12305 }, { "epoch": 0.29647648178030395, "grad_norm": 1.5512890815734863, "learning_rate": 7.405942602170165e-07, "loss": 0.9398, "step": 12310 }, { "epoch": 0.2965969027720912, "grad_norm": 1.420446753501892, "learning_rate": 7.40467498225332e-07, "loss": 0.9375, "step": 12315 }, { "epoch": 0.2967173237638785, "grad_norm": 1.551830768585205, "learning_rate": 7.403407362336477e-07, "loss": 0.9162, "step": 12320 }, { "epoch": 0.2968377447556658, "grad_norm": 1.4006000757217407, "learning_rate": 7.402139742419633e-07, "loss": 0.937, "step": 12325 }, { "epoch": 0.2969581657474531, "grad_norm": 1.6034669876098633, "learning_rate": 7.400872122502789e-07, "loss": 0.9142, "step": 12330 }, { "epoch": 0.29707858673924037, "grad_norm": 1.5593807697296143, "learning_rate": 7.399604502585944e-07, "loss": 0.949, "step": 12335 }, { "epoch": 0.2971990077310277, "grad_norm": 1.5804564952850342, "learning_rate": 7.3983368826691e-07, "loss": 0.8958, "step": 12340 }, { "epoch": 0.29731942872281497, "grad_norm": 1.548058271408081, "learning_rate": 7.397069262752256e-07, "loss": 0.9193, "step": 12345 }, { "epoch": 0.29743984971460224, "grad_norm": 1.42953622341156, "learning_rate": 7.395801642835412e-07, "loss": 0.8941, "step": 12350 }, { "epoch": 0.2975602707063895, "grad_norm": 1.4340226650238037, "learning_rate": 7.394534022918568e-07, "loss": 0.8934, "step": 12355 }, { "epoch": 0.29768069169817685, "grad_norm": 2.00081205368042, "learning_rate": 7.393266403001723e-07, "loss": 0.9587, "step": 12360 }, { "epoch": 0.2978011126899641, "grad_norm": 1.5126882791519165, "learning_rate": 7.39199878308488e-07, "loss": 0.9114, "step": 12365 }, { "epoch": 0.2979215336817514, "grad_norm": 2.0931479930877686, "learning_rate": 7.390731163168036e-07, "loss": 0.9315, "step": 12370 }, { "epoch": 0.29804195467353867, "grad_norm": 1.5068527460098267, "learning_rate": 7.389463543251191e-07, "loss": 0.966, "step": 12375 }, { "epoch": 0.298162375665326, "grad_norm": 1.7116059064865112, "learning_rate": 7.388195923334347e-07, "loss": 0.9632, "step": 12380 }, { "epoch": 0.29828279665711327, "grad_norm": 1.4662964344024658, "learning_rate": 7.386928303417503e-07, "loss": 0.9452, "step": 12385 }, { "epoch": 0.29840321764890054, "grad_norm": 2.1836767196655273, "learning_rate": 7.385660683500659e-07, "loss": 0.9214, "step": 12390 }, { "epoch": 0.29852363864068787, "grad_norm": 1.7127418518066406, "learning_rate": 7.384393063583815e-07, "loss": 0.9549, "step": 12395 }, { "epoch": 0.29864405963247515, "grad_norm": 2.890199661254883, "learning_rate": 7.383125443666971e-07, "loss": 0.9384, "step": 12400 }, { "epoch": 0.2987644806242624, "grad_norm": 1.4727007150650024, "learning_rate": 7.381857823750126e-07, "loss": 0.9184, "step": 12405 }, { "epoch": 0.2988849016160497, "grad_norm": 1.4787065982818604, "learning_rate": 7.380590203833282e-07, "loss": 0.9226, "step": 12410 }, { "epoch": 0.299005322607837, "grad_norm": 1.5422483682632446, "learning_rate": 7.379322583916439e-07, "loss": 0.926, "step": 12415 }, { "epoch": 0.2991257435996243, "grad_norm": 1.5323668718338013, "learning_rate": 7.378054963999594e-07, "loss": 0.8974, "step": 12420 }, { "epoch": 0.29924616459141157, "grad_norm": 1.539766550064087, "learning_rate": 7.37678734408275e-07, "loss": 0.9356, "step": 12425 }, { "epoch": 0.29936658558319884, "grad_norm": 1.5156326293945312, "learning_rate": 7.375519724165906e-07, "loss": 0.9192, "step": 12430 }, { "epoch": 0.29948700657498617, "grad_norm": 1.5541387796401978, "learning_rate": 7.374252104249061e-07, "loss": 0.9117, "step": 12435 }, { "epoch": 0.29960742756677344, "grad_norm": 1.493133544921875, "learning_rate": 7.372984484332218e-07, "loss": 0.9163, "step": 12440 }, { "epoch": 0.2997278485585607, "grad_norm": 1.497205376625061, "learning_rate": 7.371716864415374e-07, "loss": 0.9502, "step": 12445 }, { "epoch": 0.299848269550348, "grad_norm": 1.4042925834655762, "learning_rate": 7.370449244498529e-07, "loss": 0.9456, "step": 12450 }, { "epoch": 0.2999686905421353, "grad_norm": 1.5551249980926514, "learning_rate": 7.369181624581685e-07, "loss": 0.9541, "step": 12455 }, { "epoch": 0.3000891115339226, "grad_norm": 1.5501880645751953, "learning_rate": 7.367914004664841e-07, "loss": 0.9713, "step": 12460 }, { "epoch": 0.30020953252570987, "grad_norm": 1.3974348306655884, "learning_rate": 7.366646384747997e-07, "loss": 0.935, "step": 12465 }, { "epoch": 0.3003299535174972, "grad_norm": 1.6827278137207031, "learning_rate": 7.365378764831152e-07, "loss": 0.9721, "step": 12470 }, { "epoch": 0.30045037450928447, "grad_norm": 1.3156495094299316, "learning_rate": 7.364111144914309e-07, "loss": 0.9203, "step": 12475 }, { "epoch": 0.30057079550107174, "grad_norm": 1.7625117301940918, "learning_rate": 7.362843524997464e-07, "loss": 0.9274, "step": 12480 }, { "epoch": 0.300691216492859, "grad_norm": 1.4372049570083618, "learning_rate": 7.36157590508062e-07, "loss": 0.925, "step": 12485 }, { "epoch": 0.30081163748464634, "grad_norm": 1.524330496788025, "learning_rate": 7.360308285163777e-07, "loss": 0.8828, "step": 12490 }, { "epoch": 0.3009320584764336, "grad_norm": 1.4534857273101807, "learning_rate": 7.359040665246931e-07, "loss": 0.9535, "step": 12495 }, { "epoch": 0.3010524794682209, "grad_norm": 1.603531837463379, "learning_rate": 7.357773045330088e-07, "loss": 0.9084, "step": 12500 }, { "epoch": 0.30117290046000816, "grad_norm": 1.6054507493972778, "learning_rate": 7.356505425413244e-07, "loss": 0.9002, "step": 12505 }, { "epoch": 0.3012933214517955, "grad_norm": 1.4217936992645264, "learning_rate": 7.3552378054964e-07, "loss": 0.9002, "step": 12510 }, { "epoch": 0.30141374244358277, "grad_norm": 1.3850017786026, "learning_rate": 7.353970185579556e-07, "loss": 0.9223, "step": 12515 }, { "epoch": 0.30153416343537004, "grad_norm": 1.639332890510559, "learning_rate": 7.352702565662711e-07, "loss": 0.8923, "step": 12520 }, { "epoch": 0.30165458442715737, "grad_norm": 1.4566148519515991, "learning_rate": 7.351434945745867e-07, "loss": 0.9832, "step": 12525 }, { "epoch": 0.30177500541894464, "grad_norm": 1.6120548248291016, "learning_rate": 7.350167325829023e-07, "loss": 0.9499, "step": 12530 }, { "epoch": 0.3018954264107319, "grad_norm": 1.4781494140625, "learning_rate": 7.34889970591218e-07, "loss": 0.9534, "step": 12535 }, { "epoch": 0.3020158474025192, "grad_norm": 1.6403977870941162, "learning_rate": 7.347632085995334e-07, "loss": 0.9437, "step": 12540 }, { "epoch": 0.3021362683943065, "grad_norm": 1.8433960676193237, "learning_rate": 7.34636446607849e-07, "loss": 0.9331, "step": 12545 }, { "epoch": 0.3022566893860938, "grad_norm": 1.4172022342681885, "learning_rate": 7.345096846161647e-07, "loss": 0.9138, "step": 12550 }, { "epoch": 0.30237711037788106, "grad_norm": 1.539608359336853, "learning_rate": 7.343829226244802e-07, "loss": 0.965, "step": 12555 }, { "epoch": 0.30249753136966834, "grad_norm": 1.4750511646270752, "learning_rate": 7.342561606327959e-07, "loss": 0.9426, "step": 12560 }, { "epoch": 0.30261795236145567, "grad_norm": 1.6245813369750977, "learning_rate": 7.341293986411114e-07, "loss": 0.9602, "step": 12565 }, { "epoch": 0.30273837335324294, "grad_norm": 1.6092920303344727, "learning_rate": 7.34002636649427e-07, "loss": 0.9319, "step": 12570 }, { "epoch": 0.3028587943450302, "grad_norm": 1.5803841352462769, "learning_rate": 7.338758746577426e-07, "loss": 0.9257, "step": 12575 }, { "epoch": 0.3029792153368175, "grad_norm": 1.3965332508087158, "learning_rate": 7.337491126660582e-07, "loss": 0.9672, "step": 12580 }, { "epoch": 0.3030996363286048, "grad_norm": 1.4964956045150757, "learning_rate": 7.336223506743737e-07, "loss": 0.9366, "step": 12585 }, { "epoch": 0.3032200573203921, "grad_norm": 1.4547592401504517, "learning_rate": 7.334955886826893e-07, "loss": 0.966, "step": 12590 }, { "epoch": 0.30334047831217936, "grad_norm": 1.5723978281021118, "learning_rate": 7.33368826691005e-07, "loss": 0.9346, "step": 12595 }, { "epoch": 0.3034608993039667, "grad_norm": 1.736131191253662, "learning_rate": 7.332420646993205e-07, "loss": 0.9028, "step": 12600 }, { "epoch": 0.30358132029575396, "grad_norm": 1.5331920385360718, "learning_rate": 7.331153027076361e-07, "loss": 0.951, "step": 12605 }, { "epoch": 0.30370174128754124, "grad_norm": 1.4614812135696411, "learning_rate": 7.329885407159517e-07, "loss": 0.9038, "step": 12610 }, { "epoch": 0.3038221622793285, "grad_norm": 1.489661693572998, "learning_rate": 7.328617787242672e-07, "loss": 0.9479, "step": 12615 }, { "epoch": 0.30394258327111584, "grad_norm": 1.7694226503372192, "learning_rate": 7.327350167325829e-07, "loss": 0.9877, "step": 12620 }, { "epoch": 0.3040630042629031, "grad_norm": 1.6576279401779175, "learning_rate": 7.326082547408985e-07, "loss": 0.9251, "step": 12625 }, { "epoch": 0.3041834252546904, "grad_norm": 1.6266634464263916, "learning_rate": 7.324814927492141e-07, "loss": 0.9477, "step": 12630 }, { "epoch": 0.30430384624647766, "grad_norm": 1.8012726306915283, "learning_rate": 7.323547307575296e-07, "loss": 0.9414, "step": 12635 }, { "epoch": 0.304424267238265, "grad_norm": 1.5876845121383667, "learning_rate": 7.322279687658452e-07, "loss": 0.8884, "step": 12640 }, { "epoch": 0.30454468823005226, "grad_norm": 1.5976060628890991, "learning_rate": 7.321012067741608e-07, "loss": 0.9195, "step": 12645 }, { "epoch": 0.30466510922183954, "grad_norm": 1.7384992837905884, "learning_rate": 7.319744447824764e-07, "loss": 0.8957, "step": 12650 }, { "epoch": 0.30478553021362687, "grad_norm": 1.5005650520324707, "learning_rate": 7.31847682790792e-07, "loss": 0.8999, "step": 12655 }, { "epoch": 0.30490595120541414, "grad_norm": 1.669329047203064, "learning_rate": 7.317209207991075e-07, "loss": 0.9812, "step": 12660 }, { "epoch": 0.3050263721972014, "grad_norm": 2.4113380908966064, "learning_rate": 7.315941588074231e-07, "loss": 0.937, "step": 12665 }, { "epoch": 0.3051467931889887, "grad_norm": 1.5560503005981445, "learning_rate": 7.314673968157388e-07, "loss": 0.9361, "step": 12670 }, { "epoch": 0.305267214180776, "grad_norm": 1.586330533027649, "learning_rate": 7.313406348240543e-07, "loss": 0.8831, "step": 12675 }, { "epoch": 0.3053876351725633, "grad_norm": 1.5432112216949463, "learning_rate": 7.312138728323699e-07, "loss": 0.9674, "step": 12680 }, { "epoch": 0.30550805616435056, "grad_norm": 1.4705675840377808, "learning_rate": 7.310871108406855e-07, "loss": 0.9631, "step": 12685 }, { "epoch": 0.30562847715613783, "grad_norm": 1.5707398653030396, "learning_rate": 7.30960348849001e-07, "loss": 0.9109, "step": 12690 }, { "epoch": 0.30574889814792516, "grad_norm": 1.442728042602539, "learning_rate": 7.308335868573167e-07, "loss": 0.939, "step": 12695 }, { "epoch": 0.30586931913971244, "grad_norm": 1.4560580253601074, "learning_rate": 7.307068248656323e-07, "loss": 0.9516, "step": 12700 }, { "epoch": 0.3059897401314997, "grad_norm": 1.3931323289871216, "learning_rate": 7.305800628739478e-07, "loss": 0.9051, "step": 12705 }, { "epoch": 0.30611016112328704, "grad_norm": 1.5336275100708008, "learning_rate": 7.304533008822634e-07, "loss": 0.9185, "step": 12710 }, { "epoch": 0.3062305821150743, "grad_norm": 1.623144507408142, "learning_rate": 7.303265388905791e-07, "loss": 0.9126, "step": 12715 }, { "epoch": 0.3063510031068616, "grad_norm": 1.3647394180297852, "learning_rate": 7.301997768988946e-07, "loss": 0.9169, "step": 12720 }, { "epoch": 0.30647142409864886, "grad_norm": 1.5183027982711792, "learning_rate": 7.300730149072101e-07, "loss": 0.8957, "step": 12725 }, { "epoch": 0.3065918450904362, "grad_norm": 1.7214516401290894, "learning_rate": 7.299462529155258e-07, "loss": 0.8836, "step": 12730 }, { "epoch": 0.30671226608222346, "grad_norm": 1.7252259254455566, "learning_rate": 7.298194909238413e-07, "loss": 0.8943, "step": 12735 }, { "epoch": 0.30683268707401073, "grad_norm": 1.7761805057525635, "learning_rate": 7.29692728932157e-07, "loss": 0.9436, "step": 12740 }, { "epoch": 0.306953108065798, "grad_norm": 1.742750883102417, "learning_rate": 7.295659669404726e-07, "loss": 0.9304, "step": 12745 }, { "epoch": 0.30707352905758534, "grad_norm": 1.8857698440551758, "learning_rate": 7.29439204948788e-07, "loss": 0.9366, "step": 12750 }, { "epoch": 0.3071939500493726, "grad_norm": 1.5318527221679688, "learning_rate": 7.293124429571037e-07, "loss": 0.9186, "step": 12755 }, { "epoch": 0.3073143710411599, "grad_norm": 1.5236737728118896, "learning_rate": 7.291856809654193e-07, "loss": 0.9085, "step": 12760 }, { "epoch": 0.30743479203294716, "grad_norm": 1.7529305219650269, "learning_rate": 7.290589189737349e-07, "loss": 0.8909, "step": 12765 }, { "epoch": 0.3075552130247345, "grad_norm": 1.4512908458709717, "learning_rate": 7.289321569820504e-07, "loss": 0.9263, "step": 12770 }, { "epoch": 0.30767563401652176, "grad_norm": 1.7084128856658936, "learning_rate": 7.288053949903661e-07, "loss": 0.9624, "step": 12775 }, { "epoch": 0.30779605500830903, "grad_norm": 1.4374417066574097, "learning_rate": 7.286786329986816e-07, "loss": 0.9476, "step": 12780 }, { "epoch": 0.30791647600009636, "grad_norm": 1.4318782091140747, "learning_rate": 7.285518710069972e-07, "loss": 0.9191, "step": 12785 }, { "epoch": 0.30803689699188364, "grad_norm": 1.5451619625091553, "learning_rate": 7.284251090153129e-07, "loss": 0.8945, "step": 12790 }, { "epoch": 0.3081573179836709, "grad_norm": 1.6542329788208008, "learning_rate": 7.282983470236283e-07, "loss": 0.8807, "step": 12795 }, { "epoch": 0.3082777389754582, "grad_norm": 1.6969656944274902, "learning_rate": 7.28171585031944e-07, "loss": 0.9141, "step": 12800 }, { "epoch": 0.3083981599672455, "grad_norm": 1.467103123664856, "learning_rate": 7.280448230402596e-07, "loss": 0.9163, "step": 12805 }, { "epoch": 0.3085185809590328, "grad_norm": 1.5035455226898193, "learning_rate": 7.279180610485751e-07, "loss": 0.9495, "step": 12810 }, { "epoch": 0.30863900195082006, "grad_norm": 1.504870891571045, "learning_rate": 7.277912990568908e-07, "loss": 0.8743, "step": 12815 }, { "epoch": 0.30875942294260733, "grad_norm": 1.5313233137130737, "learning_rate": 7.276645370652063e-07, "loss": 0.907, "step": 12820 }, { "epoch": 0.30887984393439466, "grad_norm": 1.7336504459381104, "learning_rate": 7.275377750735219e-07, "loss": 0.9576, "step": 12825 }, { "epoch": 0.30900026492618193, "grad_norm": 1.4466485977172852, "learning_rate": 7.274110130818375e-07, "loss": 0.932, "step": 12830 }, { "epoch": 0.3091206859179692, "grad_norm": 1.5821729898452759, "learning_rate": 7.272842510901532e-07, "loss": 0.8947, "step": 12835 }, { "epoch": 0.30924110690975654, "grad_norm": 1.4757659435272217, "learning_rate": 7.271574890984686e-07, "loss": 0.9697, "step": 12840 }, { "epoch": 0.3093615279015438, "grad_norm": 1.3930412530899048, "learning_rate": 7.270307271067842e-07, "loss": 0.8832, "step": 12845 }, { "epoch": 0.3094819488933311, "grad_norm": 1.5699188709259033, "learning_rate": 7.269039651150999e-07, "loss": 0.9627, "step": 12850 }, { "epoch": 0.30960236988511836, "grad_norm": 1.5719423294067383, "learning_rate": 7.267772031234154e-07, "loss": 0.9564, "step": 12855 }, { "epoch": 0.3097227908769057, "grad_norm": 1.3833011388778687, "learning_rate": 7.266504411317311e-07, "loss": 0.8952, "step": 12860 }, { "epoch": 0.30984321186869296, "grad_norm": 1.3970744609832764, "learning_rate": 7.265236791400466e-07, "loss": 0.881, "step": 12865 }, { "epoch": 0.30996363286048023, "grad_norm": 1.5236992835998535, "learning_rate": 7.263969171483621e-07, "loss": 0.9615, "step": 12870 }, { "epoch": 0.3100840538522675, "grad_norm": 1.4951473474502563, "learning_rate": 7.262701551566778e-07, "loss": 0.9406, "step": 12875 }, { "epoch": 0.31020447484405483, "grad_norm": 1.4671101570129395, "learning_rate": 7.261433931649934e-07, "loss": 0.9372, "step": 12880 }, { "epoch": 0.3103248958358421, "grad_norm": 1.4430850744247437, "learning_rate": 7.260166311733089e-07, "loss": 0.9654, "step": 12885 }, { "epoch": 0.3104453168276294, "grad_norm": 1.451224684715271, "learning_rate": 7.258898691816245e-07, "loss": 0.9165, "step": 12890 }, { "epoch": 0.31056573781941665, "grad_norm": 1.6193621158599854, "learning_rate": 7.257631071899402e-07, "loss": 0.9557, "step": 12895 }, { "epoch": 0.310686158811204, "grad_norm": 1.431778073310852, "learning_rate": 7.256363451982557e-07, "loss": 0.9045, "step": 12900 }, { "epoch": 0.31080657980299126, "grad_norm": 1.5868239402770996, "learning_rate": 7.255095832065713e-07, "loss": 0.9615, "step": 12905 }, { "epoch": 0.31092700079477853, "grad_norm": 1.6796597242355347, "learning_rate": 7.253828212148869e-07, "loss": 0.9218, "step": 12910 }, { "epoch": 0.31104742178656586, "grad_norm": 1.586971402168274, "learning_rate": 7.252560592232024e-07, "loss": 0.9413, "step": 12915 }, { "epoch": 0.31116784277835313, "grad_norm": 1.555291771888733, "learning_rate": 7.251292972315181e-07, "loss": 0.8944, "step": 12920 }, { "epoch": 0.3112882637701404, "grad_norm": 1.624487042427063, "learning_rate": 7.250025352398337e-07, "loss": 0.9409, "step": 12925 }, { "epoch": 0.3114086847619277, "grad_norm": 1.8175469636917114, "learning_rate": 7.248757732481492e-07, "loss": 0.8911, "step": 12930 }, { "epoch": 0.311529105753715, "grad_norm": 1.4849705696105957, "learning_rate": 7.247490112564648e-07, "loss": 0.927, "step": 12935 }, { "epoch": 0.3116495267455023, "grad_norm": 1.5028043985366821, "learning_rate": 7.246222492647804e-07, "loss": 0.9222, "step": 12940 }, { "epoch": 0.31176994773728955, "grad_norm": 1.5761449337005615, "learning_rate": 7.24495487273096e-07, "loss": 0.9601, "step": 12945 }, { "epoch": 0.31189036872907683, "grad_norm": 1.5496820211410522, "learning_rate": 7.243687252814116e-07, "loss": 0.9189, "step": 12950 }, { "epoch": 0.31201078972086416, "grad_norm": 1.502267599105835, "learning_rate": 7.242419632897271e-07, "loss": 0.8692, "step": 12955 }, { "epoch": 0.31213121071265143, "grad_norm": 1.561307668685913, "learning_rate": 7.241152012980427e-07, "loss": 0.9263, "step": 12960 }, { "epoch": 0.3122516317044387, "grad_norm": 1.597617268562317, "learning_rate": 7.239884393063583e-07, "loss": 0.888, "step": 12965 }, { "epoch": 0.31237205269622603, "grad_norm": 1.7717195749282837, "learning_rate": 7.23861677314674e-07, "loss": 0.893, "step": 12970 }, { "epoch": 0.3124924736880133, "grad_norm": 1.3559247255325317, "learning_rate": 7.237349153229895e-07, "loss": 0.9217, "step": 12975 }, { "epoch": 0.3126128946798006, "grad_norm": 1.5530009269714355, "learning_rate": 7.236081533313051e-07, "loss": 0.9354, "step": 12980 }, { "epoch": 0.31273331567158785, "grad_norm": 1.5920103788375854, "learning_rate": 7.234813913396207e-07, "loss": 0.906, "step": 12985 }, { "epoch": 0.3128537366633752, "grad_norm": 1.5397907495498657, "learning_rate": 7.233546293479362e-07, "loss": 0.8861, "step": 12990 }, { "epoch": 0.31297415765516245, "grad_norm": 1.7374628782272339, "learning_rate": 7.232278673562519e-07, "loss": 0.9622, "step": 12995 }, { "epoch": 0.31309457864694973, "grad_norm": 1.6311469078063965, "learning_rate": 7.231011053645675e-07, "loss": 0.9128, "step": 13000 }, { "epoch": 0.313214999638737, "grad_norm": 1.3971755504608154, "learning_rate": 7.22974343372883e-07, "loss": 0.9522, "step": 13005 }, { "epoch": 0.31333542063052433, "grad_norm": 1.5353749990463257, "learning_rate": 7.228475813811986e-07, "loss": 0.9066, "step": 13010 }, { "epoch": 0.3134558416223116, "grad_norm": 1.4577460289001465, "learning_rate": 7.227208193895143e-07, "loss": 0.9182, "step": 13015 }, { "epoch": 0.3135762626140989, "grad_norm": 1.6157535314559937, "learning_rate": 7.225940573978298e-07, "loss": 0.904, "step": 13020 }, { "epoch": 0.31369668360588615, "grad_norm": 1.75568687915802, "learning_rate": 7.224672954061453e-07, "loss": 0.9106, "step": 13025 }, { "epoch": 0.3138171045976735, "grad_norm": 1.5280760526657104, "learning_rate": 7.22340533414461e-07, "loss": 0.923, "step": 13030 }, { "epoch": 0.31393752558946075, "grad_norm": 1.4735020399093628, "learning_rate": 7.222137714227765e-07, "loss": 0.914, "step": 13035 }, { "epoch": 0.314057946581248, "grad_norm": 1.794739842414856, "learning_rate": 7.220870094310922e-07, "loss": 0.9401, "step": 13040 }, { "epoch": 0.31417836757303536, "grad_norm": 1.6184120178222656, "learning_rate": 7.219602474394078e-07, "loss": 0.9148, "step": 13045 }, { "epoch": 0.31429878856482263, "grad_norm": 1.521453857421875, "learning_rate": 7.218334854477232e-07, "loss": 0.9631, "step": 13050 }, { "epoch": 0.3144192095566099, "grad_norm": 1.5748240947723389, "learning_rate": 7.217067234560389e-07, "loss": 0.933, "step": 13055 }, { "epoch": 0.3145396305483972, "grad_norm": 1.590860366821289, "learning_rate": 7.215799614643545e-07, "loss": 0.8997, "step": 13060 }, { "epoch": 0.3146600515401845, "grad_norm": 1.5484305620193481, "learning_rate": 7.214531994726701e-07, "loss": 0.9044, "step": 13065 }, { "epoch": 0.3147804725319718, "grad_norm": 1.5679107904434204, "learning_rate": 7.213264374809856e-07, "loss": 0.8809, "step": 13070 }, { "epoch": 0.31490089352375905, "grad_norm": 1.368302345275879, "learning_rate": 7.211996754893012e-07, "loss": 0.9278, "step": 13075 }, { "epoch": 0.3150213145155463, "grad_norm": 1.55647873878479, "learning_rate": 7.210729134976169e-07, "loss": 0.9217, "step": 13080 }, { "epoch": 0.31514173550733365, "grad_norm": 1.5681729316711426, "learning_rate": 7.209461515059324e-07, "loss": 0.9554, "step": 13085 }, { "epoch": 0.3152621564991209, "grad_norm": 1.511460781097412, "learning_rate": 7.208193895142481e-07, "loss": 0.9247, "step": 13090 }, { "epoch": 0.3153825774909082, "grad_norm": 1.5184860229492188, "learning_rate": 7.206926275225636e-07, "loss": 0.8765, "step": 13095 }, { "epoch": 0.31550299848269553, "grad_norm": 1.7019352912902832, "learning_rate": 7.205658655308792e-07, "loss": 0.9122, "step": 13100 }, { "epoch": 0.3156234194744828, "grad_norm": 1.5765482187271118, "learning_rate": 7.204391035391948e-07, "loss": 0.9133, "step": 13105 }, { "epoch": 0.3157438404662701, "grad_norm": 1.5094279050827026, "learning_rate": 7.203123415475104e-07, "loss": 0.9337, "step": 13110 }, { "epoch": 0.31586426145805735, "grad_norm": 1.4930437803268433, "learning_rate": 7.20185579555826e-07, "loss": 0.9392, "step": 13115 }, { "epoch": 0.3159846824498447, "grad_norm": 1.6247256994247437, "learning_rate": 7.200588175641415e-07, "loss": 0.9644, "step": 13120 }, { "epoch": 0.31610510344163195, "grad_norm": 2.296663522720337, "learning_rate": 7.199320555724572e-07, "loss": 0.9872, "step": 13125 }, { "epoch": 0.3162255244334192, "grad_norm": 1.671225666999817, "learning_rate": 7.198052935807727e-07, "loss": 0.9924, "step": 13130 }, { "epoch": 0.3163459454252065, "grad_norm": 1.6024426221847534, "learning_rate": 7.196785315890883e-07, "loss": 0.9553, "step": 13135 }, { "epoch": 0.3164663664169938, "grad_norm": 1.5475425720214844, "learning_rate": 7.195517695974039e-07, "loss": 0.913, "step": 13140 }, { "epoch": 0.3165867874087811, "grad_norm": 1.7463332414627075, "learning_rate": 7.194250076057194e-07, "loss": 0.9436, "step": 13145 }, { "epoch": 0.3167072084005684, "grad_norm": 1.461932897567749, "learning_rate": 7.192982456140351e-07, "loss": 0.9176, "step": 13150 }, { "epoch": 0.3168276293923557, "grad_norm": 1.4646632671356201, "learning_rate": 7.191714836223507e-07, "loss": 0.9003, "step": 13155 }, { "epoch": 0.316948050384143, "grad_norm": 1.5874431133270264, "learning_rate": 7.190447216306663e-07, "loss": 0.9587, "step": 13160 }, { "epoch": 0.31706847137593025, "grad_norm": 1.8062708377838135, "learning_rate": 7.189179596389818e-07, "loss": 0.921, "step": 13165 }, { "epoch": 0.3171888923677175, "grad_norm": 1.5187337398529053, "learning_rate": 7.187911976472974e-07, "loss": 0.9286, "step": 13170 }, { "epoch": 0.31730931335950485, "grad_norm": 1.4751627445220947, "learning_rate": 7.18664435655613e-07, "loss": 0.8909, "step": 13175 }, { "epoch": 0.3174297343512921, "grad_norm": 1.4710909128189087, "learning_rate": 7.185376736639286e-07, "loss": 0.8811, "step": 13180 }, { "epoch": 0.3175501553430794, "grad_norm": 1.7347071170806885, "learning_rate": 7.184109116722443e-07, "loss": 0.9272, "step": 13185 }, { "epoch": 0.31767057633486667, "grad_norm": 1.5981749296188354, "learning_rate": 7.182841496805597e-07, "loss": 0.9722, "step": 13190 }, { "epoch": 0.317790997326654, "grad_norm": 1.7842888832092285, "learning_rate": 7.181573876888753e-07, "loss": 0.9258, "step": 13195 }, { "epoch": 0.3179114183184413, "grad_norm": 1.4398106336593628, "learning_rate": 7.18030625697191e-07, "loss": 0.8923, "step": 13200 }, { "epoch": 0.31803183931022855, "grad_norm": 1.4745246171951294, "learning_rate": 7.179038637055065e-07, "loss": 0.9105, "step": 13205 }, { "epoch": 0.3181522603020158, "grad_norm": 1.5541917085647583, "learning_rate": 7.177771017138221e-07, "loss": 0.8613, "step": 13210 }, { "epoch": 0.31827268129380315, "grad_norm": 1.4687206745147705, "learning_rate": 7.176503397221377e-07, "loss": 0.9219, "step": 13215 }, { "epoch": 0.3183931022855904, "grad_norm": 1.6779239177703857, "learning_rate": 7.175235777304532e-07, "loss": 0.8582, "step": 13220 }, { "epoch": 0.3185135232773777, "grad_norm": 1.4923917055130005, "learning_rate": 7.173968157387689e-07, "loss": 0.9351, "step": 13225 }, { "epoch": 0.318633944269165, "grad_norm": 1.669708490371704, "learning_rate": 7.172700537470845e-07, "loss": 0.973, "step": 13230 }, { "epoch": 0.3187543652609523, "grad_norm": 1.9078813791275024, "learning_rate": 7.171432917554e-07, "loss": 0.9391, "step": 13235 }, { "epoch": 0.3188747862527396, "grad_norm": 1.4691177606582642, "learning_rate": 7.170165297637156e-07, "loss": 0.9232, "step": 13240 }, { "epoch": 0.31899520724452685, "grad_norm": 1.5904854536056519, "learning_rate": 7.168897677720313e-07, "loss": 0.9105, "step": 13245 }, { "epoch": 0.3191156282363142, "grad_norm": 1.7188918590545654, "learning_rate": 7.167630057803468e-07, "loss": 0.884, "step": 13250 }, { "epoch": 0.31923604922810145, "grad_norm": 1.4673360586166382, "learning_rate": 7.166362437886623e-07, "loss": 0.9632, "step": 13255 }, { "epoch": 0.3193564702198887, "grad_norm": 1.5699816942214966, "learning_rate": 7.16509481796978e-07, "loss": 0.9771, "step": 13260 }, { "epoch": 0.319476891211676, "grad_norm": 1.598327398300171, "learning_rate": 7.163827198052935e-07, "loss": 0.9507, "step": 13265 }, { "epoch": 0.3195973122034633, "grad_norm": 1.6673810482025146, "learning_rate": 7.162559578136092e-07, "loss": 0.9501, "step": 13270 }, { "epoch": 0.3197177331952506, "grad_norm": 1.5441638231277466, "learning_rate": 7.161291958219248e-07, "loss": 0.9359, "step": 13275 }, { "epoch": 0.31983815418703787, "grad_norm": 1.3806934356689453, "learning_rate": 7.160024338302402e-07, "loss": 0.8852, "step": 13280 }, { "epoch": 0.3199585751788252, "grad_norm": 1.5654473304748535, "learning_rate": 7.158756718385559e-07, "loss": 0.9018, "step": 13285 }, { "epoch": 0.3200789961706125, "grad_norm": 1.5995250940322876, "learning_rate": 7.157489098468715e-07, "loss": 0.9273, "step": 13290 }, { "epoch": 0.32019941716239975, "grad_norm": 1.6036864519119263, "learning_rate": 7.156221478551871e-07, "loss": 0.9197, "step": 13295 }, { "epoch": 0.320319838154187, "grad_norm": 1.4564919471740723, "learning_rate": 7.154953858635027e-07, "loss": 0.9494, "step": 13300 }, { "epoch": 0.32044025914597435, "grad_norm": 1.441354513168335, "learning_rate": 7.153686238718183e-07, "loss": 0.9084, "step": 13305 }, { "epoch": 0.3205606801377616, "grad_norm": 1.6930876970291138, "learning_rate": 7.152418618801338e-07, "loss": 0.9256, "step": 13310 }, { "epoch": 0.3206811011295489, "grad_norm": 1.5789000988006592, "learning_rate": 7.151150998884494e-07, "loss": 0.918, "step": 13315 }, { "epoch": 0.32080152212133617, "grad_norm": 1.6201459169387817, "learning_rate": 7.149883378967651e-07, "loss": 0.8801, "step": 13320 }, { "epoch": 0.3209219431131235, "grad_norm": 1.62224543094635, "learning_rate": 7.148615759050805e-07, "loss": 0.8973, "step": 13325 }, { "epoch": 0.32104236410491077, "grad_norm": 1.5501794815063477, "learning_rate": 7.147348139133962e-07, "loss": 0.9312, "step": 13330 }, { "epoch": 0.32116278509669804, "grad_norm": 1.4192733764648438, "learning_rate": 7.146080519217118e-07, "loss": 0.9125, "step": 13335 }, { "epoch": 0.3212832060884853, "grad_norm": 1.6199774742126465, "learning_rate": 7.144812899300273e-07, "loss": 0.9498, "step": 13340 }, { "epoch": 0.32140362708027265, "grad_norm": 1.7698010206222534, "learning_rate": 7.14354527938343e-07, "loss": 0.9673, "step": 13345 }, { "epoch": 0.3215240480720599, "grad_norm": 1.632602572441101, "learning_rate": 7.142277659466585e-07, "loss": 0.8909, "step": 13350 }, { "epoch": 0.3216444690638472, "grad_norm": 1.4607270956039429, "learning_rate": 7.141010039549741e-07, "loss": 0.8579, "step": 13355 }, { "epoch": 0.3217648900556345, "grad_norm": 1.6992939710617065, "learning_rate": 7.139742419632897e-07, "loss": 0.9084, "step": 13360 }, { "epoch": 0.3218853110474218, "grad_norm": 1.440292239189148, "learning_rate": 7.138474799716054e-07, "loss": 0.8978, "step": 13365 }, { "epoch": 0.32200573203920907, "grad_norm": 1.411969542503357, "learning_rate": 7.137207179799208e-07, "loss": 0.9203, "step": 13370 }, { "epoch": 0.32212615303099634, "grad_norm": 1.7011206150054932, "learning_rate": 7.135939559882364e-07, "loss": 0.9135, "step": 13375 }, { "epoch": 0.32224657402278367, "grad_norm": 1.639039158821106, "learning_rate": 7.134671939965521e-07, "loss": 0.9575, "step": 13380 }, { "epoch": 0.32236699501457095, "grad_norm": 1.7481632232666016, "learning_rate": 7.133404320048676e-07, "loss": 0.9525, "step": 13385 }, { "epoch": 0.3224874160063582, "grad_norm": 1.509905219078064, "learning_rate": 7.132136700131833e-07, "loss": 0.9806, "step": 13390 }, { "epoch": 0.3226078369981455, "grad_norm": 1.5179061889648438, "learning_rate": 7.130869080214988e-07, "loss": 0.9796, "step": 13395 }, { "epoch": 0.3227282579899328, "grad_norm": 1.6003575325012207, "learning_rate": 7.129601460298143e-07, "loss": 0.9339, "step": 13400 }, { "epoch": 0.3228486789817201, "grad_norm": 1.5749802589416504, "learning_rate": 7.1283338403813e-07, "loss": 0.9196, "step": 13405 }, { "epoch": 0.32296909997350737, "grad_norm": 1.4835052490234375, "learning_rate": 7.127066220464456e-07, "loss": 0.9415, "step": 13410 }, { "epoch": 0.3230895209652947, "grad_norm": 1.6921086311340332, "learning_rate": 7.125798600547611e-07, "loss": 0.8908, "step": 13415 }, { "epoch": 0.32320994195708197, "grad_norm": 1.3966692686080933, "learning_rate": 7.124530980630767e-07, "loss": 0.9145, "step": 13420 }, { "epoch": 0.32333036294886924, "grad_norm": 1.4754492044448853, "learning_rate": 7.123263360713924e-07, "loss": 0.898, "step": 13425 }, { "epoch": 0.3234507839406565, "grad_norm": 1.5174185037612915, "learning_rate": 7.121995740797079e-07, "loss": 0.8697, "step": 13430 }, { "epoch": 0.32357120493244385, "grad_norm": 1.631089210510254, "learning_rate": 7.120728120880235e-07, "loss": 0.9433, "step": 13435 }, { "epoch": 0.3236916259242311, "grad_norm": 1.4840291738510132, "learning_rate": 7.119460500963391e-07, "loss": 0.932, "step": 13440 }, { "epoch": 0.3238120469160184, "grad_norm": 1.6019710302352905, "learning_rate": 7.118192881046546e-07, "loss": 0.9055, "step": 13445 }, { "epoch": 0.32393246790780567, "grad_norm": 1.71696937084198, "learning_rate": 7.116925261129703e-07, "loss": 0.9434, "step": 13450 }, { "epoch": 0.324052888899593, "grad_norm": 1.519882082939148, "learning_rate": 7.115657641212859e-07, "loss": 0.9167, "step": 13455 }, { "epoch": 0.32417330989138027, "grad_norm": 1.4360700845718384, "learning_rate": 7.114390021296014e-07, "loss": 0.9351, "step": 13460 }, { "epoch": 0.32429373088316754, "grad_norm": 1.5094038248062134, "learning_rate": 7.11312240137917e-07, "loss": 0.9051, "step": 13465 }, { "epoch": 0.3244141518749548, "grad_norm": 1.4464014768600464, "learning_rate": 7.111854781462326e-07, "loss": 0.9252, "step": 13470 }, { "epoch": 0.32453457286674214, "grad_norm": 1.5232619047164917, "learning_rate": 7.110587161545482e-07, "loss": 0.8695, "step": 13475 }, { "epoch": 0.3246549938585294, "grad_norm": 1.4510326385498047, "learning_rate": 7.109319541628638e-07, "loss": 0.9222, "step": 13480 }, { "epoch": 0.3247754148503167, "grad_norm": 1.5552394390106201, "learning_rate": 7.108051921711793e-07, "loss": 0.8811, "step": 13485 }, { "epoch": 0.324895835842104, "grad_norm": 1.6308211088180542, "learning_rate": 7.106784301794949e-07, "loss": 0.8948, "step": 13490 }, { "epoch": 0.3250162568338913, "grad_norm": 1.4746640920639038, "learning_rate": 7.105516681878105e-07, "loss": 0.9311, "step": 13495 }, { "epoch": 0.32513667782567857, "grad_norm": 1.5032947063446045, "learning_rate": 7.104249061961262e-07, "loss": 0.9029, "step": 13500 }, { "epoch": 0.32525709881746584, "grad_norm": 1.6702075004577637, "learning_rate": 7.102981442044417e-07, "loss": 0.9339, "step": 13505 }, { "epoch": 0.32537751980925317, "grad_norm": 1.8243368864059448, "learning_rate": 7.101713822127573e-07, "loss": 0.9171, "step": 13510 }, { "epoch": 0.32549794080104044, "grad_norm": 1.6092954874038696, "learning_rate": 7.100446202210729e-07, "loss": 0.9102, "step": 13515 }, { "epoch": 0.3256183617928277, "grad_norm": 1.7775665521621704, "learning_rate": 7.099178582293884e-07, "loss": 0.9326, "step": 13520 }, { "epoch": 0.325738782784615, "grad_norm": 1.5000969171524048, "learning_rate": 7.097910962377041e-07, "loss": 0.8927, "step": 13525 }, { "epoch": 0.3258592037764023, "grad_norm": 2.0762746334075928, "learning_rate": 7.096643342460197e-07, "loss": 0.8871, "step": 13530 }, { "epoch": 0.3259796247681896, "grad_norm": 1.554837942123413, "learning_rate": 7.095375722543352e-07, "loss": 0.9493, "step": 13535 }, { "epoch": 0.32610004575997686, "grad_norm": 1.3311814069747925, "learning_rate": 7.094108102626508e-07, "loss": 0.9113, "step": 13540 }, { "epoch": 0.3262204667517642, "grad_norm": 1.598663568496704, "learning_rate": 7.092840482709665e-07, "loss": 0.9669, "step": 13545 }, { "epoch": 0.32634088774355147, "grad_norm": 1.6727991104125977, "learning_rate": 7.09157286279282e-07, "loss": 0.8797, "step": 13550 }, { "epoch": 0.32646130873533874, "grad_norm": 1.5206135511398315, "learning_rate": 7.090305242875975e-07, "loss": 0.9103, "step": 13555 }, { "epoch": 0.326581729727126, "grad_norm": 1.4802076816558838, "learning_rate": 7.089037622959132e-07, "loss": 0.9366, "step": 13560 }, { "epoch": 0.32670215071891334, "grad_norm": 1.6493333578109741, "learning_rate": 7.087770003042287e-07, "loss": 0.8865, "step": 13565 }, { "epoch": 0.3268225717107006, "grad_norm": 1.5122278928756714, "learning_rate": 7.086502383125444e-07, "loss": 0.9311, "step": 13570 }, { "epoch": 0.3269429927024879, "grad_norm": 1.5008400678634644, "learning_rate": 7.0852347632086e-07, "loss": 0.9301, "step": 13575 }, { "epoch": 0.32706341369427516, "grad_norm": 1.6388431787490845, "learning_rate": 7.083967143291754e-07, "loss": 0.9092, "step": 13580 }, { "epoch": 0.3271838346860625, "grad_norm": 1.8835076093673706, "learning_rate": 7.082699523374911e-07, "loss": 0.8953, "step": 13585 }, { "epoch": 0.32730425567784976, "grad_norm": 1.381044864654541, "learning_rate": 7.081431903458067e-07, "loss": 0.9351, "step": 13590 }, { "epoch": 0.32742467666963704, "grad_norm": 1.7384233474731445, "learning_rate": 7.080164283541223e-07, "loss": 0.8984, "step": 13595 }, { "epoch": 0.32754509766142437, "grad_norm": 1.2871006727218628, "learning_rate": 7.078896663624378e-07, "loss": 0.8785, "step": 13600 }, { "epoch": 0.32766551865321164, "grad_norm": 1.4323610067367554, "learning_rate": 7.077629043707534e-07, "loss": 0.9649, "step": 13605 }, { "epoch": 0.3277859396449989, "grad_norm": 1.8085834980010986, "learning_rate": 7.07636142379069e-07, "loss": 0.956, "step": 13610 }, { "epoch": 0.3279063606367862, "grad_norm": 1.494591236114502, "learning_rate": 7.075093803873846e-07, "loss": 0.8899, "step": 13615 }, { "epoch": 0.3280267816285735, "grad_norm": 1.4227632284164429, "learning_rate": 7.073826183957003e-07, "loss": 0.948, "step": 13620 }, { "epoch": 0.3281472026203608, "grad_norm": 1.6277165412902832, "learning_rate": 7.072558564040157e-07, "loss": 0.9471, "step": 13625 }, { "epoch": 0.32826762361214806, "grad_norm": 1.4908299446105957, "learning_rate": 7.071290944123314e-07, "loss": 0.9433, "step": 13630 }, { "epoch": 0.32838804460393534, "grad_norm": 1.476320505142212, "learning_rate": 7.07002332420647e-07, "loss": 0.9705, "step": 13635 }, { "epoch": 0.32850846559572267, "grad_norm": 1.515441656112671, "learning_rate": 7.068755704289625e-07, "loss": 0.9863, "step": 13640 }, { "epoch": 0.32862888658750994, "grad_norm": 1.490149736404419, "learning_rate": 7.067488084372782e-07, "loss": 0.911, "step": 13645 }, { "epoch": 0.3287493075792972, "grad_norm": 1.5658856630325317, "learning_rate": 7.066220464455937e-07, "loss": 0.942, "step": 13650 }, { "epoch": 0.3288697285710845, "grad_norm": 1.8892005681991577, "learning_rate": 7.064952844539093e-07, "loss": 0.9446, "step": 13655 }, { "epoch": 0.3289901495628718, "grad_norm": 1.5404114723205566, "learning_rate": 7.063685224622249e-07, "loss": 0.926, "step": 13660 }, { "epoch": 0.3291105705546591, "grad_norm": 1.7159450054168701, "learning_rate": 7.062417604705405e-07, "loss": 0.9088, "step": 13665 }, { "epoch": 0.32923099154644636, "grad_norm": 1.5221284627914429, "learning_rate": 7.06114998478856e-07, "loss": 0.9194, "step": 13670 }, { "epoch": 0.3293514125382337, "grad_norm": 1.5362894535064697, "learning_rate": 7.059882364871716e-07, "loss": 0.9113, "step": 13675 }, { "epoch": 0.32947183353002096, "grad_norm": 1.454923391342163, "learning_rate": 7.058614744954873e-07, "loss": 0.8754, "step": 13680 }, { "epoch": 0.32959225452180824, "grad_norm": 1.5516047477722168, "learning_rate": 7.057347125038028e-07, "loss": 0.914, "step": 13685 }, { "epoch": 0.3297126755135955, "grad_norm": 1.3942458629608154, "learning_rate": 7.056079505121185e-07, "loss": 0.9446, "step": 13690 }, { "epoch": 0.32983309650538284, "grad_norm": 1.4205833673477173, "learning_rate": 7.05481188520434e-07, "loss": 0.9282, "step": 13695 }, { "epoch": 0.3299535174971701, "grad_norm": 1.4916503429412842, "learning_rate": 7.053544265287495e-07, "loss": 0.8933, "step": 13700 }, { "epoch": 0.3300739384889574, "grad_norm": 1.4028732776641846, "learning_rate": 7.052276645370652e-07, "loss": 0.921, "step": 13705 }, { "epoch": 0.33019435948074466, "grad_norm": 1.5253241062164307, "learning_rate": 7.051009025453808e-07, "loss": 0.942, "step": 13710 }, { "epoch": 0.330314780472532, "grad_norm": 1.5972962379455566, "learning_rate": 7.049741405536963e-07, "loss": 0.9471, "step": 13715 }, { "epoch": 0.33043520146431926, "grad_norm": 1.4718220233917236, "learning_rate": 7.048473785620119e-07, "loss": 0.9449, "step": 13720 }, { "epoch": 0.33055562245610653, "grad_norm": 1.5282396078109741, "learning_rate": 7.047206165703275e-07, "loss": 0.9375, "step": 13725 }, { "epoch": 0.33067604344789386, "grad_norm": 1.509559988975525, "learning_rate": 7.045938545786431e-07, "loss": 0.9044, "step": 13730 }, { "epoch": 0.33079646443968114, "grad_norm": 1.661907434463501, "learning_rate": 7.044670925869587e-07, "loss": 0.9749, "step": 13735 }, { "epoch": 0.3309168854314684, "grad_norm": 1.8139874935150146, "learning_rate": 7.043403305952743e-07, "loss": 0.8787, "step": 13740 }, { "epoch": 0.3310373064232557, "grad_norm": 1.4727627038955688, "learning_rate": 7.042135686035898e-07, "loss": 0.8722, "step": 13745 }, { "epoch": 0.331157727415043, "grad_norm": 1.3403947353363037, "learning_rate": 7.040868066119054e-07, "loss": 0.9231, "step": 13750 }, { "epoch": 0.3312781484068303, "grad_norm": 1.4505927562713623, "learning_rate": 7.039600446202211e-07, "loss": 0.8517, "step": 13755 }, { "epoch": 0.33139856939861756, "grad_norm": 1.5560884475708008, "learning_rate": 7.038332826285366e-07, "loss": 0.9006, "step": 13760 }, { "epoch": 0.33151899039040483, "grad_norm": 1.6754164695739746, "learning_rate": 7.037065206368522e-07, "loss": 0.9583, "step": 13765 }, { "epoch": 0.33163941138219216, "grad_norm": 1.5612949132919312, "learning_rate": 7.035797586451678e-07, "loss": 0.9091, "step": 13770 }, { "epoch": 0.33175983237397944, "grad_norm": 1.49845290184021, "learning_rate": 7.034529966534834e-07, "loss": 0.9324, "step": 13775 }, { "epoch": 0.3318802533657667, "grad_norm": 1.5018976926803589, "learning_rate": 7.03326234661799e-07, "loss": 0.9384, "step": 13780 }, { "epoch": 0.332000674357554, "grad_norm": 1.5326520204544067, "learning_rate": 7.031994726701145e-07, "loss": 0.8921, "step": 13785 }, { "epoch": 0.3321210953493413, "grad_norm": 1.4276926517486572, "learning_rate": 7.030727106784301e-07, "loss": 0.9469, "step": 13790 }, { "epoch": 0.3322415163411286, "grad_norm": 1.6255600452423096, "learning_rate": 7.029459486867457e-07, "loss": 0.9313, "step": 13795 }, { "epoch": 0.33236193733291586, "grad_norm": 1.544748067855835, "learning_rate": 7.028191866950614e-07, "loss": 0.9412, "step": 13800 }, { "epoch": 0.3324823583247032, "grad_norm": 1.56434965133667, "learning_rate": 7.026924247033769e-07, "loss": 0.9771, "step": 13805 }, { "epoch": 0.33260277931649046, "grad_norm": 1.5131580829620361, "learning_rate": 7.025656627116924e-07, "loss": 0.91, "step": 13810 }, { "epoch": 0.33272320030827773, "grad_norm": 1.5495069026947021, "learning_rate": 7.024389007200081e-07, "loss": 0.921, "step": 13815 }, { "epoch": 0.332843621300065, "grad_norm": 1.6015516519546509, "learning_rate": 7.023121387283236e-07, "loss": 0.8956, "step": 13820 }, { "epoch": 0.33296404229185234, "grad_norm": 1.6354554891586304, "learning_rate": 7.021853767366393e-07, "loss": 0.9254, "step": 13825 }, { "epoch": 0.3330844632836396, "grad_norm": 1.6220519542694092, "learning_rate": 7.020586147449549e-07, "loss": 0.9387, "step": 13830 }, { "epoch": 0.3332048842754269, "grad_norm": 1.7443767786026, "learning_rate": 7.019318527532704e-07, "loss": 0.9206, "step": 13835 }, { "epoch": 0.33332530526721416, "grad_norm": 1.4180527925491333, "learning_rate": 7.01805090761586e-07, "loss": 0.9414, "step": 13840 }, { "epoch": 0.3334457262590015, "grad_norm": 1.4445128440856934, "learning_rate": 7.016783287699016e-07, "loss": 0.9222, "step": 13845 }, { "epoch": 0.33356614725078876, "grad_norm": 1.522389531135559, "learning_rate": 7.015515667782172e-07, "loss": 0.9301, "step": 13850 }, { "epoch": 0.33368656824257603, "grad_norm": 1.559030294418335, "learning_rate": 7.014248047865327e-07, "loss": 0.9337, "step": 13855 }, { "epoch": 0.33380698923436336, "grad_norm": 1.6620439291000366, "learning_rate": 7.012980427948484e-07, "loss": 0.8898, "step": 13860 }, { "epoch": 0.33392741022615063, "grad_norm": 1.3690235614776611, "learning_rate": 7.011712808031639e-07, "loss": 0.939, "step": 13865 }, { "epoch": 0.3340478312179379, "grad_norm": 1.57186758518219, "learning_rate": 7.010445188114795e-07, "loss": 0.9139, "step": 13870 }, { "epoch": 0.3341682522097252, "grad_norm": 1.8457096815109253, "learning_rate": 7.009177568197952e-07, "loss": 0.9582, "step": 13875 }, { "epoch": 0.3342886732015125, "grad_norm": 1.5629444122314453, "learning_rate": 7.007909948281106e-07, "loss": 0.9404, "step": 13880 }, { "epoch": 0.3344090941932998, "grad_norm": 1.7172342538833618, "learning_rate": 7.006642328364263e-07, "loss": 0.8399, "step": 13885 }, { "epoch": 0.33452951518508706, "grad_norm": 1.6273506879806519, "learning_rate": 7.005374708447419e-07, "loss": 0.8748, "step": 13890 }, { "epoch": 0.33464993617687433, "grad_norm": 1.6080079078674316, "learning_rate": 7.004107088530575e-07, "loss": 0.9539, "step": 13895 }, { "epoch": 0.33477035716866166, "grad_norm": 1.5368893146514893, "learning_rate": 7.00283946861373e-07, "loss": 0.9064, "step": 13900 }, { "epoch": 0.33489077816044893, "grad_norm": 1.4585624933242798, "learning_rate": 7.001571848696886e-07, "loss": 0.9208, "step": 13905 }, { "epoch": 0.3350111991522362, "grad_norm": 1.532059669494629, "learning_rate": 7.000304228780042e-07, "loss": 0.9143, "step": 13910 }, { "epoch": 0.3351316201440235, "grad_norm": 1.5856126546859741, "learning_rate": 6.999036608863198e-07, "loss": 0.9381, "step": 13915 }, { "epoch": 0.3352520411358108, "grad_norm": 1.5512603521347046, "learning_rate": 6.997768988946355e-07, "loss": 0.9217, "step": 13920 }, { "epoch": 0.3353724621275981, "grad_norm": 1.4288643598556519, "learning_rate": 6.996501369029509e-07, "loss": 0.881, "step": 13925 }, { "epoch": 0.33549288311938535, "grad_norm": 1.4540021419525146, "learning_rate": 6.995233749112665e-07, "loss": 0.893, "step": 13930 }, { "epoch": 0.3356133041111727, "grad_norm": 1.457465410232544, "learning_rate": 6.993966129195822e-07, "loss": 0.9204, "step": 13935 }, { "epoch": 0.33573372510295996, "grad_norm": 1.5519061088562012, "learning_rate": 6.992698509278977e-07, "loss": 0.9038, "step": 13940 }, { "epoch": 0.33585414609474723, "grad_norm": 1.4745047092437744, "learning_rate": 6.991430889362134e-07, "loss": 0.9281, "step": 13945 }, { "epoch": 0.3359745670865345, "grad_norm": 1.4566423892974854, "learning_rate": 6.990163269445289e-07, "loss": 0.8641, "step": 13950 }, { "epoch": 0.33609498807832183, "grad_norm": 1.702518105506897, "learning_rate": 6.988895649528444e-07, "loss": 0.9277, "step": 13955 }, { "epoch": 0.3362154090701091, "grad_norm": 1.5833892822265625, "learning_rate": 6.987628029611601e-07, "loss": 0.8841, "step": 13960 }, { "epoch": 0.3363358300618964, "grad_norm": 1.4968024492263794, "learning_rate": 6.986360409694757e-07, "loss": 0.9054, "step": 13965 }, { "epoch": 0.33645625105368365, "grad_norm": 1.5771639347076416, "learning_rate": 6.985092789777912e-07, "loss": 0.9448, "step": 13970 }, { "epoch": 0.336576672045471, "grad_norm": 1.413447618484497, "learning_rate": 6.983825169861068e-07, "loss": 0.8788, "step": 13975 }, { "epoch": 0.33669709303725825, "grad_norm": 1.5518314838409424, "learning_rate": 6.982557549944225e-07, "loss": 0.8855, "step": 13980 }, { "epoch": 0.33681751402904553, "grad_norm": 1.6350971460342407, "learning_rate": 6.98128993002738e-07, "loss": 0.9442, "step": 13985 }, { "epoch": 0.33693793502083286, "grad_norm": 1.3646329641342163, "learning_rate": 6.980022310110536e-07, "loss": 0.912, "step": 13990 }, { "epoch": 0.33705835601262013, "grad_norm": 1.8672513961791992, "learning_rate": 6.978754690193692e-07, "loss": 0.9439, "step": 13995 }, { "epoch": 0.3371787770044074, "grad_norm": 1.4845424890518188, "learning_rate": 6.977487070276847e-07, "loss": 0.9131, "step": 14000 }, { "epoch": 0.3372991979961947, "grad_norm": 1.4043781757354736, "learning_rate": 6.976219450360004e-07, "loss": 0.9178, "step": 14005 }, { "epoch": 0.337419618987982, "grad_norm": 1.4757076501846313, "learning_rate": 6.97495183044316e-07, "loss": 0.925, "step": 14010 }, { "epoch": 0.3375400399797693, "grad_norm": 1.5454431772232056, "learning_rate": 6.973684210526314e-07, "loss": 0.9657, "step": 14015 }, { "epoch": 0.33766046097155655, "grad_norm": 1.502697229385376, "learning_rate": 6.972416590609471e-07, "loss": 0.9258, "step": 14020 }, { "epoch": 0.3377808819633438, "grad_norm": 1.6740292310714722, "learning_rate": 6.971148970692627e-07, "loss": 0.9542, "step": 14025 }, { "epoch": 0.33790130295513116, "grad_norm": 1.615235447883606, "learning_rate": 6.969881350775783e-07, "loss": 0.9191, "step": 14030 }, { "epoch": 0.33802172394691843, "grad_norm": 1.4535822868347168, "learning_rate": 6.968613730858939e-07, "loss": 0.9537, "step": 14035 }, { "epoch": 0.3381421449387057, "grad_norm": 1.4042657613754272, "learning_rate": 6.967346110942095e-07, "loss": 0.9199, "step": 14040 }, { "epoch": 0.33826256593049303, "grad_norm": 1.8075425624847412, "learning_rate": 6.96607849102525e-07, "loss": 0.924, "step": 14045 }, { "epoch": 0.3383829869222803, "grad_norm": 1.711000680923462, "learning_rate": 6.964810871108406e-07, "loss": 0.9265, "step": 14050 }, { "epoch": 0.3385034079140676, "grad_norm": 1.5463098287582397, "learning_rate": 6.963543251191563e-07, "loss": 0.9224, "step": 14055 }, { "epoch": 0.33862382890585485, "grad_norm": 1.6100355386734009, "learning_rate": 6.962275631274718e-07, "loss": 0.9438, "step": 14060 }, { "epoch": 0.3387442498976422, "grad_norm": 1.7484734058380127, "learning_rate": 6.961008011357874e-07, "loss": 0.9099, "step": 14065 }, { "epoch": 0.33886467088942945, "grad_norm": 1.6006505489349365, "learning_rate": 6.95974039144103e-07, "loss": 0.9214, "step": 14070 }, { "epoch": 0.3389850918812167, "grad_norm": 1.4968926906585693, "learning_rate": 6.958472771524185e-07, "loss": 0.942, "step": 14075 }, { "epoch": 0.339105512873004, "grad_norm": 1.4391860961914062, "learning_rate": 6.957205151607342e-07, "loss": 0.9237, "step": 14080 }, { "epoch": 0.33922593386479133, "grad_norm": 1.981737732887268, "learning_rate": 6.955937531690497e-07, "loss": 0.9219, "step": 14085 }, { "epoch": 0.3393463548565786, "grad_norm": 2.0144782066345215, "learning_rate": 6.954669911773653e-07, "loss": 0.953, "step": 14090 }, { "epoch": 0.3394667758483659, "grad_norm": 1.4978262186050415, "learning_rate": 6.953402291856809e-07, "loss": 0.8834, "step": 14095 }, { "epoch": 0.33958719684015315, "grad_norm": 1.5561473369598389, "learning_rate": 6.952134671939966e-07, "loss": 0.8911, "step": 14100 }, { "epoch": 0.3397076178319405, "grad_norm": 1.5292071104049683, "learning_rate": 6.950867052023122e-07, "loss": 0.9516, "step": 14105 }, { "epoch": 0.33982803882372775, "grad_norm": 1.6524728536605835, "learning_rate": 6.949599432106276e-07, "loss": 0.9669, "step": 14110 }, { "epoch": 0.339948459815515, "grad_norm": 1.861893653869629, "learning_rate": 6.948331812189433e-07, "loss": 0.8984, "step": 14115 }, { "epoch": 0.34006888080730235, "grad_norm": 1.8193475008010864, "learning_rate": 6.947064192272589e-07, "loss": 0.9429, "step": 14120 }, { "epoch": 0.3401893017990896, "grad_norm": 1.6743884086608887, "learning_rate": 6.945796572355745e-07, "loss": 0.9687, "step": 14125 }, { "epoch": 0.3403097227908769, "grad_norm": 1.641255497932434, "learning_rate": 6.944528952438901e-07, "loss": 0.9117, "step": 14130 }, { "epoch": 0.3404301437826642, "grad_norm": 1.6246029138565063, "learning_rate": 6.943261332522056e-07, "loss": 0.9413, "step": 14135 }, { "epoch": 0.3405505647744515, "grad_norm": 1.459546685218811, "learning_rate": 6.941993712605212e-07, "loss": 0.9272, "step": 14140 }, { "epoch": 0.3406709857662388, "grad_norm": 1.6321953535079956, "learning_rate": 6.940726092688368e-07, "loss": 0.938, "step": 14145 }, { "epoch": 0.34079140675802605, "grad_norm": 1.5907163619995117, "learning_rate": 6.939458472771525e-07, "loss": 0.9595, "step": 14150 }, { "epoch": 0.3409118277498133, "grad_norm": 1.456050157546997, "learning_rate": 6.938190852854679e-07, "loss": 0.9371, "step": 14155 }, { "epoch": 0.34103224874160065, "grad_norm": 1.67795729637146, "learning_rate": 6.936923232937836e-07, "loss": 0.9938, "step": 14160 }, { "epoch": 0.3411526697333879, "grad_norm": 1.7361115217208862, "learning_rate": 6.935655613020992e-07, "loss": 0.9387, "step": 14165 }, { "epoch": 0.3412730907251752, "grad_norm": 1.8264391422271729, "learning_rate": 6.934387993104147e-07, "loss": 0.9648, "step": 14170 }, { "epoch": 0.3413935117169625, "grad_norm": 1.6677944660186768, "learning_rate": 6.933120373187304e-07, "loss": 0.9156, "step": 14175 }, { "epoch": 0.3415139327087498, "grad_norm": 1.4905892610549927, "learning_rate": 6.931852753270459e-07, "loss": 0.8835, "step": 14180 }, { "epoch": 0.3416343537005371, "grad_norm": 1.5072036981582642, "learning_rate": 6.930585133353615e-07, "loss": 0.9174, "step": 14185 }, { "epoch": 0.34175477469232435, "grad_norm": 1.64654541015625, "learning_rate": 6.929317513436771e-07, "loss": 0.9023, "step": 14190 }, { "epoch": 0.3418751956841117, "grad_norm": 1.5622669458389282, "learning_rate": 6.928049893519927e-07, "loss": 0.9345, "step": 14195 }, { "epoch": 0.34199561667589895, "grad_norm": 1.562902808189392, "learning_rate": 6.926782273603082e-07, "loss": 0.8823, "step": 14200 }, { "epoch": 0.3421160376676862, "grad_norm": 1.4731025695800781, "learning_rate": 6.925514653686238e-07, "loss": 0.915, "step": 14205 }, { "epoch": 0.3422364586594735, "grad_norm": 1.7186492681503296, "learning_rate": 6.924247033769395e-07, "loss": 0.9277, "step": 14210 }, { "epoch": 0.3423568796512608, "grad_norm": 1.7451112270355225, "learning_rate": 6.92297941385255e-07, "loss": 0.9411, "step": 14215 }, { "epoch": 0.3424773006430481, "grad_norm": 1.4915034770965576, "learning_rate": 6.921711793935707e-07, "loss": 0.9142, "step": 14220 }, { "epoch": 0.3425977216348354, "grad_norm": 1.48759925365448, "learning_rate": 6.920444174018862e-07, "loss": 0.9394, "step": 14225 }, { "epoch": 0.34271814262662265, "grad_norm": 1.5974241495132446, "learning_rate": 6.919176554102017e-07, "loss": 0.8681, "step": 14230 }, { "epoch": 0.34283856361841, "grad_norm": 1.517372965812683, "learning_rate": 6.917908934185174e-07, "loss": 0.8845, "step": 14235 }, { "epoch": 0.34295898461019725, "grad_norm": 1.6111204624176025, "learning_rate": 6.91664131426833e-07, "loss": 0.9087, "step": 14240 }, { "epoch": 0.3430794056019845, "grad_norm": 1.4942491054534912, "learning_rate": 6.915373694351486e-07, "loss": 0.9885, "step": 14245 }, { "epoch": 0.34319982659377185, "grad_norm": 1.5210905075073242, "learning_rate": 6.914106074434641e-07, "loss": 0.9222, "step": 14250 }, { "epoch": 0.3433202475855591, "grad_norm": 1.6447272300720215, "learning_rate": 6.912838454517797e-07, "loss": 0.9016, "step": 14255 }, { "epoch": 0.3434406685773464, "grad_norm": 1.4414374828338623, "learning_rate": 6.911570834600953e-07, "loss": 0.9526, "step": 14260 }, { "epoch": 0.34356108956913367, "grad_norm": 1.6271892786026, "learning_rate": 6.910303214684109e-07, "loss": 0.9561, "step": 14265 }, { "epoch": 0.343681510560921, "grad_norm": 1.5519522428512573, "learning_rate": 6.909035594767265e-07, "loss": 0.8733, "step": 14270 }, { "epoch": 0.3438019315527083, "grad_norm": 1.8349835872650146, "learning_rate": 6.90776797485042e-07, "loss": 0.8791, "step": 14275 }, { "epoch": 0.34392235254449555, "grad_norm": 1.4303919076919556, "learning_rate": 6.906500354933577e-07, "loss": 0.9214, "step": 14280 }, { "epoch": 0.3440427735362828, "grad_norm": 1.4979641437530518, "learning_rate": 6.905232735016733e-07, "loss": 0.8764, "step": 14285 }, { "epoch": 0.34416319452807015, "grad_norm": 1.6719623804092407, "learning_rate": 6.903965115099888e-07, "loss": 0.93, "step": 14290 }, { "epoch": 0.3442836155198574, "grad_norm": 1.5701115131378174, "learning_rate": 6.902697495183044e-07, "loss": 0.8932, "step": 14295 }, { "epoch": 0.3444040365116447, "grad_norm": 1.6487075090408325, "learning_rate": 6.9014298752662e-07, "loss": 0.9413, "step": 14300 }, { "epoch": 0.344524457503432, "grad_norm": 1.575084924697876, "learning_rate": 6.900162255349356e-07, "loss": 0.9203, "step": 14305 }, { "epoch": 0.3446448784952193, "grad_norm": 1.462643027305603, "learning_rate": 6.898894635432512e-07, "loss": 0.9005, "step": 14310 }, { "epoch": 0.34476529948700657, "grad_norm": 1.448420763015747, "learning_rate": 6.897627015515668e-07, "loss": 0.9337, "step": 14315 }, { "epoch": 0.34488572047879384, "grad_norm": 1.6062064170837402, "learning_rate": 6.896359395598823e-07, "loss": 0.9557, "step": 14320 }, { "epoch": 0.3450061414705812, "grad_norm": 1.5350664854049683, "learning_rate": 6.895091775681979e-07, "loss": 0.9215, "step": 14325 }, { "epoch": 0.34512656246236845, "grad_norm": 1.4015288352966309, "learning_rate": 6.893824155765136e-07, "loss": 0.9488, "step": 14330 }, { "epoch": 0.3452469834541557, "grad_norm": 1.4283367395401, "learning_rate": 6.892556535848291e-07, "loss": 0.9227, "step": 14335 }, { "epoch": 0.345367404445943, "grad_norm": 1.5763317346572876, "learning_rate": 6.891288915931446e-07, "loss": 0.9458, "step": 14340 }, { "epoch": 0.3454878254377303, "grad_norm": 1.4506820440292358, "learning_rate": 6.890021296014603e-07, "loss": 0.9104, "step": 14345 }, { "epoch": 0.3456082464295176, "grad_norm": 1.586639165878296, "learning_rate": 6.888753676097758e-07, "loss": 0.9296, "step": 14350 }, { "epoch": 0.34572866742130487, "grad_norm": 1.5703184604644775, "learning_rate": 6.887486056180915e-07, "loss": 0.9198, "step": 14355 }, { "epoch": 0.34584908841309214, "grad_norm": 1.518678903579712, "learning_rate": 6.886218436264071e-07, "loss": 0.9037, "step": 14360 }, { "epoch": 0.34596950940487947, "grad_norm": 1.5263680219650269, "learning_rate": 6.884950816347226e-07, "loss": 0.9364, "step": 14365 }, { "epoch": 0.34608993039666675, "grad_norm": 1.6340490579605103, "learning_rate": 6.883683196430382e-07, "loss": 0.9527, "step": 14370 }, { "epoch": 0.346210351388454, "grad_norm": 1.4462147951126099, "learning_rate": 6.882415576513538e-07, "loss": 0.8904, "step": 14375 }, { "epoch": 0.34633077238024135, "grad_norm": 1.6212999820709229, "learning_rate": 6.881147956596694e-07, "loss": 0.942, "step": 14380 }, { "epoch": 0.3464511933720286, "grad_norm": 1.5607647895812988, "learning_rate": 6.879880336679849e-07, "loss": 0.9361, "step": 14385 }, { "epoch": 0.3465716143638159, "grad_norm": 1.485691785812378, "learning_rate": 6.878612716763006e-07, "loss": 0.896, "step": 14390 }, { "epoch": 0.34669203535560317, "grad_norm": 1.5243165493011475, "learning_rate": 6.877345096846161e-07, "loss": 0.9551, "step": 14395 }, { "epoch": 0.3468124563473905, "grad_norm": 1.5864940881729126, "learning_rate": 6.876077476929317e-07, "loss": 0.8871, "step": 14400 }, { "epoch": 0.34693287733917777, "grad_norm": 1.4279605150222778, "learning_rate": 6.874809857012474e-07, "loss": 0.9501, "step": 14405 }, { "epoch": 0.34705329833096504, "grad_norm": 1.622212290763855, "learning_rate": 6.873542237095628e-07, "loss": 0.8701, "step": 14410 }, { "epoch": 0.3471737193227523, "grad_norm": 1.7826794385910034, "learning_rate": 6.872274617178785e-07, "loss": 0.9761, "step": 14415 }, { "epoch": 0.34729414031453965, "grad_norm": 1.470369577407837, "learning_rate": 6.871006997261941e-07, "loss": 0.8671, "step": 14420 }, { "epoch": 0.3474145613063269, "grad_norm": 1.6227402687072754, "learning_rate": 6.869739377345097e-07, "loss": 0.9453, "step": 14425 }, { "epoch": 0.3475349822981142, "grad_norm": 1.3772656917572021, "learning_rate": 6.868471757428253e-07, "loss": 0.9403, "step": 14430 }, { "epoch": 0.3476554032899015, "grad_norm": 1.508234977722168, "learning_rate": 6.867204137511408e-07, "loss": 0.8758, "step": 14435 }, { "epoch": 0.3477758242816888, "grad_norm": 1.557151198387146, "learning_rate": 6.865936517594564e-07, "loss": 0.944, "step": 14440 }, { "epoch": 0.34789624527347607, "grad_norm": 1.6642489433288574, "learning_rate": 6.86466889767772e-07, "loss": 0.9506, "step": 14445 }, { "epoch": 0.34801666626526334, "grad_norm": 1.665263056755066, "learning_rate": 6.863401277760877e-07, "loss": 0.9669, "step": 14450 }, { "epoch": 0.34813708725705067, "grad_norm": 1.4337249994277954, "learning_rate": 6.862133657844031e-07, "loss": 0.9121, "step": 14455 }, { "epoch": 0.34825750824883794, "grad_norm": 1.5387197732925415, "learning_rate": 6.860866037927187e-07, "loss": 0.918, "step": 14460 }, { "epoch": 0.3483779292406252, "grad_norm": 1.3670315742492676, "learning_rate": 6.859598418010344e-07, "loss": 0.9419, "step": 14465 }, { "epoch": 0.3484983502324125, "grad_norm": 1.4968301057815552, "learning_rate": 6.858330798093499e-07, "loss": 0.9089, "step": 14470 }, { "epoch": 0.3486187712241998, "grad_norm": 1.4888393878936768, "learning_rate": 6.857063178176656e-07, "loss": 0.9157, "step": 14475 }, { "epoch": 0.3487391922159871, "grad_norm": 1.4342647790908813, "learning_rate": 6.855795558259811e-07, "loss": 0.9848, "step": 14480 }, { "epoch": 0.34885961320777437, "grad_norm": 1.4552973508834839, "learning_rate": 6.854527938342966e-07, "loss": 0.9555, "step": 14485 }, { "epoch": 0.3489800341995617, "grad_norm": 1.503014087677002, "learning_rate": 6.853260318426123e-07, "loss": 0.9381, "step": 14490 }, { "epoch": 0.34910045519134897, "grad_norm": 1.4178707599639893, "learning_rate": 6.851992698509279e-07, "loss": 0.9311, "step": 14495 }, { "epoch": 0.34922087618313624, "grad_norm": 1.550925612449646, "learning_rate": 6.850725078592434e-07, "loss": 0.9277, "step": 14500 }, { "epoch": 0.3493412971749235, "grad_norm": 1.6314831972122192, "learning_rate": 6.84945745867559e-07, "loss": 0.9252, "step": 14505 }, { "epoch": 0.34946171816671084, "grad_norm": 1.5699819326400757, "learning_rate": 6.848189838758747e-07, "loss": 0.9059, "step": 14510 }, { "epoch": 0.3495821391584981, "grad_norm": 1.4622288942337036, "learning_rate": 6.846922218841902e-07, "loss": 0.9703, "step": 14515 }, { "epoch": 0.3497025601502854, "grad_norm": 1.5557868480682373, "learning_rate": 6.845654598925058e-07, "loss": 0.8926, "step": 14520 }, { "epoch": 0.34982298114207266, "grad_norm": 1.5330698490142822, "learning_rate": 6.844386979008214e-07, "loss": 0.904, "step": 14525 }, { "epoch": 0.34994340213386, "grad_norm": 1.5403659343719482, "learning_rate": 6.843119359091369e-07, "loss": 0.9054, "step": 14530 }, { "epoch": 0.35006382312564727, "grad_norm": 1.4800559282302856, "learning_rate": 6.841851739174526e-07, "loss": 0.9392, "step": 14535 }, { "epoch": 0.35018424411743454, "grad_norm": 1.378165602684021, "learning_rate": 6.840584119257682e-07, "loss": 0.9487, "step": 14540 }, { "epoch": 0.3503046651092218, "grad_norm": 1.4545567035675049, "learning_rate": 6.839316499340838e-07, "loss": 0.9389, "step": 14545 }, { "epoch": 0.35042508610100914, "grad_norm": 1.4694862365722656, "learning_rate": 6.838048879423993e-07, "loss": 0.9267, "step": 14550 }, { "epoch": 0.3505455070927964, "grad_norm": 1.4605337381362915, "learning_rate": 6.836781259507149e-07, "loss": 0.8976, "step": 14555 }, { "epoch": 0.3506659280845837, "grad_norm": 1.4146809577941895, "learning_rate": 6.835513639590305e-07, "loss": 0.938, "step": 14560 }, { "epoch": 0.350786349076371, "grad_norm": 1.5950515270233154, "learning_rate": 6.834246019673461e-07, "loss": 0.9015, "step": 14565 }, { "epoch": 0.3509067700681583, "grad_norm": 1.5792133808135986, "learning_rate": 6.832978399756617e-07, "loss": 0.8922, "step": 14570 }, { "epoch": 0.35102719105994556, "grad_norm": 1.5362197160720825, "learning_rate": 6.831710779839772e-07, "loss": 0.9435, "step": 14575 }, { "epoch": 0.35114761205173284, "grad_norm": 1.4740729331970215, "learning_rate": 6.830443159922928e-07, "loss": 0.926, "step": 14580 }, { "epoch": 0.35126803304352017, "grad_norm": 1.5383095741271973, "learning_rate": 6.829175540006085e-07, "loss": 0.8675, "step": 14585 }, { "epoch": 0.35138845403530744, "grad_norm": 1.6412146091461182, "learning_rate": 6.82790792008924e-07, "loss": 0.9271, "step": 14590 }, { "epoch": 0.3515088750270947, "grad_norm": 1.4079954624176025, "learning_rate": 6.826640300172396e-07, "loss": 0.9501, "step": 14595 }, { "epoch": 0.351629296018882, "grad_norm": 1.6104514598846436, "learning_rate": 6.825372680255552e-07, "loss": 0.9345, "step": 14600 }, { "epoch": 0.3517497170106693, "grad_norm": 1.4540212154388428, "learning_rate": 6.824105060338707e-07, "loss": 0.9067, "step": 14605 }, { "epoch": 0.3518701380024566, "grad_norm": 1.5886861085891724, "learning_rate": 6.822837440421864e-07, "loss": 0.9966, "step": 14610 }, { "epoch": 0.35199055899424386, "grad_norm": 1.532599687576294, "learning_rate": 6.82156982050502e-07, "loss": 0.9118, "step": 14615 }, { "epoch": 0.3521109799860312, "grad_norm": 1.4757113456726074, "learning_rate": 6.820302200588175e-07, "loss": 0.8775, "step": 14620 }, { "epoch": 0.35223140097781847, "grad_norm": 1.3537869453430176, "learning_rate": 6.819034580671331e-07, "loss": 0.938, "step": 14625 }, { "epoch": 0.35235182196960574, "grad_norm": 1.5233502388000488, "learning_rate": 6.817766960754488e-07, "loss": 0.9157, "step": 14630 }, { "epoch": 0.352472242961393, "grad_norm": 1.6560343503952026, "learning_rate": 6.816499340837643e-07, "loss": 0.9447, "step": 14635 }, { "epoch": 0.35259266395318034, "grad_norm": 1.452549695968628, "learning_rate": 6.815231720920798e-07, "loss": 0.8794, "step": 14640 }, { "epoch": 0.3527130849449676, "grad_norm": 1.5324472188949585, "learning_rate": 6.813964101003955e-07, "loss": 0.9094, "step": 14645 }, { "epoch": 0.3528335059367549, "grad_norm": 1.5266990661621094, "learning_rate": 6.81269648108711e-07, "loss": 0.9036, "step": 14650 }, { "epoch": 0.35295392692854216, "grad_norm": 1.6903965473175049, "learning_rate": 6.811428861170267e-07, "loss": 0.9105, "step": 14655 }, { "epoch": 0.3530743479203295, "grad_norm": 1.6886303424835205, "learning_rate": 6.810161241253423e-07, "loss": 0.9129, "step": 14660 }, { "epoch": 0.35319476891211676, "grad_norm": 1.5531803369522095, "learning_rate": 6.808893621336577e-07, "loss": 0.9767, "step": 14665 }, { "epoch": 0.35331518990390404, "grad_norm": 1.5922292470932007, "learning_rate": 6.807626001419734e-07, "loss": 0.9539, "step": 14670 }, { "epoch": 0.3534356108956913, "grad_norm": 1.556394100189209, "learning_rate": 6.80635838150289e-07, "loss": 0.8524, "step": 14675 }, { "epoch": 0.35355603188747864, "grad_norm": 1.4371875524520874, "learning_rate": 6.805090761586046e-07, "loss": 0.895, "step": 14680 }, { "epoch": 0.3536764528792659, "grad_norm": 1.4753456115722656, "learning_rate": 6.803823141669201e-07, "loss": 0.9195, "step": 14685 }, { "epoch": 0.3537968738710532, "grad_norm": 1.5675758123397827, "learning_rate": 6.802555521752358e-07, "loss": 0.8769, "step": 14690 }, { "epoch": 0.3539172948628405, "grad_norm": 1.6256442070007324, "learning_rate": 6.801287901835513e-07, "loss": 0.8808, "step": 14695 }, { "epoch": 0.3540377158546278, "grad_norm": 1.9164952039718628, "learning_rate": 6.800020281918669e-07, "loss": 0.9165, "step": 14700 }, { "epoch": 0.35415813684641506, "grad_norm": 1.5501030683517456, "learning_rate": 6.798752662001826e-07, "loss": 0.8797, "step": 14705 }, { "epoch": 0.35427855783820233, "grad_norm": 1.5340889692306519, "learning_rate": 6.79748504208498e-07, "loss": 0.9298, "step": 14710 }, { "epoch": 0.35439897882998966, "grad_norm": 1.3141673803329468, "learning_rate": 6.796217422168137e-07, "loss": 0.9389, "step": 14715 }, { "epoch": 0.35451939982177694, "grad_norm": 1.4161008596420288, "learning_rate": 6.794949802251293e-07, "loss": 0.9145, "step": 14720 }, { "epoch": 0.3546398208135642, "grad_norm": 1.6499849557876587, "learning_rate": 6.793682182334448e-07, "loss": 0.9219, "step": 14725 }, { "epoch": 0.3547602418053515, "grad_norm": 1.5385875701904297, "learning_rate": 6.792414562417605e-07, "loss": 0.9265, "step": 14730 }, { "epoch": 0.3548806627971388, "grad_norm": 1.5018668174743652, "learning_rate": 6.79114694250076e-07, "loss": 1.0083, "step": 14735 }, { "epoch": 0.3550010837889261, "grad_norm": 1.418845534324646, "learning_rate": 6.789879322583916e-07, "loss": 0.9134, "step": 14740 }, { "epoch": 0.35512150478071336, "grad_norm": 1.488181471824646, "learning_rate": 6.788611702667072e-07, "loss": 0.9387, "step": 14745 }, { "epoch": 0.3552419257725007, "grad_norm": 1.5587797164916992, "learning_rate": 6.787344082750229e-07, "loss": 0.9402, "step": 14750 }, { "epoch": 0.35536234676428796, "grad_norm": 1.431835651397705, "learning_rate": 6.786076462833383e-07, "loss": 0.9412, "step": 14755 }, { "epoch": 0.35548276775607524, "grad_norm": 1.7869625091552734, "learning_rate": 6.784808842916539e-07, "loss": 0.9299, "step": 14760 }, { "epoch": 0.3556031887478625, "grad_norm": 1.3361440896987915, "learning_rate": 6.783541222999696e-07, "loss": 0.9497, "step": 14765 }, { "epoch": 0.35572360973964984, "grad_norm": 1.3490222692489624, "learning_rate": 6.782273603082851e-07, "loss": 0.8876, "step": 14770 }, { "epoch": 0.3558440307314371, "grad_norm": 1.5653305053710938, "learning_rate": 6.781005983166008e-07, "loss": 0.9391, "step": 14775 }, { "epoch": 0.3559644517232244, "grad_norm": 1.583202600479126, "learning_rate": 6.779738363249163e-07, "loss": 0.9037, "step": 14780 }, { "epoch": 0.35608487271501166, "grad_norm": 1.6768081188201904, "learning_rate": 6.778470743332318e-07, "loss": 0.943, "step": 14785 }, { "epoch": 0.356205293706799, "grad_norm": 1.8261566162109375, "learning_rate": 6.777203123415475e-07, "loss": 0.9512, "step": 14790 }, { "epoch": 0.35632571469858626, "grad_norm": 1.6274113655090332, "learning_rate": 6.775935503498631e-07, "loss": 0.9726, "step": 14795 }, { "epoch": 0.35644613569037353, "grad_norm": 1.8121517896652222, "learning_rate": 6.774667883581786e-07, "loss": 0.8796, "step": 14800 }, { "epoch": 0.3565665566821608, "grad_norm": 1.510184407234192, "learning_rate": 6.773400263664942e-07, "loss": 0.874, "step": 14805 }, { "epoch": 0.35668697767394814, "grad_norm": 1.5640634298324585, "learning_rate": 6.772132643748099e-07, "loss": 0.9704, "step": 14810 }, { "epoch": 0.3568073986657354, "grad_norm": 1.549045443534851, "learning_rate": 6.770865023831254e-07, "loss": 0.9432, "step": 14815 }, { "epoch": 0.3569278196575227, "grad_norm": 1.5475493669509888, "learning_rate": 6.76959740391441e-07, "loss": 0.8924, "step": 14820 }, { "epoch": 0.35704824064931, "grad_norm": 1.4325554370880127, "learning_rate": 6.768329783997566e-07, "loss": 0.8884, "step": 14825 }, { "epoch": 0.3571686616410973, "grad_norm": 1.6210287809371948, "learning_rate": 6.767062164080721e-07, "loss": 0.9236, "step": 14830 }, { "epoch": 0.35728908263288456, "grad_norm": 1.5211275815963745, "learning_rate": 6.765794544163878e-07, "loss": 0.9077, "step": 14835 }, { "epoch": 0.35740950362467183, "grad_norm": 1.7513643503189087, "learning_rate": 6.764526924247034e-07, "loss": 0.8607, "step": 14840 }, { "epoch": 0.35752992461645916, "grad_norm": 1.4754881858825684, "learning_rate": 6.763259304330188e-07, "loss": 0.9212, "step": 14845 }, { "epoch": 0.35765034560824643, "grad_norm": 1.642807126045227, "learning_rate": 6.761991684413345e-07, "loss": 0.8888, "step": 14850 }, { "epoch": 0.3577707666000337, "grad_norm": 1.4399112462997437, "learning_rate": 6.760724064496501e-07, "loss": 0.891, "step": 14855 }, { "epoch": 0.357891187591821, "grad_norm": 1.6988621950149536, "learning_rate": 6.759456444579657e-07, "loss": 0.9361, "step": 14860 }, { "epoch": 0.3580116085836083, "grad_norm": 1.4203370809555054, "learning_rate": 6.758188824662813e-07, "loss": 0.9133, "step": 14865 }, { "epoch": 0.3581320295753956, "grad_norm": 1.418886661529541, "learning_rate": 6.756921204745968e-07, "loss": 0.9059, "step": 14870 }, { "epoch": 0.35825245056718286, "grad_norm": 1.404044508934021, "learning_rate": 6.755653584829124e-07, "loss": 0.9271, "step": 14875 }, { "epoch": 0.3583728715589702, "grad_norm": 1.5734264850616455, "learning_rate": 6.75438596491228e-07, "loss": 0.8857, "step": 14880 }, { "epoch": 0.35849329255075746, "grad_norm": 1.4540066719055176, "learning_rate": 6.753118344995437e-07, "loss": 0.8624, "step": 14885 }, { "epoch": 0.35861371354254473, "grad_norm": 1.578904390335083, "learning_rate": 6.751850725078592e-07, "loss": 0.9649, "step": 14890 }, { "epoch": 0.358734134534332, "grad_norm": 1.5617300271987915, "learning_rate": 6.750583105161748e-07, "loss": 0.9249, "step": 14895 }, { "epoch": 0.35885455552611933, "grad_norm": 1.649563193321228, "learning_rate": 6.749315485244904e-07, "loss": 0.9314, "step": 14900 }, { "epoch": 0.3589749765179066, "grad_norm": 1.5435034036636353, "learning_rate": 6.748047865328059e-07, "loss": 0.9417, "step": 14905 }, { "epoch": 0.3590953975096939, "grad_norm": 1.6014434099197388, "learning_rate": 6.746780245411216e-07, "loss": 0.9236, "step": 14910 }, { "epoch": 0.35921581850148115, "grad_norm": 1.627638339996338, "learning_rate": 6.745512625494371e-07, "loss": 0.8733, "step": 14915 }, { "epoch": 0.3593362394932685, "grad_norm": 1.7317113876342773, "learning_rate": 6.744245005577527e-07, "loss": 0.986, "step": 14920 }, { "epoch": 0.35945666048505576, "grad_norm": 1.584110975265503, "learning_rate": 6.742977385660683e-07, "loss": 0.8878, "step": 14925 }, { "epoch": 0.35957708147684303, "grad_norm": 1.5106953382492065, "learning_rate": 6.74170976574384e-07, "loss": 0.9117, "step": 14930 }, { "epoch": 0.35969750246863036, "grad_norm": 1.4878908395767212, "learning_rate": 6.740442145826995e-07, "loss": 0.8723, "step": 14935 }, { "epoch": 0.35981792346041763, "grad_norm": 1.6115652322769165, "learning_rate": 6.73917452591015e-07, "loss": 0.9179, "step": 14940 }, { "epoch": 0.3599383444522049, "grad_norm": 1.5578006505966187, "learning_rate": 6.737906905993307e-07, "loss": 0.9446, "step": 14945 }, { "epoch": 0.3600587654439922, "grad_norm": 1.6315845251083374, "learning_rate": 6.736639286076462e-07, "loss": 0.9004, "step": 14950 }, { "epoch": 0.3601791864357795, "grad_norm": 1.4576815366744995, "learning_rate": 6.735371666159619e-07, "loss": 0.9585, "step": 14955 }, { "epoch": 0.3602996074275668, "grad_norm": 1.4762134552001953, "learning_rate": 6.734104046242775e-07, "loss": 0.9007, "step": 14960 }, { "epoch": 0.36042002841935405, "grad_norm": 1.4411206245422363, "learning_rate": 6.732836426325929e-07, "loss": 0.8957, "step": 14965 }, { "epoch": 0.36054044941114133, "grad_norm": 1.505435824394226, "learning_rate": 6.731568806409086e-07, "loss": 0.9294, "step": 14970 }, { "epoch": 0.36066087040292866, "grad_norm": 1.5263447761535645, "learning_rate": 6.730301186492242e-07, "loss": 0.9304, "step": 14975 }, { "epoch": 0.36078129139471593, "grad_norm": 1.568605661392212, "learning_rate": 6.729033566575398e-07, "loss": 0.9157, "step": 14980 }, { "epoch": 0.3609017123865032, "grad_norm": 1.693253517150879, "learning_rate": 6.727765946658553e-07, "loss": 0.9702, "step": 14985 }, { "epoch": 0.3610221333782905, "grad_norm": 1.3735846281051636, "learning_rate": 6.726498326741709e-07, "loss": 0.8727, "step": 14990 }, { "epoch": 0.3611425543700778, "grad_norm": 1.50700843334198, "learning_rate": 6.725230706824865e-07, "loss": 0.9141, "step": 14995 }, { "epoch": 0.3612629753618651, "grad_norm": 1.4231067895889282, "learning_rate": 6.723963086908021e-07, "loss": 0.9359, "step": 15000 }, { "epoch": 0.36138339635365235, "grad_norm": 1.4903537034988403, "learning_rate": 6.722695466991178e-07, "loss": 0.954, "step": 15005 }, { "epoch": 0.3615038173454397, "grad_norm": 1.5555487871170044, "learning_rate": 6.721427847074332e-07, "loss": 0.8628, "step": 15010 }, { "epoch": 0.36162423833722696, "grad_norm": 1.661105751991272, "learning_rate": 6.720160227157488e-07, "loss": 0.9473, "step": 15015 }, { "epoch": 0.36174465932901423, "grad_norm": 1.5630122423171997, "learning_rate": 6.718892607240645e-07, "loss": 0.9535, "step": 15020 }, { "epoch": 0.3618650803208015, "grad_norm": 1.8373959064483643, "learning_rate": 6.7176249873238e-07, "loss": 0.9357, "step": 15025 }, { "epoch": 0.36198550131258883, "grad_norm": 1.462262749671936, "learning_rate": 6.716357367406956e-07, "loss": 0.8968, "step": 15030 }, { "epoch": 0.3621059223043761, "grad_norm": 1.5995755195617676, "learning_rate": 6.715089747490112e-07, "loss": 0.9252, "step": 15035 }, { "epoch": 0.3622263432961634, "grad_norm": 1.420731782913208, "learning_rate": 6.713822127573268e-07, "loss": 0.8925, "step": 15040 }, { "epoch": 0.36234676428795065, "grad_norm": 1.6841161251068115, "learning_rate": 6.712554507656424e-07, "loss": 0.8825, "step": 15045 }, { "epoch": 0.362467185279738, "grad_norm": 1.5006433725357056, "learning_rate": 6.71128688773958e-07, "loss": 0.934, "step": 15050 }, { "epoch": 0.36258760627152525, "grad_norm": 1.5156694650650024, "learning_rate": 6.710019267822735e-07, "loss": 0.8896, "step": 15055 }, { "epoch": 0.3627080272633125, "grad_norm": 1.449398159980774, "learning_rate": 6.708751647905891e-07, "loss": 0.8704, "step": 15060 }, { "epoch": 0.36282844825509986, "grad_norm": 1.5191737413406372, "learning_rate": 6.707484027989048e-07, "loss": 0.959, "step": 15065 }, { "epoch": 0.36294886924688713, "grad_norm": 1.6532554626464844, "learning_rate": 6.706216408072203e-07, "loss": 0.9403, "step": 15070 }, { "epoch": 0.3630692902386744, "grad_norm": 1.6268415451049805, "learning_rate": 6.70494878815536e-07, "loss": 0.931, "step": 15075 }, { "epoch": 0.3631897112304617, "grad_norm": 1.616729974746704, "learning_rate": 6.703681168238515e-07, "loss": 0.9523, "step": 15080 }, { "epoch": 0.363310132222249, "grad_norm": 1.5711734294891357, "learning_rate": 6.70241354832167e-07, "loss": 0.8738, "step": 15085 }, { "epoch": 0.3634305532140363, "grad_norm": 1.3926578760147095, "learning_rate": 6.701145928404827e-07, "loss": 0.8539, "step": 15090 }, { "epoch": 0.36355097420582355, "grad_norm": 1.7388372421264648, "learning_rate": 6.699878308487983e-07, "loss": 0.9459, "step": 15095 }, { "epoch": 0.3636713951976108, "grad_norm": 1.4930264949798584, "learning_rate": 6.698610688571138e-07, "loss": 0.9285, "step": 15100 }, { "epoch": 0.36379181618939815, "grad_norm": 1.6159045696258545, "learning_rate": 6.697343068654294e-07, "loss": 0.8258, "step": 15105 }, { "epoch": 0.3639122371811854, "grad_norm": 1.646911382675171, "learning_rate": 6.69607544873745e-07, "loss": 0.9706, "step": 15110 }, { "epoch": 0.3640326581729727, "grad_norm": 1.7125273942947388, "learning_rate": 6.694807828820606e-07, "loss": 0.8925, "step": 15115 }, { "epoch": 0.36415307916476, "grad_norm": 1.4787230491638184, "learning_rate": 6.693540208903762e-07, "loss": 0.8243, "step": 15120 }, { "epoch": 0.3642735001565473, "grad_norm": 1.6219500303268433, "learning_rate": 6.692272588986918e-07, "loss": 0.9227, "step": 15125 }, { "epoch": 0.3643939211483346, "grad_norm": 1.4867146015167236, "learning_rate": 6.691004969070074e-07, "loss": 0.8787, "step": 15130 }, { "epoch": 0.36451434214012185, "grad_norm": 1.5020312070846558, "learning_rate": 6.689737349153229e-07, "loss": 0.9214, "step": 15135 }, { "epoch": 0.3646347631319092, "grad_norm": 1.6953963041305542, "learning_rate": 6.688469729236386e-07, "loss": 0.9409, "step": 15140 }, { "epoch": 0.36475518412369645, "grad_norm": 1.7892584800720215, "learning_rate": 6.687202109319542e-07, "loss": 0.894, "step": 15145 }, { "epoch": 0.3648756051154837, "grad_norm": 1.7647579908370972, "learning_rate": 6.685934489402697e-07, "loss": 0.9145, "step": 15150 }, { "epoch": 0.364996026107271, "grad_norm": 1.6124937534332275, "learning_rate": 6.684666869485853e-07, "loss": 0.961, "step": 15155 }, { "epoch": 0.3651164470990583, "grad_norm": 1.5672036409378052, "learning_rate": 6.68339924956901e-07, "loss": 0.9143, "step": 15160 }, { "epoch": 0.3652368680908456, "grad_norm": 1.5922040939331055, "learning_rate": 6.682131629652165e-07, "loss": 0.9337, "step": 15165 }, { "epoch": 0.3653572890826329, "grad_norm": 1.4634770154953003, "learning_rate": 6.68086400973532e-07, "loss": 0.9371, "step": 15170 }, { "epoch": 0.36547771007442015, "grad_norm": 1.745644211769104, "learning_rate": 6.679596389818477e-07, "loss": 0.894, "step": 15175 }, { "epoch": 0.3655981310662075, "grad_norm": 1.4401402473449707, "learning_rate": 6.678328769901632e-07, "loss": 0.9414, "step": 15180 }, { "epoch": 0.36571855205799475, "grad_norm": 1.5754197835922241, "learning_rate": 6.677061149984789e-07, "loss": 0.9305, "step": 15185 }, { "epoch": 0.365838973049782, "grad_norm": 1.81437349319458, "learning_rate": 6.675793530067945e-07, "loss": 0.9554, "step": 15190 }, { "epoch": 0.36595939404156935, "grad_norm": 1.5831947326660156, "learning_rate": 6.674525910151099e-07, "loss": 0.9147, "step": 15195 }, { "epoch": 0.3660798150333566, "grad_norm": 1.520748257637024, "learning_rate": 6.673258290234256e-07, "loss": 0.8806, "step": 15200 }, { "epoch": 0.3662002360251439, "grad_norm": 1.6147689819335938, "learning_rate": 6.671990670317412e-07, "loss": 0.9263, "step": 15205 }, { "epoch": 0.3663206570169312, "grad_norm": 1.4286832809448242, "learning_rate": 6.670723050400568e-07, "loss": 0.9172, "step": 15210 }, { "epoch": 0.3664410780087185, "grad_norm": 1.776050090789795, "learning_rate": 6.669455430483723e-07, "loss": 0.8837, "step": 15215 }, { "epoch": 0.3665614990005058, "grad_norm": 1.509376883506775, "learning_rate": 6.66818781056688e-07, "loss": 0.8783, "step": 15220 }, { "epoch": 0.36668191999229305, "grad_norm": 1.715714454650879, "learning_rate": 6.666920190650035e-07, "loss": 0.8801, "step": 15225 }, { "epoch": 0.3668023409840803, "grad_norm": 1.6566847562789917, "learning_rate": 6.665652570733191e-07, "loss": 0.8783, "step": 15230 }, { "epoch": 0.36692276197586765, "grad_norm": 1.720525860786438, "learning_rate": 6.664384950816348e-07, "loss": 0.9263, "step": 15235 }, { "epoch": 0.3670431829676549, "grad_norm": 1.5244266986846924, "learning_rate": 6.663117330899502e-07, "loss": 0.8521, "step": 15240 }, { "epoch": 0.3671636039594422, "grad_norm": 1.6485753059387207, "learning_rate": 6.661849710982659e-07, "loss": 0.8937, "step": 15245 }, { "epoch": 0.36728402495122947, "grad_norm": 1.4862918853759766, "learning_rate": 6.660582091065815e-07, "loss": 0.9009, "step": 15250 }, { "epoch": 0.3674044459430168, "grad_norm": 1.5611529350280762, "learning_rate": 6.65931447114897e-07, "loss": 0.8937, "step": 15255 }, { "epoch": 0.3675248669348041, "grad_norm": 1.604556918144226, "learning_rate": 6.658046851232127e-07, "loss": 0.9168, "step": 15260 }, { "epoch": 0.36764528792659135, "grad_norm": 1.7058712244033813, "learning_rate": 6.656779231315282e-07, "loss": 0.9182, "step": 15265 }, { "epoch": 0.3677657089183787, "grad_norm": 1.5210764408111572, "learning_rate": 6.655511611398438e-07, "loss": 0.9549, "step": 15270 }, { "epoch": 0.36788612991016595, "grad_norm": 1.4679126739501953, "learning_rate": 6.654243991481594e-07, "loss": 0.9445, "step": 15275 }, { "epoch": 0.3680065509019532, "grad_norm": 1.5327095985412598, "learning_rate": 6.652976371564751e-07, "loss": 0.9276, "step": 15280 }, { "epoch": 0.3681269718937405, "grad_norm": 1.4916969537734985, "learning_rate": 6.651708751647905e-07, "loss": 0.9069, "step": 15285 }, { "epoch": 0.3682473928855278, "grad_norm": 1.508493185043335, "learning_rate": 6.650441131731061e-07, "loss": 0.9226, "step": 15290 }, { "epoch": 0.3683678138773151, "grad_norm": 1.6520406007766724, "learning_rate": 6.649173511814218e-07, "loss": 0.8933, "step": 15295 }, { "epoch": 0.36848823486910237, "grad_norm": 1.682572603225708, "learning_rate": 6.647905891897373e-07, "loss": 0.895, "step": 15300 }, { "epoch": 0.36860865586088964, "grad_norm": 1.3576946258544922, "learning_rate": 6.64663827198053e-07, "loss": 0.8832, "step": 15305 }, { "epoch": 0.368729076852677, "grad_norm": 1.4632619619369507, "learning_rate": 6.645370652063685e-07, "loss": 0.9272, "step": 15310 }, { "epoch": 0.36884949784446425, "grad_norm": 1.3913495540618896, "learning_rate": 6.64410303214684e-07, "loss": 0.8963, "step": 15315 }, { "epoch": 0.3689699188362515, "grad_norm": 1.4010612964630127, "learning_rate": 6.642835412229997e-07, "loss": 0.8897, "step": 15320 }, { "epoch": 0.36909033982803885, "grad_norm": 1.4855096340179443, "learning_rate": 6.641567792313153e-07, "loss": 0.9235, "step": 15325 }, { "epoch": 0.3692107608198261, "grad_norm": 1.5315731763839722, "learning_rate": 6.640300172396308e-07, "loss": 0.8996, "step": 15330 }, { "epoch": 0.3693311818116134, "grad_norm": 1.444700002670288, "learning_rate": 6.639032552479464e-07, "loss": 0.8574, "step": 15335 }, { "epoch": 0.36945160280340067, "grad_norm": 1.5219303369522095, "learning_rate": 6.63776493256262e-07, "loss": 0.8845, "step": 15340 }, { "epoch": 0.369572023795188, "grad_norm": 1.5101958513259888, "learning_rate": 6.636497312645776e-07, "loss": 0.8634, "step": 15345 }, { "epoch": 0.36969244478697527, "grad_norm": 1.630250334739685, "learning_rate": 6.635229692728932e-07, "loss": 0.908, "step": 15350 }, { "epoch": 0.36981286577876255, "grad_norm": 1.55610990524292, "learning_rate": 6.633962072812088e-07, "loss": 0.9107, "step": 15355 }, { "epoch": 0.3699332867705498, "grad_norm": 1.470426082611084, "learning_rate": 6.632694452895243e-07, "loss": 0.9563, "step": 15360 }, { "epoch": 0.37005370776233715, "grad_norm": 1.4882392883300781, "learning_rate": 6.6314268329784e-07, "loss": 0.9542, "step": 15365 }, { "epoch": 0.3701741287541244, "grad_norm": 1.565896987915039, "learning_rate": 6.630159213061556e-07, "loss": 0.973, "step": 15370 }, { "epoch": 0.3702945497459117, "grad_norm": 1.5221936702728271, "learning_rate": 6.628891593144711e-07, "loss": 0.9164, "step": 15375 }, { "epoch": 0.370414970737699, "grad_norm": 1.369215726852417, "learning_rate": 6.627623973227867e-07, "loss": 0.8889, "step": 15380 }, { "epoch": 0.3705353917294863, "grad_norm": 1.3607157468795776, "learning_rate": 6.626356353311023e-07, "loss": 0.9203, "step": 15385 }, { "epoch": 0.37065581272127357, "grad_norm": 1.4368348121643066, "learning_rate": 6.625088733394179e-07, "loss": 0.9226, "step": 15390 }, { "epoch": 0.37077623371306084, "grad_norm": 1.6463371515274048, "learning_rate": 6.623821113477335e-07, "loss": 0.8857, "step": 15395 }, { "epoch": 0.37089665470484817, "grad_norm": 1.6441726684570312, "learning_rate": 6.62255349356049e-07, "loss": 0.8931, "step": 15400 }, { "epoch": 0.37101707569663545, "grad_norm": 1.3902664184570312, "learning_rate": 6.621285873643646e-07, "loss": 0.9283, "step": 15405 }, { "epoch": 0.3711374966884227, "grad_norm": 1.4455829858779907, "learning_rate": 6.620018253726802e-07, "loss": 0.933, "step": 15410 }, { "epoch": 0.37125791768021, "grad_norm": 1.4055064916610718, "learning_rate": 6.618750633809959e-07, "loss": 0.8331, "step": 15415 }, { "epoch": 0.3713783386719973, "grad_norm": 1.5223290920257568, "learning_rate": 6.617483013893114e-07, "loss": 0.8939, "step": 15420 }, { "epoch": 0.3714987596637846, "grad_norm": 1.7608566284179688, "learning_rate": 6.61621539397627e-07, "loss": 0.9388, "step": 15425 }, { "epoch": 0.37161918065557187, "grad_norm": 1.5901376008987427, "learning_rate": 6.614947774059426e-07, "loss": 0.8834, "step": 15430 }, { "epoch": 0.37173960164735914, "grad_norm": 1.581833839416504, "learning_rate": 6.613680154142581e-07, "loss": 0.9965, "step": 15435 }, { "epoch": 0.37186002263914647, "grad_norm": 1.6572521924972534, "learning_rate": 6.612412534225738e-07, "loss": 0.8962, "step": 15440 }, { "epoch": 0.37198044363093374, "grad_norm": 1.6382677555084229, "learning_rate": 6.611144914308894e-07, "loss": 0.9106, "step": 15445 }, { "epoch": 0.372100864622721, "grad_norm": 1.4641860723495483, "learning_rate": 6.609877294392049e-07, "loss": 0.9181, "step": 15450 }, { "epoch": 0.37222128561450835, "grad_norm": 1.5975430011749268, "learning_rate": 6.608609674475205e-07, "loss": 0.9305, "step": 15455 }, { "epoch": 0.3723417066062956, "grad_norm": 1.375770092010498, "learning_rate": 6.607342054558361e-07, "loss": 0.9122, "step": 15460 }, { "epoch": 0.3724621275980829, "grad_norm": 1.7418895959854126, "learning_rate": 6.606074434641517e-07, "loss": 0.8588, "step": 15465 }, { "epoch": 0.37258254858987017, "grad_norm": 1.4999524354934692, "learning_rate": 6.604806814724672e-07, "loss": 0.953, "step": 15470 }, { "epoch": 0.3727029695816575, "grad_norm": 1.607024908065796, "learning_rate": 6.603539194807829e-07, "loss": 0.9159, "step": 15475 }, { "epoch": 0.37282339057344477, "grad_norm": 1.7114670276641846, "learning_rate": 6.602271574890984e-07, "loss": 0.9333, "step": 15480 }, { "epoch": 0.37294381156523204, "grad_norm": 1.398600697517395, "learning_rate": 6.601003954974141e-07, "loss": 0.9032, "step": 15485 }, { "epoch": 0.3730642325570193, "grad_norm": 1.5683618783950806, "learning_rate": 6.599736335057297e-07, "loss": 0.8666, "step": 15490 }, { "epoch": 0.37318465354880664, "grad_norm": 1.7809679508209229, "learning_rate": 6.598468715140451e-07, "loss": 0.9364, "step": 15495 }, { "epoch": 0.3733050745405939, "grad_norm": 1.5396602153778076, "learning_rate": 6.597201095223608e-07, "loss": 0.9189, "step": 15500 }, { "epoch": 0.3734254955323812, "grad_norm": 1.375185251235962, "learning_rate": 6.595933475306764e-07, "loss": 0.9043, "step": 15505 }, { "epoch": 0.3735459165241685, "grad_norm": 1.5217154026031494, "learning_rate": 6.59466585538992e-07, "loss": 0.9204, "step": 15510 }, { "epoch": 0.3736663375159558, "grad_norm": 1.5808666944503784, "learning_rate": 6.593398235473075e-07, "loss": 0.9414, "step": 15515 }, { "epoch": 0.37378675850774307, "grad_norm": 1.6650584936141968, "learning_rate": 6.592130615556231e-07, "loss": 0.8927, "step": 15520 }, { "epoch": 0.37390717949953034, "grad_norm": 1.3403691053390503, "learning_rate": 6.590862995639387e-07, "loss": 0.9146, "step": 15525 }, { "epoch": 0.37402760049131767, "grad_norm": 1.6455671787261963, "learning_rate": 6.589595375722543e-07, "loss": 0.887, "step": 15530 }, { "epoch": 0.37414802148310494, "grad_norm": 1.5723843574523926, "learning_rate": 6.5883277558057e-07, "loss": 0.885, "step": 15535 }, { "epoch": 0.3742684424748922, "grad_norm": 1.5224273204803467, "learning_rate": 6.587060135888854e-07, "loss": 0.9342, "step": 15540 }, { "epoch": 0.3743888634666795, "grad_norm": 1.4680192470550537, "learning_rate": 6.58579251597201e-07, "loss": 0.915, "step": 15545 }, { "epoch": 0.3745092844584668, "grad_norm": 1.478745937347412, "learning_rate": 6.584524896055167e-07, "loss": 0.9552, "step": 15550 }, { "epoch": 0.3746297054502541, "grad_norm": 1.7124029397964478, "learning_rate": 6.583257276138322e-07, "loss": 0.9157, "step": 15555 }, { "epoch": 0.37475012644204136, "grad_norm": 1.7640318870544434, "learning_rate": 6.581989656221479e-07, "loss": 0.9328, "step": 15560 }, { "epoch": 0.37487054743382864, "grad_norm": 1.5102007389068604, "learning_rate": 6.580722036304634e-07, "loss": 0.9801, "step": 15565 }, { "epoch": 0.37499096842561597, "grad_norm": 1.603365182876587, "learning_rate": 6.57945441638779e-07, "loss": 0.9126, "step": 15570 }, { "epoch": 0.37511138941740324, "grad_norm": 1.7390234470367432, "learning_rate": 6.578186796470946e-07, "loss": 0.9553, "step": 15575 }, { "epoch": 0.3752318104091905, "grad_norm": 1.5153967142105103, "learning_rate": 6.576919176554102e-07, "loss": 0.9007, "step": 15580 }, { "epoch": 0.37535223140097784, "grad_norm": 1.5532500743865967, "learning_rate": 6.575651556637257e-07, "loss": 0.9155, "step": 15585 }, { "epoch": 0.3754726523927651, "grad_norm": 1.6289955377578735, "learning_rate": 6.574383936720413e-07, "loss": 0.9619, "step": 15590 }, { "epoch": 0.3755930733845524, "grad_norm": 1.5017445087432861, "learning_rate": 6.57311631680357e-07, "loss": 0.9287, "step": 15595 }, { "epoch": 0.37571349437633966, "grad_norm": 1.3886905908584595, "learning_rate": 6.571848696886725e-07, "loss": 0.9289, "step": 15600 }, { "epoch": 0.375833915368127, "grad_norm": 1.5643943548202515, "learning_rate": 6.570581076969882e-07, "loss": 0.9097, "step": 15605 }, { "epoch": 0.37595433635991427, "grad_norm": 1.5563030242919922, "learning_rate": 6.569313457053037e-07, "loss": 0.8883, "step": 15610 }, { "epoch": 0.37607475735170154, "grad_norm": 1.6308798789978027, "learning_rate": 6.568045837136192e-07, "loss": 0.9332, "step": 15615 }, { "epoch": 0.3761951783434888, "grad_norm": 1.3513928651809692, "learning_rate": 6.566778217219349e-07, "loss": 0.9116, "step": 15620 }, { "epoch": 0.37631559933527614, "grad_norm": 1.5536242723464966, "learning_rate": 6.565510597302505e-07, "loss": 0.9038, "step": 15625 }, { "epoch": 0.3764360203270634, "grad_norm": 1.635443925857544, "learning_rate": 6.56424297738566e-07, "loss": 0.9306, "step": 15630 }, { "epoch": 0.3765564413188507, "grad_norm": 1.6803452968597412, "learning_rate": 6.562975357468816e-07, "loss": 0.9142, "step": 15635 }, { "epoch": 0.376676862310638, "grad_norm": 1.5191351175308228, "learning_rate": 6.561707737551972e-07, "loss": 0.9241, "step": 15640 }, { "epoch": 0.3767972833024253, "grad_norm": 1.50296950340271, "learning_rate": 6.560440117635128e-07, "loss": 0.8873, "step": 15645 }, { "epoch": 0.37691770429421256, "grad_norm": 1.504440426826477, "learning_rate": 6.559172497718284e-07, "loss": 0.9058, "step": 15650 }, { "epoch": 0.37703812528599984, "grad_norm": 2.1617422103881836, "learning_rate": 6.55790487780144e-07, "loss": 0.967, "step": 15655 }, { "epoch": 0.37715854627778717, "grad_norm": 1.6108896732330322, "learning_rate": 6.556637257884595e-07, "loss": 0.9132, "step": 15660 }, { "epoch": 0.37727896726957444, "grad_norm": 1.5929207801818848, "learning_rate": 6.555369637967751e-07, "loss": 0.8985, "step": 15665 }, { "epoch": 0.3773993882613617, "grad_norm": 1.4225010871887207, "learning_rate": 6.554102018050908e-07, "loss": 0.902, "step": 15670 }, { "epoch": 0.377519809253149, "grad_norm": 1.5468358993530273, "learning_rate": 6.552834398134063e-07, "loss": 0.8869, "step": 15675 }, { "epoch": 0.3776402302449363, "grad_norm": 1.748754858970642, "learning_rate": 6.551566778217219e-07, "loss": 0.9002, "step": 15680 }, { "epoch": 0.3777606512367236, "grad_norm": 1.3694506883621216, "learning_rate": 6.550299158300375e-07, "loss": 0.9047, "step": 15685 }, { "epoch": 0.37788107222851086, "grad_norm": 1.686044454574585, "learning_rate": 6.549031538383531e-07, "loss": 0.8809, "step": 15690 }, { "epoch": 0.37800149322029813, "grad_norm": 1.3558310270309448, "learning_rate": 6.547763918466687e-07, "loss": 0.8826, "step": 15695 }, { "epoch": 0.37812191421208546, "grad_norm": 1.455237865447998, "learning_rate": 6.546496298549842e-07, "loss": 0.8968, "step": 15700 }, { "epoch": 0.37824233520387274, "grad_norm": 1.6350035667419434, "learning_rate": 6.545228678632998e-07, "loss": 0.9855, "step": 15705 }, { "epoch": 0.37836275619566, "grad_norm": 1.4560292959213257, "learning_rate": 6.543961058716154e-07, "loss": 0.9257, "step": 15710 }, { "epoch": 0.37848317718744734, "grad_norm": 1.6068018674850464, "learning_rate": 6.542693438799311e-07, "loss": 0.8658, "step": 15715 }, { "epoch": 0.3786035981792346, "grad_norm": 1.4421308040618896, "learning_rate": 6.541425818882466e-07, "loss": 0.9409, "step": 15720 }, { "epoch": 0.3787240191710219, "grad_norm": 1.6204299926757812, "learning_rate": 6.540158198965621e-07, "loss": 0.9064, "step": 15725 }, { "epoch": 0.37884444016280916, "grad_norm": 1.4492141008377075, "learning_rate": 6.538890579048778e-07, "loss": 0.8868, "step": 15730 }, { "epoch": 0.3789648611545965, "grad_norm": 1.4784122705459595, "learning_rate": 6.537622959131933e-07, "loss": 0.8887, "step": 15735 }, { "epoch": 0.37908528214638376, "grad_norm": 1.5144078731536865, "learning_rate": 6.53635533921509e-07, "loss": 0.8895, "step": 15740 }, { "epoch": 0.37920570313817104, "grad_norm": 1.4604182243347168, "learning_rate": 6.535087719298246e-07, "loss": 0.908, "step": 15745 }, { "epoch": 0.3793261241299583, "grad_norm": 1.8133037090301514, "learning_rate": 6.5338200993814e-07, "loss": 0.8743, "step": 15750 }, { "epoch": 0.37944654512174564, "grad_norm": 1.9104950428009033, "learning_rate": 6.532552479464557e-07, "loss": 0.9707, "step": 15755 }, { "epoch": 0.3795669661135329, "grad_norm": 1.2851866483688354, "learning_rate": 6.531284859547713e-07, "loss": 0.8677, "step": 15760 }, { "epoch": 0.3796873871053202, "grad_norm": 1.5550357103347778, "learning_rate": 6.530017239630869e-07, "loss": 0.9423, "step": 15765 }, { "epoch": 0.3798078080971075, "grad_norm": 1.375746250152588, "learning_rate": 6.528749619714024e-07, "loss": 0.8621, "step": 15770 }, { "epoch": 0.3799282290888948, "grad_norm": 1.5696123838424683, "learning_rate": 6.527481999797181e-07, "loss": 0.8835, "step": 15775 }, { "epoch": 0.38004865008068206, "grad_norm": 1.5719820261001587, "learning_rate": 6.526214379880336e-07, "loss": 0.9317, "step": 15780 }, { "epoch": 0.38016907107246933, "grad_norm": 1.4429283142089844, "learning_rate": 6.524946759963492e-07, "loss": 0.862, "step": 15785 }, { "epoch": 0.38028949206425666, "grad_norm": 1.5603541135787964, "learning_rate": 6.523679140046649e-07, "loss": 0.9097, "step": 15790 }, { "epoch": 0.38040991305604394, "grad_norm": 1.5130277872085571, "learning_rate": 6.522411520129803e-07, "loss": 0.9088, "step": 15795 }, { "epoch": 0.3805303340478312, "grad_norm": 1.3764976263046265, "learning_rate": 6.52114390021296e-07, "loss": 0.8709, "step": 15800 }, { "epoch": 0.3806507550396185, "grad_norm": 1.3912783861160278, "learning_rate": 6.519876280296116e-07, "loss": 0.9016, "step": 15805 }, { "epoch": 0.3807711760314058, "grad_norm": 1.5324950218200684, "learning_rate": 6.518608660379272e-07, "loss": 0.913, "step": 15810 }, { "epoch": 0.3808915970231931, "grad_norm": 1.5858607292175293, "learning_rate": 6.517341040462427e-07, "loss": 0.8764, "step": 15815 }, { "epoch": 0.38101201801498036, "grad_norm": 1.5081491470336914, "learning_rate": 6.516073420545583e-07, "loss": 0.9308, "step": 15820 }, { "epoch": 0.3811324390067677, "grad_norm": 1.554904580116272, "learning_rate": 6.514805800628739e-07, "loss": 0.9474, "step": 15825 }, { "epoch": 0.38125285999855496, "grad_norm": 1.6217893362045288, "learning_rate": 6.513538180711895e-07, "loss": 0.887, "step": 15830 }, { "epoch": 0.38137328099034223, "grad_norm": 1.6178269386291504, "learning_rate": 6.512270560795052e-07, "loss": 0.958, "step": 15835 }, { "epoch": 0.3814937019821295, "grad_norm": 1.3969887495040894, "learning_rate": 6.511002940878206e-07, "loss": 0.948, "step": 15840 }, { "epoch": 0.38161412297391684, "grad_norm": 1.5228673219680786, "learning_rate": 6.509735320961362e-07, "loss": 0.8732, "step": 15845 }, { "epoch": 0.3817345439657041, "grad_norm": 1.427930235862732, "learning_rate": 6.508467701044519e-07, "loss": 0.9315, "step": 15850 }, { "epoch": 0.3818549649574914, "grad_norm": 1.573591947555542, "learning_rate": 6.507200081127674e-07, "loss": 0.9077, "step": 15855 }, { "epoch": 0.38197538594927866, "grad_norm": 1.5548583269119263, "learning_rate": 6.505932461210831e-07, "loss": 0.9134, "step": 15860 }, { "epoch": 0.382095806941066, "grad_norm": 1.7557604312896729, "learning_rate": 6.504664841293986e-07, "loss": 0.9059, "step": 15865 }, { "epoch": 0.38221622793285326, "grad_norm": 1.349873661994934, "learning_rate": 6.503397221377141e-07, "loss": 0.8972, "step": 15870 }, { "epoch": 0.38233664892464053, "grad_norm": 1.7548439502716064, "learning_rate": 6.502129601460298e-07, "loss": 0.9036, "step": 15875 }, { "epoch": 0.3824570699164278, "grad_norm": 1.759466290473938, "learning_rate": 6.500861981543454e-07, "loss": 0.9172, "step": 15880 }, { "epoch": 0.38257749090821513, "grad_norm": 1.6105694770812988, "learning_rate": 6.499594361626609e-07, "loss": 0.89, "step": 15885 }, { "epoch": 0.3826979119000024, "grad_norm": 1.4921594858169556, "learning_rate": 6.498326741709765e-07, "loss": 0.958, "step": 15890 }, { "epoch": 0.3828183328917897, "grad_norm": 1.6352791786193848, "learning_rate": 6.497059121792922e-07, "loss": 0.8882, "step": 15895 }, { "epoch": 0.382938753883577, "grad_norm": 1.5943946838378906, "learning_rate": 6.495791501876077e-07, "loss": 0.9128, "step": 15900 }, { "epoch": 0.3830591748753643, "grad_norm": 1.497174859046936, "learning_rate": 6.494523881959233e-07, "loss": 0.9548, "step": 15905 }, { "epoch": 0.38317959586715156, "grad_norm": 1.540085792541504, "learning_rate": 6.493256262042389e-07, "loss": 0.9293, "step": 15910 }, { "epoch": 0.38330001685893883, "grad_norm": 1.5195233821868896, "learning_rate": 6.491988642125544e-07, "loss": 0.9311, "step": 15915 }, { "epoch": 0.38342043785072616, "grad_norm": 1.3735129833221436, "learning_rate": 6.490721022208701e-07, "loss": 0.8887, "step": 15920 }, { "epoch": 0.38354085884251343, "grad_norm": 1.440384030342102, "learning_rate": 6.489453402291857e-07, "loss": 0.8948, "step": 15925 }, { "epoch": 0.3836612798343007, "grad_norm": 1.7172589302062988, "learning_rate": 6.488185782375011e-07, "loss": 0.8894, "step": 15930 }, { "epoch": 0.383781700826088, "grad_norm": 1.529138207435608, "learning_rate": 6.486918162458168e-07, "loss": 0.9409, "step": 15935 }, { "epoch": 0.3839021218178753, "grad_norm": 1.5386652946472168, "learning_rate": 6.485650542541324e-07, "loss": 0.9094, "step": 15940 }, { "epoch": 0.3840225428096626, "grad_norm": 1.4735944271087646, "learning_rate": 6.48438292262448e-07, "loss": 0.8712, "step": 15945 }, { "epoch": 0.38414296380144985, "grad_norm": 1.6935973167419434, "learning_rate": 6.483115302707636e-07, "loss": 0.8893, "step": 15950 }, { "epoch": 0.3842633847932372, "grad_norm": 1.518571138381958, "learning_rate": 6.481847682790792e-07, "loss": 0.9172, "step": 15955 }, { "epoch": 0.38438380578502446, "grad_norm": 1.3189793825149536, "learning_rate": 6.480580062873947e-07, "loss": 0.9205, "step": 15960 }, { "epoch": 0.38450422677681173, "grad_norm": 1.464625358581543, "learning_rate": 6.479312442957103e-07, "loss": 0.8986, "step": 15965 }, { "epoch": 0.384624647768599, "grad_norm": 1.9872775077819824, "learning_rate": 6.47804482304026e-07, "loss": 0.892, "step": 15970 }, { "epoch": 0.38474506876038633, "grad_norm": 1.6058225631713867, "learning_rate": 6.476777203123415e-07, "loss": 0.9443, "step": 15975 }, { "epoch": 0.3848654897521736, "grad_norm": 1.6584371328353882, "learning_rate": 6.475509583206571e-07, "loss": 0.8896, "step": 15980 }, { "epoch": 0.3849859107439609, "grad_norm": 1.4320168495178223, "learning_rate": 6.474241963289727e-07, "loss": 0.8685, "step": 15985 }, { "epoch": 0.38510633173574815, "grad_norm": 1.446475863456726, "learning_rate": 6.472974343372882e-07, "loss": 0.9195, "step": 15990 }, { "epoch": 0.3852267527275355, "grad_norm": 1.3913068771362305, "learning_rate": 6.471706723456039e-07, "loss": 0.9159, "step": 15995 }, { "epoch": 0.38534717371932276, "grad_norm": 1.4390864372253418, "learning_rate": 6.470439103539194e-07, "loss": 0.9049, "step": 16000 }, { "epoch": 0.38546759471111003, "grad_norm": 1.8681691884994507, "learning_rate": 6.46917148362235e-07, "loss": 0.9117, "step": 16005 }, { "epoch": 0.3855880157028973, "grad_norm": 1.505843997001648, "learning_rate": 6.467903863705506e-07, "loss": 0.9035, "step": 16010 }, { "epoch": 0.38570843669468463, "grad_norm": 1.7058168649673462, "learning_rate": 6.466636243788663e-07, "loss": 0.8936, "step": 16015 }, { "epoch": 0.3858288576864719, "grad_norm": 1.4852770566940308, "learning_rate": 6.465368623871818e-07, "loss": 0.9428, "step": 16020 }, { "epoch": 0.3859492786782592, "grad_norm": 1.5499645471572876, "learning_rate": 6.464101003954973e-07, "loss": 0.9151, "step": 16025 }, { "epoch": 0.3860696996700465, "grad_norm": 1.4708459377288818, "learning_rate": 6.46283338403813e-07, "loss": 0.9137, "step": 16030 }, { "epoch": 0.3861901206618338, "grad_norm": 1.685199499130249, "learning_rate": 6.461565764121285e-07, "loss": 0.9266, "step": 16035 }, { "epoch": 0.38631054165362105, "grad_norm": 2.1729648113250732, "learning_rate": 6.460298144204442e-07, "loss": 0.9238, "step": 16040 }, { "epoch": 0.3864309626454083, "grad_norm": 1.7583898305892944, "learning_rate": 6.459030524287598e-07, "loss": 0.938, "step": 16045 }, { "epoch": 0.38655138363719566, "grad_norm": 1.462072730064392, "learning_rate": 6.457762904370752e-07, "loss": 0.8998, "step": 16050 }, { "epoch": 0.38667180462898293, "grad_norm": 1.5906111001968384, "learning_rate": 6.456495284453909e-07, "loss": 0.975, "step": 16055 }, { "epoch": 0.3867922256207702, "grad_norm": 1.4166845083236694, "learning_rate": 6.455227664537065e-07, "loss": 0.8913, "step": 16060 }, { "epoch": 0.3869126466125575, "grad_norm": 1.5081733465194702, "learning_rate": 6.453960044620221e-07, "loss": 0.9391, "step": 16065 }, { "epoch": 0.3870330676043448, "grad_norm": 1.5960620641708374, "learning_rate": 6.452692424703376e-07, "loss": 0.9146, "step": 16070 }, { "epoch": 0.3871534885961321, "grad_norm": 1.5286779403686523, "learning_rate": 6.451424804786533e-07, "loss": 0.9371, "step": 16075 }, { "epoch": 0.38727390958791935, "grad_norm": 1.838586688041687, "learning_rate": 6.450157184869688e-07, "loss": 0.9116, "step": 16080 }, { "epoch": 0.3873943305797067, "grad_norm": 1.3697798252105713, "learning_rate": 6.448889564952844e-07, "loss": 0.9188, "step": 16085 }, { "epoch": 0.38751475157149395, "grad_norm": 1.5102379322052002, "learning_rate": 6.447621945036001e-07, "loss": 0.9338, "step": 16090 }, { "epoch": 0.3876351725632812, "grad_norm": 1.5454723834991455, "learning_rate": 6.446354325119155e-07, "loss": 0.8167, "step": 16095 }, { "epoch": 0.3877555935550685, "grad_norm": 1.4021121263504028, "learning_rate": 6.445086705202312e-07, "loss": 0.9009, "step": 16100 }, { "epoch": 0.38787601454685583, "grad_norm": 1.4518719911575317, "learning_rate": 6.443819085285468e-07, "loss": 0.896, "step": 16105 }, { "epoch": 0.3879964355386431, "grad_norm": 1.5615367889404297, "learning_rate": 6.442551465368623e-07, "loss": 0.8825, "step": 16110 }, { "epoch": 0.3881168565304304, "grad_norm": 1.4544663429260254, "learning_rate": 6.441283845451779e-07, "loss": 0.9124, "step": 16115 }, { "epoch": 0.38823727752221765, "grad_norm": 1.5126712322235107, "learning_rate": 6.440016225534935e-07, "loss": 0.9434, "step": 16120 }, { "epoch": 0.388357698514005, "grad_norm": 1.5839651823043823, "learning_rate": 6.438748605618091e-07, "loss": 0.9702, "step": 16125 }, { "epoch": 0.38847811950579225, "grad_norm": 2.018829107284546, "learning_rate": 6.437480985701247e-07, "loss": 0.9184, "step": 16130 }, { "epoch": 0.3885985404975795, "grad_norm": 1.6250749826431274, "learning_rate": 6.436213365784404e-07, "loss": 0.9592, "step": 16135 }, { "epoch": 0.3887189614893668, "grad_norm": 1.336804747581482, "learning_rate": 6.434945745867558e-07, "loss": 0.8839, "step": 16140 }, { "epoch": 0.3888393824811541, "grad_norm": 1.6236604452133179, "learning_rate": 6.433678125950714e-07, "loss": 0.9167, "step": 16145 }, { "epoch": 0.3889598034729414, "grad_norm": 1.5479767322540283, "learning_rate": 6.432410506033871e-07, "loss": 0.8679, "step": 16150 }, { "epoch": 0.3890802244647287, "grad_norm": 1.5190855264663696, "learning_rate": 6.431142886117027e-07, "loss": 0.8982, "step": 16155 }, { "epoch": 0.389200645456516, "grad_norm": 1.6366859674453735, "learning_rate": 6.429875266200183e-07, "loss": 0.9002, "step": 16160 }, { "epoch": 0.3893210664483033, "grad_norm": 1.5403122901916504, "learning_rate": 6.428607646283338e-07, "loss": 0.8571, "step": 16165 }, { "epoch": 0.38944148744009055, "grad_norm": 1.5745781660079956, "learning_rate": 6.427340026366494e-07, "loss": 0.9317, "step": 16170 }, { "epoch": 0.3895619084318778, "grad_norm": 1.6703364849090576, "learning_rate": 6.42607240644965e-07, "loss": 0.927, "step": 16175 }, { "epoch": 0.38968232942366515, "grad_norm": 1.4843415021896362, "learning_rate": 6.424804786532806e-07, "loss": 0.9106, "step": 16180 }, { "epoch": 0.3898027504154524, "grad_norm": 1.4242477416992188, "learning_rate": 6.423537166615962e-07, "loss": 0.9481, "step": 16185 }, { "epoch": 0.3899231714072397, "grad_norm": 1.34717857837677, "learning_rate": 6.422269546699117e-07, "loss": 0.9421, "step": 16190 }, { "epoch": 0.390043592399027, "grad_norm": 1.734257698059082, "learning_rate": 6.421001926782273e-07, "loss": 0.9215, "step": 16195 }, { "epoch": 0.3901640133908143, "grad_norm": 1.5538983345031738, "learning_rate": 6.41973430686543e-07, "loss": 0.9245, "step": 16200 }, { "epoch": 0.3902844343826016, "grad_norm": 1.4236520528793335, "learning_rate": 6.418466686948585e-07, "loss": 0.9123, "step": 16205 }, { "epoch": 0.39040485537438885, "grad_norm": 1.6775470972061157, "learning_rate": 6.417199067031741e-07, "loss": 0.9514, "step": 16210 }, { "epoch": 0.3905252763661762, "grad_norm": 1.5894907712936401, "learning_rate": 6.415931447114897e-07, "loss": 0.8613, "step": 16215 }, { "epoch": 0.39064569735796345, "grad_norm": 1.8615303039550781, "learning_rate": 6.414663827198053e-07, "loss": 0.8585, "step": 16220 }, { "epoch": 0.3907661183497507, "grad_norm": 1.587877631187439, "learning_rate": 6.413396207281209e-07, "loss": 0.9591, "step": 16225 }, { "epoch": 0.390886539341538, "grad_norm": 1.5446151494979858, "learning_rate": 6.412128587364364e-07, "loss": 0.9316, "step": 16230 }, { "epoch": 0.3910069603333253, "grad_norm": 1.3091187477111816, "learning_rate": 6.41086096744752e-07, "loss": 0.9587, "step": 16235 }, { "epoch": 0.3911273813251126, "grad_norm": 1.5175570249557495, "learning_rate": 6.409593347530676e-07, "loss": 0.9147, "step": 16240 }, { "epoch": 0.3912478023168999, "grad_norm": 1.386093020439148, "learning_rate": 6.408325727613833e-07, "loss": 0.9803, "step": 16245 }, { "epoch": 0.39136822330868715, "grad_norm": 1.4947230815887451, "learning_rate": 6.407058107696988e-07, "loss": 0.879, "step": 16250 }, { "epoch": 0.3914886443004745, "grad_norm": 1.4012850522994995, "learning_rate": 6.405790487780143e-07, "loss": 0.8928, "step": 16255 }, { "epoch": 0.39160906529226175, "grad_norm": 1.508278727531433, "learning_rate": 6.4045228678633e-07, "loss": 0.8992, "step": 16260 }, { "epoch": 0.391729486284049, "grad_norm": 1.5345510244369507, "learning_rate": 6.403255247946455e-07, "loss": 0.9355, "step": 16265 }, { "epoch": 0.39184990727583635, "grad_norm": 1.515647292137146, "learning_rate": 6.401987628029612e-07, "loss": 0.9122, "step": 16270 }, { "epoch": 0.3919703282676236, "grad_norm": 1.5095194578170776, "learning_rate": 6.400720008112768e-07, "loss": 0.9869, "step": 16275 }, { "epoch": 0.3920907492594109, "grad_norm": 1.6890863180160522, "learning_rate": 6.399452388195922e-07, "loss": 0.9405, "step": 16280 }, { "epoch": 0.39221117025119817, "grad_norm": 1.5294418334960938, "learning_rate": 6.398184768279079e-07, "loss": 0.9011, "step": 16285 }, { "epoch": 0.3923315912429855, "grad_norm": 1.4933300018310547, "learning_rate": 6.396917148362235e-07, "loss": 0.9059, "step": 16290 }, { "epoch": 0.3924520122347728, "grad_norm": 1.4482684135437012, "learning_rate": 6.395649528445391e-07, "loss": 0.8861, "step": 16295 }, { "epoch": 0.39257243322656005, "grad_norm": 1.3621864318847656, "learning_rate": 6.394381908528546e-07, "loss": 0.9167, "step": 16300 }, { "epoch": 0.3926928542183473, "grad_norm": 1.451184868812561, "learning_rate": 6.393114288611703e-07, "loss": 0.9173, "step": 16305 }, { "epoch": 0.39281327521013465, "grad_norm": 1.487359642982483, "learning_rate": 6.391846668694858e-07, "loss": 0.9596, "step": 16310 }, { "epoch": 0.3929336962019219, "grad_norm": 1.5361380577087402, "learning_rate": 6.390579048778014e-07, "loss": 0.934, "step": 16315 }, { "epoch": 0.3930541171937092, "grad_norm": 1.566884160041809, "learning_rate": 6.389311428861171e-07, "loss": 0.8644, "step": 16320 }, { "epoch": 0.39317453818549647, "grad_norm": 1.6171588897705078, "learning_rate": 6.388043808944325e-07, "loss": 0.9374, "step": 16325 }, { "epoch": 0.3932949591772838, "grad_norm": 1.5238803625106812, "learning_rate": 6.386776189027482e-07, "loss": 0.9359, "step": 16330 }, { "epoch": 0.39341538016907107, "grad_norm": 1.6145451068878174, "learning_rate": 6.385508569110638e-07, "loss": 0.914, "step": 16335 }, { "epoch": 0.39353580116085835, "grad_norm": 1.5774810314178467, "learning_rate": 6.384240949193794e-07, "loss": 0.8834, "step": 16340 }, { "epoch": 0.3936562221526457, "grad_norm": 1.6705057621002197, "learning_rate": 6.382973329276949e-07, "loss": 0.8951, "step": 16345 }, { "epoch": 0.39377664314443295, "grad_norm": 1.5039398670196533, "learning_rate": 6.381705709360105e-07, "loss": 0.9229, "step": 16350 }, { "epoch": 0.3938970641362202, "grad_norm": 1.585025429725647, "learning_rate": 6.380438089443261e-07, "loss": 0.9039, "step": 16355 }, { "epoch": 0.3940174851280075, "grad_norm": 1.6906098127365112, "learning_rate": 6.379170469526417e-07, "loss": 0.9368, "step": 16360 }, { "epoch": 0.3941379061197948, "grad_norm": 1.5157690048217773, "learning_rate": 6.377902849609574e-07, "loss": 0.8906, "step": 16365 }, { "epoch": 0.3942583271115821, "grad_norm": 1.4066121578216553, "learning_rate": 6.376635229692728e-07, "loss": 0.9337, "step": 16370 }, { "epoch": 0.39437874810336937, "grad_norm": 1.4881788492202759, "learning_rate": 6.375367609775884e-07, "loss": 0.9241, "step": 16375 }, { "epoch": 0.39449916909515664, "grad_norm": 1.500671148300171, "learning_rate": 6.374099989859041e-07, "loss": 0.97, "step": 16380 }, { "epoch": 0.39461959008694397, "grad_norm": 1.5400736331939697, "learning_rate": 6.372832369942196e-07, "loss": 0.9208, "step": 16385 }, { "epoch": 0.39474001107873125, "grad_norm": 1.468634009361267, "learning_rate": 6.371564750025353e-07, "loss": 0.9247, "step": 16390 }, { "epoch": 0.3948604320705185, "grad_norm": 1.510955572128296, "learning_rate": 6.370297130108508e-07, "loss": 0.9041, "step": 16395 }, { "epoch": 0.39498085306230585, "grad_norm": 1.528254747390747, "learning_rate": 6.369029510191663e-07, "loss": 0.9339, "step": 16400 }, { "epoch": 0.3951012740540931, "grad_norm": 1.5330232381820679, "learning_rate": 6.36776189027482e-07, "loss": 0.897, "step": 16405 }, { "epoch": 0.3952216950458804, "grad_norm": 1.4764940738677979, "learning_rate": 6.366494270357976e-07, "loss": 0.9394, "step": 16410 }, { "epoch": 0.39534211603766767, "grad_norm": 1.4831757545471191, "learning_rate": 6.365226650441131e-07, "loss": 0.9418, "step": 16415 }, { "epoch": 0.395462537029455, "grad_norm": 1.6943193674087524, "learning_rate": 6.363959030524287e-07, "loss": 0.8861, "step": 16420 }, { "epoch": 0.39558295802124227, "grad_norm": 1.3982515335083008, "learning_rate": 6.362691410607444e-07, "loss": 0.791, "step": 16425 }, { "epoch": 0.39570337901302954, "grad_norm": 1.4633845090866089, "learning_rate": 6.361423790690599e-07, "loss": 0.9638, "step": 16430 }, { "epoch": 0.3958238000048168, "grad_norm": 1.807855486869812, "learning_rate": 6.360156170773755e-07, "loss": 0.941, "step": 16435 }, { "epoch": 0.39594422099660415, "grad_norm": 1.523313283920288, "learning_rate": 6.358888550856911e-07, "loss": 0.9431, "step": 16440 }, { "epoch": 0.3960646419883914, "grad_norm": 1.3463391065597534, "learning_rate": 6.357620930940066e-07, "loss": 0.9069, "step": 16445 }, { "epoch": 0.3961850629801787, "grad_norm": 1.6524784564971924, "learning_rate": 6.356353311023223e-07, "loss": 0.9588, "step": 16450 }, { "epoch": 0.39630548397196597, "grad_norm": 1.4540703296661377, "learning_rate": 6.355085691106379e-07, "loss": 0.8533, "step": 16455 }, { "epoch": 0.3964259049637533, "grad_norm": 1.5041921138763428, "learning_rate": 6.353818071189533e-07, "loss": 0.9025, "step": 16460 }, { "epoch": 0.39654632595554057, "grad_norm": 1.4892079830169678, "learning_rate": 6.35255045127269e-07, "loss": 0.948, "step": 16465 }, { "epoch": 0.39666674694732784, "grad_norm": 1.909855604171753, "learning_rate": 6.351282831355846e-07, "loss": 0.9106, "step": 16470 }, { "epoch": 0.39678716793911517, "grad_norm": 1.4165951013565063, "learning_rate": 6.350015211439002e-07, "loss": 0.9051, "step": 16475 }, { "epoch": 0.39690758893090244, "grad_norm": 1.558835506439209, "learning_rate": 6.348747591522158e-07, "loss": 0.8895, "step": 16480 }, { "epoch": 0.3970280099226897, "grad_norm": 1.5130789279937744, "learning_rate": 6.347479971605314e-07, "loss": 0.9011, "step": 16485 }, { "epoch": 0.397148430914477, "grad_norm": 1.735569953918457, "learning_rate": 6.346212351688469e-07, "loss": 0.9343, "step": 16490 }, { "epoch": 0.3972688519062643, "grad_norm": 1.4600125551223755, "learning_rate": 6.344944731771625e-07, "loss": 0.9466, "step": 16495 }, { "epoch": 0.3973892728980516, "grad_norm": 1.612957239151001, "learning_rate": 6.343677111854782e-07, "loss": 0.9414, "step": 16500 }, { "epoch": 0.39750969388983887, "grad_norm": 1.5158154964447021, "learning_rate": 6.342409491937937e-07, "loss": 0.9412, "step": 16505 }, { "epoch": 0.39763011488162614, "grad_norm": 1.6656270027160645, "learning_rate": 6.341141872021093e-07, "loss": 0.8693, "step": 16510 }, { "epoch": 0.39775053587341347, "grad_norm": 1.6811892986297607, "learning_rate": 6.339874252104249e-07, "loss": 0.9543, "step": 16515 }, { "epoch": 0.39787095686520074, "grad_norm": 1.6302272081375122, "learning_rate": 6.338606632187404e-07, "loss": 0.9084, "step": 16520 }, { "epoch": 0.397991377856988, "grad_norm": 1.7181906700134277, "learning_rate": 6.337339012270561e-07, "loss": 0.8922, "step": 16525 }, { "epoch": 0.39811179884877534, "grad_norm": 1.523653268814087, "learning_rate": 6.336071392353716e-07, "loss": 0.8399, "step": 16530 }, { "epoch": 0.3982322198405626, "grad_norm": 1.7446633577346802, "learning_rate": 6.334803772436872e-07, "loss": 0.9446, "step": 16535 }, { "epoch": 0.3983526408323499, "grad_norm": 1.4260302782058716, "learning_rate": 6.333536152520028e-07, "loss": 0.9209, "step": 16540 }, { "epoch": 0.39847306182413716, "grad_norm": 1.572892189025879, "learning_rate": 6.332268532603185e-07, "loss": 0.899, "step": 16545 }, { "epoch": 0.3985934828159245, "grad_norm": 1.5617256164550781, "learning_rate": 6.33100091268634e-07, "loss": 0.909, "step": 16550 }, { "epoch": 0.39871390380771177, "grad_norm": 1.680518388748169, "learning_rate": 6.329733292769495e-07, "loss": 0.9296, "step": 16555 }, { "epoch": 0.39883432479949904, "grad_norm": 1.9393974542617798, "learning_rate": 6.328465672852652e-07, "loss": 0.8945, "step": 16560 }, { "epoch": 0.3989547457912863, "grad_norm": 1.7089143991470337, "learning_rate": 6.327198052935807e-07, "loss": 0.9152, "step": 16565 }, { "epoch": 0.39907516678307364, "grad_norm": 1.5115058422088623, "learning_rate": 6.325930433018964e-07, "loss": 0.8943, "step": 16570 }, { "epoch": 0.3991955877748609, "grad_norm": 1.4281383752822876, "learning_rate": 6.32466281310212e-07, "loss": 0.9097, "step": 16575 }, { "epoch": 0.3993160087666482, "grad_norm": 1.453593373298645, "learning_rate": 6.323395193185274e-07, "loss": 0.9352, "step": 16580 }, { "epoch": 0.39943642975843546, "grad_norm": 1.7239865064620972, "learning_rate": 6.322127573268431e-07, "loss": 0.9259, "step": 16585 }, { "epoch": 0.3995568507502228, "grad_norm": 1.3729093074798584, "learning_rate": 6.320859953351587e-07, "loss": 0.9445, "step": 16590 }, { "epoch": 0.39967727174201007, "grad_norm": 1.594544529914856, "learning_rate": 6.319592333434743e-07, "loss": 0.9346, "step": 16595 }, { "epoch": 0.39979769273379734, "grad_norm": 1.5372705459594727, "learning_rate": 6.318324713517898e-07, "loss": 0.937, "step": 16600 }, { "epoch": 0.39991811372558467, "grad_norm": 1.740492343902588, "learning_rate": 6.317057093601055e-07, "loss": 0.955, "step": 16605 }, { "epoch": 0.40003853471737194, "grad_norm": 1.7151694297790527, "learning_rate": 6.31578947368421e-07, "loss": 0.9433, "step": 16610 }, { "epoch": 0.4001589557091592, "grad_norm": 1.511307954788208, "learning_rate": 6.314521853767366e-07, "loss": 0.8717, "step": 16615 }, { "epoch": 0.4002793767009465, "grad_norm": 1.4371942281723022, "learning_rate": 6.313254233850523e-07, "loss": 0.9438, "step": 16620 }, { "epoch": 0.4003997976927338, "grad_norm": 1.4800249338150024, "learning_rate": 6.311986613933677e-07, "loss": 0.92, "step": 16625 }, { "epoch": 0.4005202186845211, "grad_norm": 1.5603761672973633, "learning_rate": 6.310718994016834e-07, "loss": 0.9322, "step": 16630 }, { "epoch": 0.40064063967630836, "grad_norm": 1.6178046464920044, "learning_rate": 6.30945137409999e-07, "loss": 0.8687, "step": 16635 }, { "epoch": 0.40076106066809564, "grad_norm": 1.4481092691421509, "learning_rate": 6.308183754183145e-07, "loss": 0.9405, "step": 16640 }, { "epoch": 0.40088148165988297, "grad_norm": 1.5825382471084595, "learning_rate": 6.306916134266301e-07, "loss": 0.8999, "step": 16645 }, { "epoch": 0.40100190265167024, "grad_norm": 1.569267749786377, "learning_rate": 6.305648514349457e-07, "loss": 0.8915, "step": 16650 }, { "epoch": 0.4011223236434575, "grad_norm": 1.5229368209838867, "learning_rate": 6.304380894432613e-07, "loss": 0.9088, "step": 16655 }, { "epoch": 0.40124274463524484, "grad_norm": 1.4642391204833984, "learning_rate": 6.303113274515769e-07, "loss": 0.9022, "step": 16660 }, { "epoch": 0.4013631656270321, "grad_norm": 1.8505054712295532, "learning_rate": 6.301845654598926e-07, "loss": 0.9742, "step": 16665 }, { "epoch": 0.4014835866188194, "grad_norm": 1.8605384826660156, "learning_rate": 6.30057803468208e-07, "loss": 0.8914, "step": 16670 }, { "epoch": 0.40160400761060666, "grad_norm": 1.5216584205627441, "learning_rate": 6.299310414765236e-07, "loss": 0.9265, "step": 16675 }, { "epoch": 0.401724428602394, "grad_norm": 1.6303132772445679, "learning_rate": 6.298042794848393e-07, "loss": 0.9063, "step": 16680 }, { "epoch": 0.40184484959418126, "grad_norm": 1.4981777667999268, "learning_rate": 6.296775174931548e-07, "loss": 0.8603, "step": 16685 }, { "epoch": 0.40196527058596854, "grad_norm": 1.6867685317993164, "learning_rate": 6.295507555014705e-07, "loss": 0.9356, "step": 16690 }, { "epoch": 0.4020856915777558, "grad_norm": 1.4873652458190918, "learning_rate": 6.29423993509786e-07, "loss": 0.9087, "step": 16695 }, { "epoch": 0.40220611256954314, "grad_norm": 1.558084487915039, "learning_rate": 6.292972315181015e-07, "loss": 0.9398, "step": 16700 }, { "epoch": 0.4023265335613304, "grad_norm": 1.403309941291809, "learning_rate": 6.291704695264172e-07, "loss": 0.8758, "step": 16705 }, { "epoch": 0.4024469545531177, "grad_norm": 1.7472859621047974, "learning_rate": 6.290437075347328e-07, "loss": 0.9222, "step": 16710 }, { "epoch": 0.402567375544905, "grad_norm": 1.427155613899231, "learning_rate": 6.289169455430483e-07, "loss": 0.8999, "step": 16715 }, { "epoch": 0.4026877965366923, "grad_norm": 1.5862841606140137, "learning_rate": 6.287901835513639e-07, "loss": 0.8766, "step": 16720 }, { "epoch": 0.40280821752847956, "grad_norm": 1.6798174381256104, "learning_rate": 6.286634215596795e-07, "loss": 0.9728, "step": 16725 }, { "epoch": 0.40292863852026684, "grad_norm": 1.4473795890808105, "learning_rate": 6.285366595679951e-07, "loss": 0.898, "step": 16730 }, { "epoch": 0.40304905951205416, "grad_norm": 1.7120901346206665, "learning_rate": 6.284098975763107e-07, "loss": 0.8887, "step": 16735 }, { "epoch": 0.40316948050384144, "grad_norm": 1.587019920349121, "learning_rate": 6.282831355846263e-07, "loss": 0.933, "step": 16740 }, { "epoch": 0.4032899014956287, "grad_norm": 1.3541581630706787, "learning_rate": 6.281563735929418e-07, "loss": 0.8703, "step": 16745 }, { "epoch": 0.403410322487416, "grad_norm": 1.5999221801757812, "learning_rate": 6.280296116012575e-07, "loss": 0.8818, "step": 16750 }, { "epoch": 0.4035307434792033, "grad_norm": 1.6486579179763794, "learning_rate": 6.279028496095731e-07, "loss": 0.9107, "step": 16755 }, { "epoch": 0.4036511644709906, "grad_norm": 1.8040155172348022, "learning_rate": 6.277760876178885e-07, "loss": 0.9138, "step": 16760 }, { "epoch": 0.40377158546277786, "grad_norm": 1.6447283029556274, "learning_rate": 6.276493256262042e-07, "loss": 0.9212, "step": 16765 }, { "epoch": 0.40389200645456513, "grad_norm": 1.5155274868011475, "learning_rate": 6.275225636345198e-07, "loss": 0.8841, "step": 16770 }, { "epoch": 0.40401242744635246, "grad_norm": 1.705776333808899, "learning_rate": 6.273958016428354e-07, "loss": 0.9116, "step": 16775 }, { "epoch": 0.40413284843813974, "grad_norm": 1.5871044397354126, "learning_rate": 6.27269039651151e-07, "loss": 0.9305, "step": 16780 }, { "epoch": 0.404253269429927, "grad_norm": 1.5335018634796143, "learning_rate": 6.271422776594665e-07, "loss": 0.9243, "step": 16785 }, { "epoch": 0.40437369042171434, "grad_norm": 1.5292437076568604, "learning_rate": 6.270155156677821e-07, "loss": 0.8909, "step": 16790 }, { "epoch": 0.4044941114135016, "grad_norm": 1.6406742334365845, "learning_rate": 6.268887536760977e-07, "loss": 0.9501, "step": 16795 }, { "epoch": 0.4046145324052889, "grad_norm": 1.462023377418518, "learning_rate": 6.267619916844134e-07, "loss": 0.8919, "step": 16800 }, { "epoch": 0.40473495339707616, "grad_norm": 1.5037622451782227, "learning_rate": 6.266352296927289e-07, "loss": 0.9235, "step": 16805 }, { "epoch": 0.4048553743888635, "grad_norm": 1.6942980289459229, "learning_rate": 6.265084677010445e-07, "loss": 0.9217, "step": 16810 }, { "epoch": 0.40497579538065076, "grad_norm": 1.740375280380249, "learning_rate": 6.263817057093601e-07, "loss": 0.9559, "step": 16815 }, { "epoch": 0.40509621637243803, "grad_norm": 1.5765180587768555, "learning_rate": 6.262549437176756e-07, "loss": 0.9235, "step": 16820 }, { "epoch": 0.4052166373642253, "grad_norm": 1.4993369579315186, "learning_rate": 6.261281817259913e-07, "loss": 0.9422, "step": 16825 }, { "epoch": 0.40533705835601264, "grad_norm": 1.3874576091766357, "learning_rate": 6.260014197343068e-07, "loss": 0.9643, "step": 16830 }, { "epoch": 0.4054574793477999, "grad_norm": 1.4534552097320557, "learning_rate": 6.258746577426224e-07, "loss": 0.908, "step": 16835 }, { "epoch": 0.4055779003395872, "grad_norm": 1.5588668584823608, "learning_rate": 6.25747895750938e-07, "loss": 0.8767, "step": 16840 }, { "epoch": 0.4056983213313745, "grad_norm": 1.445541501045227, "learning_rate": 6.256211337592536e-07, "loss": 0.8934, "step": 16845 }, { "epoch": 0.4058187423231618, "grad_norm": 1.5574666261672974, "learning_rate": 6.254943717675692e-07, "loss": 0.92, "step": 16850 }, { "epoch": 0.40593916331494906, "grad_norm": 1.498324990272522, "learning_rate": 6.253676097758847e-07, "loss": 0.8976, "step": 16855 }, { "epoch": 0.40605958430673633, "grad_norm": 1.4403085708618164, "learning_rate": 6.252408477842004e-07, "loss": 0.9248, "step": 16860 }, { "epoch": 0.40618000529852366, "grad_norm": 1.6509935855865479, "learning_rate": 6.251140857925159e-07, "loss": 0.9398, "step": 16865 }, { "epoch": 0.40630042629031093, "grad_norm": 1.5441958904266357, "learning_rate": 6.249873238008316e-07, "loss": 0.9183, "step": 16870 }, { "epoch": 0.4064208472820982, "grad_norm": 1.4594789743423462, "learning_rate": 6.248605618091472e-07, "loss": 0.9768, "step": 16875 }, { "epoch": 0.4065412682738855, "grad_norm": 1.571266770362854, "learning_rate": 6.247337998174626e-07, "loss": 0.8789, "step": 16880 }, { "epoch": 0.4066616892656728, "grad_norm": 1.6715950965881348, "learning_rate": 6.246070378257783e-07, "loss": 0.8904, "step": 16885 }, { "epoch": 0.4067821102574601, "grad_norm": 1.7333935499191284, "learning_rate": 6.244802758340939e-07, "loss": 0.9722, "step": 16890 }, { "epoch": 0.40690253124924736, "grad_norm": 1.4930369853973389, "learning_rate": 6.243535138424095e-07, "loss": 0.879, "step": 16895 }, { "epoch": 0.40702295224103463, "grad_norm": 1.5462515354156494, "learning_rate": 6.24226751850725e-07, "loss": 0.8793, "step": 16900 }, { "epoch": 0.40714337323282196, "grad_norm": 1.6213178634643555, "learning_rate": 6.240999898590406e-07, "loss": 0.9315, "step": 16905 }, { "epoch": 0.40726379422460923, "grad_norm": 1.5512969493865967, "learning_rate": 6.239732278673562e-07, "loss": 0.9116, "step": 16910 }, { "epoch": 0.4073842152163965, "grad_norm": 1.5854614973068237, "learning_rate": 6.238464658756718e-07, "loss": 0.9105, "step": 16915 }, { "epoch": 0.40750463620818383, "grad_norm": 1.4322234392166138, "learning_rate": 6.237197038839875e-07, "loss": 0.9013, "step": 16920 }, { "epoch": 0.4076250571999711, "grad_norm": 1.5661823749542236, "learning_rate": 6.235929418923029e-07, "loss": 0.887, "step": 16925 }, { "epoch": 0.4077454781917584, "grad_norm": 1.6415965557098389, "learning_rate": 6.234661799006185e-07, "loss": 0.8904, "step": 16930 }, { "epoch": 0.40786589918354565, "grad_norm": 1.5648971796035767, "learning_rate": 6.233394179089342e-07, "loss": 0.9167, "step": 16935 }, { "epoch": 0.407986320175333, "grad_norm": 1.5005626678466797, "learning_rate": 6.232126559172497e-07, "loss": 0.937, "step": 16940 }, { "epoch": 0.40810674116712026, "grad_norm": 1.5273611545562744, "learning_rate": 6.230858939255653e-07, "loss": 0.9338, "step": 16945 }, { "epoch": 0.40822716215890753, "grad_norm": 1.5377509593963623, "learning_rate": 6.229591319338809e-07, "loss": 0.9068, "step": 16950 }, { "epoch": 0.4083475831506948, "grad_norm": 1.4508482217788696, "learning_rate": 6.228323699421965e-07, "loss": 0.9001, "step": 16955 }, { "epoch": 0.40846800414248213, "grad_norm": 1.6953697204589844, "learning_rate": 6.227056079505121e-07, "loss": 0.9195, "step": 16960 }, { "epoch": 0.4085884251342694, "grad_norm": 1.6012778282165527, "learning_rate": 6.225788459588277e-07, "loss": 0.8474, "step": 16965 }, { "epoch": 0.4087088461260567, "grad_norm": 1.5923460721969604, "learning_rate": 6.224520839671432e-07, "loss": 0.9111, "step": 16970 }, { "epoch": 0.408829267117844, "grad_norm": 1.6092580556869507, "learning_rate": 6.223253219754588e-07, "loss": 0.8893, "step": 16975 }, { "epoch": 0.4089496881096313, "grad_norm": 1.7636964321136475, "learning_rate": 6.221985599837745e-07, "loss": 0.9521, "step": 16980 }, { "epoch": 0.40907010910141856, "grad_norm": 1.5564132928848267, "learning_rate": 6.2207179799209e-07, "loss": 0.9026, "step": 16985 }, { "epoch": 0.40919053009320583, "grad_norm": 1.6354960203170776, "learning_rate": 6.219450360004056e-07, "loss": 0.9423, "step": 16990 }, { "epoch": 0.40931095108499316, "grad_norm": 1.5429902076721191, "learning_rate": 6.218182740087212e-07, "loss": 0.9144, "step": 16995 }, { "epoch": 0.40943137207678043, "grad_norm": 2.275571823120117, "learning_rate": 6.216915120170367e-07, "loss": 0.9458, "step": 17000 }, { "epoch": 0.4095517930685677, "grad_norm": 1.4017913341522217, "learning_rate": 6.215647500253524e-07, "loss": 0.9419, "step": 17005 }, { "epoch": 0.409672214060355, "grad_norm": 1.5098503828048706, "learning_rate": 6.21437988033668e-07, "loss": 0.9175, "step": 17010 }, { "epoch": 0.4097926350521423, "grad_norm": 1.4720624685287476, "learning_rate": 6.213112260419834e-07, "loss": 0.8629, "step": 17015 }, { "epoch": 0.4099130560439296, "grad_norm": 1.7012861967086792, "learning_rate": 6.211844640502991e-07, "loss": 0.9614, "step": 17020 }, { "epoch": 0.41003347703571685, "grad_norm": 1.732938528060913, "learning_rate": 6.210577020586147e-07, "loss": 0.9399, "step": 17025 }, { "epoch": 0.4101538980275041, "grad_norm": 1.5244909524917603, "learning_rate": 6.209309400669303e-07, "loss": 0.8768, "step": 17030 }, { "epoch": 0.41027431901929146, "grad_norm": 1.501592755317688, "learning_rate": 6.208041780752459e-07, "loss": 0.8713, "step": 17035 }, { "epoch": 0.41039474001107873, "grad_norm": 1.55648934841156, "learning_rate": 6.206774160835615e-07, "loss": 0.9467, "step": 17040 }, { "epoch": 0.410515161002866, "grad_norm": 1.4854168891906738, "learning_rate": 6.20550654091877e-07, "loss": 0.9206, "step": 17045 }, { "epoch": 0.41063558199465333, "grad_norm": 1.598010540008545, "learning_rate": 6.204238921001926e-07, "loss": 0.892, "step": 17050 }, { "epoch": 0.4107560029864406, "grad_norm": 1.6800447702407837, "learning_rate": 6.202971301085083e-07, "loss": 0.8823, "step": 17055 }, { "epoch": 0.4108764239782279, "grad_norm": 1.541235327720642, "learning_rate": 6.201703681168237e-07, "loss": 0.9197, "step": 17060 }, { "epoch": 0.41099684497001515, "grad_norm": 1.558349370956421, "learning_rate": 6.200436061251394e-07, "loss": 0.889, "step": 17065 }, { "epoch": 0.4111172659618025, "grad_norm": 1.6211495399475098, "learning_rate": 6.19916844133455e-07, "loss": 0.9126, "step": 17070 }, { "epoch": 0.41123768695358975, "grad_norm": 1.5009198188781738, "learning_rate": 6.197900821417706e-07, "loss": 0.9916, "step": 17075 }, { "epoch": 0.411358107945377, "grad_norm": 1.7271745204925537, "learning_rate": 6.196633201500862e-07, "loss": 0.8982, "step": 17080 }, { "epoch": 0.4114785289371643, "grad_norm": 1.4661312103271484, "learning_rate": 6.195365581584017e-07, "loss": 0.9097, "step": 17085 }, { "epoch": 0.41159894992895163, "grad_norm": 1.656012773513794, "learning_rate": 6.194097961667173e-07, "loss": 0.9434, "step": 17090 }, { "epoch": 0.4117193709207389, "grad_norm": 1.7758303880691528, "learning_rate": 6.192830341750329e-07, "loss": 0.9637, "step": 17095 }, { "epoch": 0.4118397919125262, "grad_norm": 1.5642658472061157, "learning_rate": 6.191562721833486e-07, "loss": 0.9356, "step": 17100 }, { "epoch": 0.4119602129043135, "grad_norm": 1.6727337837219238, "learning_rate": 6.190295101916641e-07, "loss": 0.958, "step": 17105 }, { "epoch": 0.4120806338961008, "grad_norm": 1.5685449838638306, "learning_rate": 6.189027481999796e-07, "loss": 0.8907, "step": 17110 }, { "epoch": 0.41220105488788805, "grad_norm": 1.5111569166183472, "learning_rate": 6.187759862082953e-07, "loss": 0.9169, "step": 17115 }, { "epoch": 0.4123214758796753, "grad_norm": 1.5179859399795532, "learning_rate": 6.186492242166108e-07, "loss": 0.8996, "step": 17120 }, { "epoch": 0.41244189687146265, "grad_norm": 1.6612423658370972, "learning_rate": 6.185224622249265e-07, "loss": 0.9161, "step": 17125 }, { "epoch": 0.4125623178632499, "grad_norm": 1.5869808197021484, "learning_rate": 6.18395700233242e-07, "loss": 0.9005, "step": 17130 }, { "epoch": 0.4126827388550372, "grad_norm": 1.490541696548462, "learning_rate": 6.182689382415575e-07, "loss": 0.949, "step": 17135 }, { "epoch": 0.4128031598468245, "grad_norm": 1.481579065322876, "learning_rate": 6.181421762498732e-07, "loss": 0.8829, "step": 17140 }, { "epoch": 0.4129235808386118, "grad_norm": 1.6850212812423706, "learning_rate": 6.180154142581888e-07, "loss": 0.8974, "step": 17145 }, { "epoch": 0.4130440018303991, "grad_norm": 1.5239510536193848, "learning_rate": 6.178886522665044e-07, "loss": 0.9171, "step": 17150 }, { "epoch": 0.41316442282218635, "grad_norm": 1.6153520345687866, "learning_rate": 6.177618902748199e-07, "loss": 0.9529, "step": 17155 }, { "epoch": 0.4132848438139737, "grad_norm": 1.5189660787582397, "learning_rate": 6.176351282831356e-07, "loss": 0.9021, "step": 17160 }, { "epoch": 0.41340526480576095, "grad_norm": 1.436489462852478, "learning_rate": 6.175083662914511e-07, "loss": 0.9482, "step": 17165 }, { "epoch": 0.4135256857975482, "grad_norm": 1.5382148027420044, "learning_rate": 6.173816042997667e-07, "loss": 0.8656, "step": 17170 }, { "epoch": 0.4136461067893355, "grad_norm": 1.4603832960128784, "learning_rate": 6.172548423080824e-07, "loss": 0.9581, "step": 17175 }, { "epoch": 0.41376652778112283, "grad_norm": 1.5139708518981934, "learning_rate": 6.171280803163979e-07, "loss": 0.8897, "step": 17180 }, { "epoch": 0.4138869487729101, "grad_norm": 1.4182488918304443, "learning_rate": 6.170013183247135e-07, "loss": 0.9158, "step": 17185 }, { "epoch": 0.4140073697646974, "grad_norm": 1.9594801664352417, "learning_rate": 6.168745563330291e-07, "loss": 0.9053, "step": 17190 }, { "epoch": 0.41412779075648465, "grad_norm": 1.588879108428955, "learning_rate": 6.167477943413448e-07, "loss": 0.8981, "step": 17195 }, { "epoch": 0.414248211748272, "grad_norm": 1.7074207067489624, "learning_rate": 6.166210323496602e-07, "loss": 0.8977, "step": 17200 }, { "epoch": 0.41436863274005925, "grad_norm": 1.6250323057174683, "learning_rate": 6.164942703579758e-07, "loss": 0.8861, "step": 17205 }, { "epoch": 0.4144890537318465, "grad_norm": 1.850350022315979, "learning_rate": 6.163675083662915e-07, "loss": 0.8796, "step": 17210 }, { "epoch": 0.4146094747236338, "grad_norm": 1.6797027587890625, "learning_rate": 6.16240746374607e-07, "loss": 0.8986, "step": 17215 }, { "epoch": 0.4147298957154211, "grad_norm": 1.584898591041565, "learning_rate": 6.161139843829227e-07, "loss": 0.951, "step": 17220 }, { "epoch": 0.4148503167072084, "grad_norm": 1.5470921993255615, "learning_rate": 6.159872223912382e-07, "loss": 0.9126, "step": 17225 }, { "epoch": 0.4149707376989957, "grad_norm": 1.5832595825195312, "learning_rate": 6.158604603995537e-07, "loss": 0.9547, "step": 17230 }, { "epoch": 0.415091158690783, "grad_norm": 1.6913233995437622, "learning_rate": 6.157336984078694e-07, "loss": 0.8602, "step": 17235 }, { "epoch": 0.4152115796825703, "grad_norm": 1.8051854372024536, "learning_rate": 6.15606936416185e-07, "loss": 0.9028, "step": 17240 }, { "epoch": 0.41533200067435755, "grad_norm": 1.5922828912734985, "learning_rate": 6.154801744245005e-07, "loss": 0.9325, "step": 17245 }, { "epoch": 0.4154524216661448, "grad_norm": 1.8161146640777588, "learning_rate": 6.153534124328161e-07, "loss": 0.9126, "step": 17250 }, { "epoch": 0.41557284265793215, "grad_norm": 1.5556491613388062, "learning_rate": 6.152266504411317e-07, "loss": 0.9627, "step": 17255 }, { "epoch": 0.4156932636497194, "grad_norm": 1.4069287776947021, "learning_rate": 6.150998884494473e-07, "loss": 0.9157, "step": 17260 }, { "epoch": 0.4158136846415067, "grad_norm": 1.5378355979919434, "learning_rate": 6.149731264577629e-07, "loss": 0.9711, "step": 17265 }, { "epoch": 0.41593410563329397, "grad_norm": 1.4258906841278076, "learning_rate": 6.148463644660785e-07, "loss": 0.8693, "step": 17270 }, { "epoch": 0.4160545266250813, "grad_norm": 1.4657279253005981, "learning_rate": 6.14719602474394e-07, "loss": 0.9234, "step": 17275 }, { "epoch": 0.4161749476168686, "grad_norm": 1.731824517250061, "learning_rate": 6.145928404827097e-07, "loss": 0.8964, "step": 17280 }, { "epoch": 0.41629536860865585, "grad_norm": 1.8693464994430542, "learning_rate": 6.144660784910253e-07, "loss": 0.9262, "step": 17285 }, { "epoch": 0.4164157896004432, "grad_norm": 1.500510811805725, "learning_rate": 6.143393164993408e-07, "loss": 0.9205, "step": 17290 }, { "epoch": 0.41653621059223045, "grad_norm": 1.625565767288208, "learning_rate": 6.142125545076564e-07, "loss": 0.9148, "step": 17295 }, { "epoch": 0.4166566315840177, "grad_norm": 1.5997830629348755, "learning_rate": 6.14085792515972e-07, "loss": 0.9111, "step": 17300 }, { "epoch": 0.416777052575805, "grad_norm": 1.6735154390335083, "learning_rate": 6.139590305242876e-07, "loss": 0.9583, "step": 17305 }, { "epoch": 0.4168974735675923, "grad_norm": 1.5538638830184937, "learning_rate": 6.138322685326032e-07, "loss": 0.9632, "step": 17310 }, { "epoch": 0.4170178945593796, "grad_norm": 1.4466016292572021, "learning_rate": 6.137055065409187e-07, "loss": 0.9088, "step": 17315 }, { "epoch": 0.41713831555116687, "grad_norm": 1.5244786739349365, "learning_rate": 6.135787445492343e-07, "loss": 0.9279, "step": 17320 }, { "epoch": 0.41725873654295415, "grad_norm": 1.305046796798706, "learning_rate": 6.134519825575499e-07, "loss": 0.9045, "step": 17325 }, { "epoch": 0.4173791575347415, "grad_norm": 1.670288324356079, "learning_rate": 6.133252205658656e-07, "loss": 0.8701, "step": 17330 }, { "epoch": 0.41749957852652875, "grad_norm": 1.6538128852844238, "learning_rate": 6.131984585741811e-07, "loss": 0.9362, "step": 17335 }, { "epoch": 0.417619999518316, "grad_norm": 1.5970174074172974, "learning_rate": 6.130716965824967e-07, "loss": 0.9376, "step": 17340 }, { "epoch": 0.4177404205101033, "grad_norm": 1.530699372291565, "learning_rate": 6.129449345908123e-07, "loss": 0.9649, "step": 17345 }, { "epoch": 0.4178608415018906, "grad_norm": 1.6150037050247192, "learning_rate": 6.128181725991278e-07, "loss": 0.9158, "step": 17350 }, { "epoch": 0.4179812624936779, "grad_norm": 1.4796768426895142, "learning_rate": 6.126914106074435e-07, "loss": 0.9045, "step": 17355 }, { "epoch": 0.41810168348546517, "grad_norm": 1.542307734489441, "learning_rate": 6.125646486157591e-07, "loss": 0.8482, "step": 17360 }, { "epoch": 0.4182221044772525, "grad_norm": 1.6469286680221558, "learning_rate": 6.124378866240746e-07, "loss": 0.929, "step": 17365 }, { "epoch": 0.41834252546903977, "grad_norm": 1.5979429483413696, "learning_rate": 6.123111246323902e-07, "loss": 0.9167, "step": 17370 }, { "epoch": 0.41846294646082705, "grad_norm": 1.4680538177490234, "learning_rate": 6.121843626407058e-07, "loss": 0.8667, "step": 17375 }, { "epoch": 0.4185833674526143, "grad_norm": 1.752623200416565, "learning_rate": 6.120576006490214e-07, "loss": 0.9389, "step": 17380 }, { "epoch": 0.41870378844440165, "grad_norm": 1.4785820245742798, "learning_rate": 6.119308386573369e-07, "loss": 0.8957, "step": 17385 }, { "epoch": 0.4188242094361889, "grad_norm": 1.513867735862732, "learning_rate": 6.118040766656526e-07, "loss": 0.9167, "step": 17390 }, { "epoch": 0.4189446304279762, "grad_norm": 1.5811598300933838, "learning_rate": 6.116773146739681e-07, "loss": 0.9019, "step": 17395 }, { "epoch": 0.41906505141976347, "grad_norm": 1.643618106842041, "learning_rate": 6.115505526822838e-07, "loss": 0.9424, "step": 17400 }, { "epoch": 0.4191854724115508, "grad_norm": 1.3657090663909912, "learning_rate": 6.114237906905994e-07, "loss": 0.9588, "step": 17405 }, { "epoch": 0.41930589340333807, "grad_norm": 1.5041183233261108, "learning_rate": 6.112970286989148e-07, "loss": 0.8784, "step": 17410 }, { "epoch": 0.41942631439512534, "grad_norm": 1.5289888381958008, "learning_rate": 6.111702667072305e-07, "loss": 0.8804, "step": 17415 }, { "epoch": 0.4195467353869127, "grad_norm": 1.574815034866333, "learning_rate": 6.110435047155461e-07, "loss": 0.9201, "step": 17420 }, { "epoch": 0.41966715637869995, "grad_norm": 1.6267809867858887, "learning_rate": 6.109167427238617e-07, "loss": 0.8833, "step": 17425 }, { "epoch": 0.4197875773704872, "grad_norm": 1.4753605127334595, "learning_rate": 6.107899807321772e-07, "loss": 0.9439, "step": 17430 }, { "epoch": 0.4199079983622745, "grad_norm": 1.5669854879379272, "learning_rate": 6.106632187404928e-07, "loss": 0.9109, "step": 17435 }, { "epoch": 0.4200284193540618, "grad_norm": 1.5805938243865967, "learning_rate": 6.105364567488084e-07, "loss": 0.9097, "step": 17440 }, { "epoch": 0.4201488403458491, "grad_norm": 1.501623272895813, "learning_rate": 6.10409694757124e-07, "loss": 0.9498, "step": 17445 }, { "epoch": 0.42026926133763637, "grad_norm": 1.611153483390808, "learning_rate": 6.102829327654397e-07, "loss": 0.9448, "step": 17450 }, { "epoch": 0.42038968232942364, "grad_norm": 1.4031232595443726, "learning_rate": 6.101561707737551e-07, "loss": 0.9303, "step": 17455 }, { "epoch": 0.42051010332121097, "grad_norm": 1.5860594511032104, "learning_rate": 6.100294087820707e-07, "loss": 0.9008, "step": 17460 }, { "epoch": 0.42063052431299824, "grad_norm": 1.665428876876831, "learning_rate": 6.099026467903864e-07, "loss": 0.9014, "step": 17465 }, { "epoch": 0.4207509453047855, "grad_norm": 1.5833650827407837, "learning_rate": 6.097758847987019e-07, "loss": 0.9349, "step": 17470 }, { "epoch": 0.4208713662965728, "grad_norm": 1.6934082508087158, "learning_rate": 6.096491228070176e-07, "loss": 0.9126, "step": 17475 }, { "epoch": 0.4209917872883601, "grad_norm": 1.7227505445480347, "learning_rate": 6.095223608153331e-07, "loss": 1.0, "step": 17480 }, { "epoch": 0.4211122082801474, "grad_norm": 1.4861100912094116, "learning_rate": 6.093955988236487e-07, "loss": 0.9235, "step": 17485 }, { "epoch": 0.42123262927193467, "grad_norm": 1.6223926544189453, "learning_rate": 6.092688368319643e-07, "loss": 0.9327, "step": 17490 }, { "epoch": 0.421353050263722, "grad_norm": 1.5731300115585327, "learning_rate": 6.091420748402799e-07, "loss": 0.9212, "step": 17495 }, { "epoch": 0.42147347125550927, "grad_norm": 1.519948959350586, "learning_rate": 6.090153128485954e-07, "loss": 0.8683, "step": 17500 }, { "epoch": 0.42159389224729654, "grad_norm": 1.383150339126587, "learning_rate": 6.08888550856911e-07, "loss": 0.9131, "step": 17505 }, { "epoch": 0.4217143132390838, "grad_norm": 1.4093159437179565, "learning_rate": 6.087617888652267e-07, "loss": 0.9419, "step": 17510 }, { "epoch": 0.42183473423087114, "grad_norm": 1.5490443706512451, "learning_rate": 6.086350268735422e-07, "loss": 0.9289, "step": 17515 }, { "epoch": 0.4219551552226584, "grad_norm": 1.570052981376648, "learning_rate": 6.085082648818578e-07, "loss": 0.9024, "step": 17520 }, { "epoch": 0.4220755762144457, "grad_norm": 1.583708643913269, "learning_rate": 6.083815028901734e-07, "loss": 0.8999, "step": 17525 }, { "epoch": 0.42219599720623296, "grad_norm": 1.38755464553833, "learning_rate": 6.082547408984889e-07, "loss": 0.9087, "step": 17530 }, { "epoch": 0.4223164181980203, "grad_norm": 1.5139927864074707, "learning_rate": 6.081279789068046e-07, "loss": 0.9185, "step": 17535 }, { "epoch": 0.42243683918980757, "grad_norm": 1.3745379447937012, "learning_rate": 6.080012169151202e-07, "loss": 0.9573, "step": 17540 }, { "epoch": 0.42255726018159484, "grad_norm": 1.3747448921203613, "learning_rate": 6.078744549234356e-07, "loss": 0.9007, "step": 17545 }, { "epoch": 0.42267768117338217, "grad_norm": 1.657645344734192, "learning_rate": 6.077476929317513e-07, "loss": 0.927, "step": 17550 }, { "epoch": 0.42279810216516944, "grad_norm": 1.5407798290252686, "learning_rate": 6.076209309400669e-07, "loss": 0.8979, "step": 17555 }, { "epoch": 0.4229185231569567, "grad_norm": 1.5311154127120972, "learning_rate": 6.074941689483825e-07, "loss": 0.8862, "step": 17560 }, { "epoch": 0.423038944148744, "grad_norm": 1.4764913320541382, "learning_rate": 6.073674069566981e-07, "loss": 0.8988, "step": 17565 }, { "epoch": 0.4231593651405313, "grad_norm": 1.3411235809326172, "learning_rate": 6.072406449650137e-07, "loss": 0.9283, "step": 17570 }, { "epoch": 0.4232797861323186, "grad_norm": 1.579624891281128, "learning_rate": 6.071138829733292e-07, "loss": 0.9296, "step": 17575 }, { "epoch": 0.42340020712410587, "grad_norm": 1.8971341848373413, "learning_rate": 6.069871209816448e-07, "loss": 0.9012, "step": 17580 }, { "epoch": 0.42352062811589314, "grad_norm": 1.5456421375274658, "learning_rate": 6.068603589899605e-07, "loss": 0.8915, "step": 17585 }, { "epoch": 0.42364104910768047, "grad_norm": 1.4029710292816162, "learning_rate": 6.067335969982759e-07, "loss": 0.9787, "step": 17590 }, { "epoch": 0.42376147009946774, "grad_norm": 1.7143163681030273, "learning_rate": 6.066068350065916e-07, "loss": 0.8726, "step": 17595 }, { "epoch": 0.423881891091255, "grad_norm": 1.680330514907837, "learning_rate": 6.064800730149072e-07, "loss": 0.9316, "step": 17600 }, { "epoch": 0.42400231208304234, "grad_norm": 1.5830532312393188, "learning_rate": 6.063533110232228e-07, "loss": 0.9317, "step": 17605 }, { "epoch": 0.4241227330748296, "grad_norm": 1.6646406650543213, "learning_rate": 6.062265490315384e-07, "loss": 0.9463, "step": 17610 }, { "epoch": 0.4242431540666169, "grad_norm": 1.6631529331207275, "learning_rate": 6.060997870398539e-07, "loss": 0.9289, "step": 17615 }, { "epoch": 0.42436357505840416, "grad_norm": 1.5877068042755127, "learning_rate": 6.059730250481695e-07, "loss": 0.8933, "step": 17620 }, { "epoch": 0.4244839960501915, "grad_norm": 1.5696178674697876, "learning_rate": 6.058462630564851e-07, "loss": 0.9762, "step": 17625 }, { "epoch": 0.42460441704197877, "grad_norm": 2.281834363937378, "learning_rate": 6.057195010648008e-07, "loss": 0.8999, "step": 17630 }, { "epoch": 0.42472483803376604, "grad_norm": 1.560080647468567, "learning_rate": 6.055927390731163e-07, "loss": 0.9323, "step": 17635 }, { "epoch": 0.4248452590255533, "grad_norm": 1.710875153541565, "learning_rate": 6.054659770814318e-07, "loss": 0.9317, "step": 17640 }, { "epoch": 0.42496568001734064, "grad_norm": 1.432875156402588, "learning_rate": 6.053392150897475e-07, "loss": 0.9254, "step": 17645 }, { "epoch": 0.4250861010091279, "grad_norm": 1.6327581405639648, "learning_rate": 6.05212453098063e-07, "loss": 0.8601, "step": 17650 }, { "epoch": 0.4252065220009152, "grad_norm": 1.535698413848877, "learning_rate": 6.050856911063787e-07, "loss": 0.9295, "step": 17655 }, { "epoch": 0.42532694299270246, "grad_norm": 1.5619745254516602, "learning_rate": 6.049589291146942e-07, "loss": 0.9188, "step": 17660 }, { "epoch": 0.4254473639844898, "grad_norm": 1.8153740167617798, "learning_rate": 6.048321671230097e-07, "loss": 0.9126, "step": 17665 }, { "epoch": 0.42556778497627706, "grad_norm": 1.6192477941513062, "learning_rate": 6.047054051313254e-07, "loss": 0.8888, "step": 17670 }, { "epoch": 0.42568820596806434, "grad_norm": 1.4542292356491089, "learning_rate": 6.04578643139641e-07, "loss": 0.9421, "step": 17675 }, { "epoch": 0.42580862695985167, "grad_norm": 1.72112238407135, "learning_rate": 6.044518811479566e-07, "loss": 0.8955, "step": 17680 }, { "epoch": 0.42592904795163894, "grad_norm": 1.7958778142929077, "learning_rate": 6.043251191562721e-07, "loss": 0.8612, "step": 17685 }, { "epoch": 0.4260494689434262, "grad_norm": 1.5801275968551636, "learning_rate": 6.041983571645878e-07, "loss": 0.9085, "step": 17690 }, { "epoch": 0.4261698899352135, "grad_norm": 1.7030131816864014, "learning_rate": 6.040715951729033e-07, "loss": 0.8984, "step": 17695 }, { "epoch": 0.4262903109270008, "grad_norm": 1.5439563989639282, "learning_rate": 6.039448331812189e-07, "loss": 0.882, "step": 17700 }, { "epoch": 0.4264107319187881, "grad_norm": 1.5502954721450806, "learning_rate": 6.038180711895346e-07, "loss": 0.8829, "step": 17705 }, { "epoch": 0.42653115291057536, "grad_norm": 1.400022029876709, "learning_rate": 6.0369130919785e-07, "loss": 0.9075, "step": 17710 }, { "epoch": 0.42665157390236264, "grad_norm": 1.654332160949707, "learning_rate": 6.035645472061657e-07, "loss": 0.9059, "step": 17715 }, { "epoch": 0.42677199489414996, "grad_norm": 1.4520788192749023, "learning_rate": 6.034377852144813e-07, "loss": 0.8819, "step": 17720 }, { "epoch": 0.42689241588593724, "grad_norm": 1.511816143989563, "learning_rate": 6.033110232227968e-07, "loss": 0.888, "step": 17725 }, { "epoch": 0.4270128368777245, "grad_norm": 1.648866891860962, "learning_rate": 6.031842612311124e-07, "loss": 0.9111, "step": 17730 }, { "epoch": 0.42713325786951184, "grad_norm": 1.6627436876296997, "learning_rate": 6.03057499239428e-07, "loss": 0.9159, "step": 17735 }, { "epoch": 0.4272536788612991, "grad_norm": 1.4436696767807007, "learning_rate": 6.029307372477436e-07, "loss": 0.8866, "step": 17740 }, { "epoch": 0.4273740998530864, "grad_norm": 1.6138030290603638, "learning_rate": 6.028039752560592e-07, "loss": 0.9277, "step": 17745 }, { "epoch": 0.42749452084487366, "grad_norm": 1.652036190032959, "learning_rate": 6.026772132643749e-07, "loss": 0.9126, "step": 17750 }, { "epoch": 0.427614941836661, "grad_norm": 1.5107982158660889, "learning_rate": 6.025504512726903e-07, "loss": 0.9208, "step": 17755 }, { "epoch": 0.42773536282844826, "grad_norm": 1.5715196132659912, "learning_rate": 6.024236892810059e-07, "loss": 0.9569, "step": 17760 }, { "epoch": 0.42785578382023554, "grad_norm": 1.4496690034866333, "learning_rate": 6.022969272893216e-07, "loss": 0.9336, "step": 17765 }, { "epoch": 0.4279762048120228, "grad_norm": 1.6113660335540771, "learning_rate": 6.021701652976371e-07, "loss": 0.9136, "step": 17770 }, { "epoch": 0.42809662580381014, "grad_norm": 1.7576104402542114, "learning_rate": 6.020434033059527e-07, "loss": 0.9168, "step": 17775 }, { "epoch": 0.4282170467955974, "grad_norm": 1.6658543348312378, "learning_rate": 6.019166413142683e-07, "loss": 0.913, "step": 17780 }, { "epoch": 0.4283374677873847, "grad_norm": 1.5523247718811035, "learning_rate": 6.017898793225838e-07, "loss": 0.9239, "step": 17785 }, { "epoch": 0.42845788877917196, "grad_norm": 1.594376802444458, "learning_rate": 6.016631173308995e-07, "loss": 0.9631, "step": 17790 }, { "epoch": 0.4285783097709593, "grad_norm": 1.3364665508270264, "learning_rate": 6.015363553392151e-07, "loss": 0.8864, "step": 17795 }, { "epoch": 0.42869873076274656, "grad_norm": 1.6243319511413574, "learning_rate": 6.014095933475306e-07, "loss": 0.8928, "step": 17800 }, { "epoch": 0.42881915175453383, "grad_norm": 2.047043561935425, "learning_rate": 6.012828313558462e-07, "loss": 0.9565, "step": 17805 }, { "epoch": 0.42893957274632116, "grad_norm": 1.4199920892715454, "learning_rate": 6.011560693641619e-07, "loss": 0.8812, "step": 17810 }, { "epoch": 0.42905999373810844, "grad_norm": 1.5130432844161987, "learning_rate": 6.010293073724774e-07, "loss": 0.8514, "step": 17815 }, { "epoch": 0.4291804147298957, "grad_norm": 1.6105554103851318, "learning_rate": 6.00902545380793e-07, "loss": 0.8948, "step": 17820 }, { "epoch": 0.429300835721683, "grad_norm": 1.5266790390014648, "learning_rate": 6.007757833891086e-07, "loss": 0.9541, "step": 17825 }, { "epoch": 0.4294212567134703, "grad_norm": 1.5769493579864502, "learning_rate": 6.006490213974241e-07, "loss": 0.8985, "step": 17830 }, { "epoch": 0.4295416777052576, "grad_norm": 1.99751615524292, "learning_rate": 6.005222594057398e-07, "loss": 0.9314, "step": 17835 }, { "epoch": 0.42966209869704486, "grad_norm": 1.7150274515151978, "learning_rate": 6.003954974140554e-07, "loss": 0.9215, "step": 17840 }, { "epoch": 0.42978251968883213, "grad_norm": 1.5975068807601929, "learning_rate": 6.002687354223708e-07, "loss": 0.908, "step": 17845 }, { "epoch": 0.42990294068061946, "grad_norm": 1.5267257690429688, "learning_rate": 6.001419734306865e-07, "loss": 0.8962, "step": 17850 }, { "epoch": 0.43002336167240673, "grad_norm": 1.561611533164978, "learning_rate": 6.000152114390021e-07, "loss": 0.9718, "step": 17855 }, { "epoch": 0.430143782664194, "grad_norm": 1.575334072113037, "learning_rate": 5.998884494473177e-07, "loss": 0.9145, "step": 17860 }, { "epoch": 0.43026420365598134, "grad_norm": 1.507455587387085, "learning_rate": 5.997616874556333e-07, "loss": 0.9244, "step": 17865 }, { "epoch": 0.4303846246477686, "grad_norm": 1.6253801584243774, "learning_rate": 5.996349254639489e-07, "loss": 0.8836, "step": 17870 }, { "epoch": 0.4305050456395559, "grad_norm": 1.6473288536071777, "learning_rate": 5.995081634722644e-07, "loss": 0.9189, "step": 17875 }, { "epoch": 0.43062546663134316, "grad_norm": 1.3467274904251099, "learning_rate": 5.9938140148058e-07, "loss": 0.8914, "step": 17880 }, { "epoch": 0.4307458876231305, "grad_norm": 1.6535570621490479, "learning_rate": 5.992546394888957e-07, "loss": 0.8545, "step": 17885 }, { "epoch": 0.43086630861491776, "grad_norm": 1.9367197751998901, "learning_rate": 5.991278774972111e-07, "loss": 0.9876, "step": 17890 }, { "epoch": 0.43098672960670503, "grad_norm": 1.5257887840270996, "learning_rate": 5.990011155055268e-07, "loss": 0.8616, "step": 17895 }, { "epoch": 0.4311071505984923, "grad_norm": 1.6853076219558716, "learning_rate": 5.988743535138424e-07, "loss": 0.9274, "step": 17900 }, { "epoch": 0.43122757159027963, "grad_norm": 1.6795798540115356, "learning_rate": 5.987475915221579e-07, "loss": 0.8983, "step": 17905 }, { "epoch": 0.4313479925820669, "grad_norm": 1.533904790878296, "learning_rate": 5.986208295304736e-07, "loss": 0.8607, "step": 17910 }, { "epoch": 0.4314684135738542, "grad_norm": 1.7242625951766968, "learning_rate": 5.984940675387891e-07, "loss": 0.9448, "step": 17915 }, { "epoch": 0.4315888345656415, "grad_norm": 1.5450302362442017, "learning_rate": 5.983673055471047e-07, "loss": 0.9315, "step": 17920 }, { "epoch": 0.4317092555574288, "grad_norm": 1.547723650932312, "learning_rate": 5.982405435554203e-07, "loss": 0.9453, "step": 17925 }, { "epoch": 0.43182967654921606, "grad_norm": 1.5352214574813843, "learning_rate": 5.98113781563736e-07, "loss": 0.9061, "step": 17930 }, { "epoch": 0.43195009754100333, "grad_norm": 1.7504684925079346, "learning_rate": 5.979870195720515e-07, "loss": 0.903, "step": 17935 }, { "epoch": 0.43207051853279066, "grad_norm": 1.4188649654388428, "learning_rate": 5.97860257580367e-07, "loss": 0.91, "step": 17940 }, { "epoch": 0.43219093952457793, "grad_norm": 1.8634692430496216, "learning_rate": 5.977334955886827e-07, "loss": 0.9519, "step": 17945 }, { "epoch": 0.4323113605163652, "grad_norm": 1.4302127361297607, "learning_rate": 5.976067335969982e-07, "loss": 0.9287, "step": 17950 }, { "epoch": 0.4324317815081525, "grad_norm": 1.4653289318084717, "learning_rate": 5.974799716053139e-07, "loss": 0.9013, "step": 17955 }, { "epoch": 0.4325522024999398, "grad_norm": 1.4174020290374756, "learning_rate": 5.973532096136294e-07, "loss": 0.9004, "step": 17960 }, { "epoch": 0.4326726234917271, "grad_norm": 1.780843734741211, "learning_rate": 5.972264476219449e-07, "loss": 0.884, "step": 17965 }, { "epoch": 0.43279304448351436, "grad_norm": 1.7866368293762207, "learning_rate": 5.970996856302606e-07, "loss": 0.8893, "step": 17970 }, { "epoch": 0.43291346547530163, "grad_norm": 1.6399894952774048, "learning_rate": 5.969729236385762e-07, "loss": 0.8958, "step": 17975 }, { "epoch": 0.43303388646708896, "grad_norm": 1.3787689208984375, "learning_rate": 5.968461616468918e-07, "loss": 0.9251, "step": 17980 }, { "epoch": 0.43315430745887623, "grad_norm": 1.4486820697784424, "learning_rate": 5.967193996552073e-07, "loss": 0.9001, "step": 17985 }, { "epoch": 0.4332747284506635, "grad_norm": 1.5622378587722778, "learning_rate": 5.96592637663523e-07, "loss": 0.9648, "step": 17990 }, { "epoch": 0.43339514944245083, "grad_norm": 1.6893956661224365, "learning_rate": 5.964658756718385e-07, "loss": 0.9027, "step": 17995 }, { "epoch": 0.4335155704342381, "grad_norm": 1.5421922206878662, "learning_rate": 5.963391136801541e-07, "loss": 0.9626, "step": 18000 }, { "epoch": 0.4336359914260254, "grad_norm": 1.4266217947006226, "learning_rate": 5.962123516884698e-07, "loss": 0.8913, "step": 18005 }, { "epoch": 0.43375641241781265, "grad_norm": 1.4913381338119507, "learning_rate": 5.960855896967852e-07, "loss": 0.9139, "step": 18010 }, { "epoch": 0.4338768334096, "grad_norm": 1.609383225440979, "learning_rate": 5.959588277051009e-07, "loss": 0.8989, "step": 18015 }, { "epoch": 0.43399725440138726, "grad_norm": 1.436481237411499, "learning_rate": 5.958320657134165e-07, "loss": 0.9932, "step": 18020 }, { "epoch": 0.43411767539317453, "grad_norm": 1.5978612899780273, "learning_rate": 5.95705303721732e-07, "loss": 0.8818, "step": 18025 }, { "epoch": 0.4342380963849618, "grad_norm": 1.6705412864685059, "learning_rate": 5.955785417300476e-07, "loss": 0.8584, "step": 18030 }, { "epoch": 0.43435851737674913, "grad_norm": 1.4328734874725342, "learning_rate": 5.954517797383632e-07, "loss": 0.9302, "step": 18035 }, { "epoch": 0.4344789383685364, "grad_norm": 1.2172200679779053, "learning_rate": 5.953250177466788e-07, "loss": 0.9303, "step": 18040 }, { "epoch": 0.4345993593603237, "grad_norm": 1.629767656326294, "learning_rate": 5.951982557549944e-07, "loss": 0.8776, "step": 18045 }, { "epoch": 0.434719780352111, "grad_norm": 1.515710711479187, "learning_rate": 5.9507149376331e-07, "loss": 0.8947, "step": 18050 }, { "epoch": 0.4348402013438983, "grad_norm": 1.75547456741333, "learning_rate": 5.949447317716255e-07, "loss": 0.9042, "step": 18055 }, { "epoch": 0.43496062233568555, "grad_norm": 1.578229308128357, "learning_rate": 5.948179697799411e-07, "loss": 0.9092, "step": 18060 }, { "epoch": 0.4350810433274728, "grad_norm": 1.6973870992660522, "learning_rate": 5.946912077882568e-07, "loss": 0.9316, "step": 18065 }, { "epoch": 0.43520146431926016, "grad_norm": 1.6119117736816406, "learning_rate": 5.945644457965723e-07, "loss": 0.8674, "step": 18070 }, { "epoch": 0.43532188531104743, "grad_norm": 1.3468626737594604, "learning_rate": 5.944376838048879e-07, "loss": 0.8576, "step": 18075 }, { "epoch": 0.4354423063028347, "grad_norm": 1.451310396194458, "learning_rate": 5.943109218132035e-07, "loss": 0.9071, "step": 18080 }, { "epoch": 0.435562727294622, "grad_norm": 1.6123762130737305, "learning_rate": 5.94184159821519e-07, "loss": 0.9343, "step": 18085 }, { "epoch": 0.4356831482864093, "grad_norm": 1.454443335533142, "learning_rate": 5.940573978298347e-07, "loss": 0.9151, "step": 18090 }, { "epoch": 0.4358035692781966, "grad_norm": 1.4933301210403442, "learning_rate": 5.939306358381503e-07, "loss": 0.9507, "step": 18095 }, { "epoch": 0.43592399026998385, "grad_norm": 1.517813801765442, "learning_rate": 5.938038738464658e-07, "loss": 0.9592, "step": 18100 }, { "epoch": 0.4360444112617711, "grad_norm": 1.5954922437667847, "learning_rate": 5.936771118547814e-07, "loss": 0.9175, "step": 18105 }, { "epoch": 0.43616483225355845, "grad_norm": 1.4719862937927246, "learning_rate": 5.93550349863097e-07, "loss": 0.9304, "step": 18110 }, { "epoch": 0.4362852532453457, "grad_norm": 1.4959850311279297, "learning_rate": 5.934235878714126e-07, "loss": 0.9394, "step": 18115 }, { "epoch": 0.436405674237133, "grad_norm": 1.2718183994293213, "learning_rate": 5.932968258797282e-07, "loss": 0.9027, "step": 18120 }, { "epoch": 0.43652609522892033, "grad_norm": 1.5270932912826538, "learning_rate": 5.931700638880438e-07, "loss": 0.9114, "step": 18125 }, { "epoch": 0.4366465162207076, "grad_norm": 1.6102068424224854, "learning_rate": 5.930433018963593e-07, "loss": 0.9323, "step": 18130 }, { "epoch": 0.4367669372124949, "grad_norm": 1.6144814491271973, "learning_rate": 5.92916539904675e-07, "loss": 0.9071, "step": 18135 }, { "epoch": 0.43688735820428215, "grad_norm": 1.586299180984497, "learning_rate": 5.927897779129906e-07, "loss": 0.9377, "step": 18140 }, { "epoch": 0.4370077791960695, "grad_norm": 1.690378189086914, "learning_rate": 5.92663015921306e-07, "loss": 0.8917, "step": 18145 }, { "epoch": 0.43712820018785675, "grad_norm": 1.6998260021209717, "learning_rate": 5.925362539296217e-07, "loss": 0.9332, "step": 18150 }, { "epoch": 0.437248621179644, "grad_norm": 1.5584924221038818, "learning_rate": 5.924094919379373e-07, "loss": 0.9188, "step": 18155 }, { "epoch": 0.4373690421714313, "grad_norm": 1.5578699111938477, "learning_rate": 5.922827299462529e-07, "loss": 0.9049, "step": 18160 }, { "epoch": 0.43748946316321863, "grad_norm": 1.6602588891983032, "learning_rate": 5.921559679545685e-07, "loss": 0.9397, "step": 18165 }, { "epoch": 0.4376098841550059, "grad_norm": 1.7947601079940796, "learning_rate": 5.92029205962884e-07, "loss": 0.9206, "step": 18170 }, { "epoch": 0.4377303051467932, "grad_norm": 1.5118459463119507, "learning_rate": 5.919024439711996e-07, "loss": 0.9207, "step": 18175 }, { "epoch": 0.4378507261385805, "grad_norm": 1.6038711071014404, "learning_rate": 5.917756819795152e-07, "loss": 0.8964, "step": 18180 }, { "epoch": 0.4379711471303678, "grad_norm": 1.8039369583129883, "learning_rate": 5.916489199878309e-07, "loss": 0.9169, "step": 18185 }, { "epoch": 0.43809156812215505, "grad_norm": 1.5942789316177368, "learning_rate": 5.915221579961463e-07, "loss": 0.9819, "step": 18190 }, { "epoch": 0.4382119891139423, "grad_norm": 1.330572485923767, "learning_rate": 5.91395396004462e-07, "loss": 0.9146, "step": 18195 }, { "epoch": 0.43833241010572965, "grad_norm": 1.6067678928375244, "learning_rate": 5.912686340127776e-07, "loss": 0.8957, "step": 18200 }, { "epoch": 0.4384528310975169, "grad_norm": 1.4917579889297485, "learning_rate": 5.911418720210932e-07, "loss": 0.9636, "step": 18205 }, { "epoch": 0.4385732520893042, "grad_norm": 1.3807231187820435, "learning_rate": 5.910151100294088e-07, "loss": 0.8647, "step": 18210 }, { "epoch": 0.4386936730810915, "grad_norm": 1.4772977828979492, "learning_rate": 5.908883480377243e-07, "loss": 0.9232, "step": 18215 }, { "epoch": 0.4388140940728788, "grad_norm": 1.711417317390442, "learning_rate": 5.9076158604604e-07, "loss": 0.9004, "step": 18220 }, { "epoch": 0.4389345150646661, "grad_norm": 1.4783941507339478, "learning_rate": 5.906348240543555e-07, "loss": 0.9263, "step": 18225 }, { "epoch": 0.43905493605645335, "grad_norm": 1.916579008102417, "learning_rate": 5.905080620626711e-07, "loss": 0.8874, "step": 18230 }, { "epoch": 0.4391753570482406, "grad_norm": 1.3718454837799072, "learning_rate": 5.903813000709868e-07, "loss": 0.9168, "step": 18235 }, { "epoch": 0.43929577804002795, "grad_norm": 1.5684967041015625, "learning_rate": 5.902545380793022e-07, "loss": 0.8782, "step": 18240 }, { "epoch": 0.4394161990318152, "grad_norm": 1.6263731718063354, "learning_rate": 5.901277760876179e-07, "loss": 0.9195, "step": 18245 }, { "epoch": 0.4395366200236025, "grad_norm": 1.4979406595230103, "learning_rate": 5.900010140959335e-07, "loss": 0.9013, "step": 18250 }, { "epoch": 0.4396570410153898, "grad_norm": 1.8347595930099487, "learning_rate": 5.89874252104249e-07, "loss": 0.9248, "step": 18255 }, { "epoch": 0.4397774620071771, "grad_norm": 1.6108922958374023, "learning_rate": 5.897474901125646e-07, "loss": 0.8857, "step": 18260 }, { "epoch": 0.4398978829989644, "grad_norm": 1.4353135824203491, "learning_rate": 5.896207281208802e-07, "loss": 0.9357, "step": 18265 }, { "epoch": 0.44001830399075165, "grad_norm": 1.3888901472091675, "learning_rate": 5.894939661291958e-07, "loss": 0.9444, "step": 18270 }, { "epoch": 0.440138724982539, "grad_norm": 1.522700548171997, "learning_rate": 5.893672041375114e-07, "loss": 0.8662, "step": 18275 }, { "epoch": 0.44025914597432625, "grad_norm": 1.5019195079803467, "learning_rate": 5.892404421458271e-07, "loss": 0.955, "step": 18280 }, { "epoch": 0.4403795669661135, "grad_norm": 1.7402147054672241, "learning_rate": 5.891136801541425e-07, "loss": 0.9712, "step": 18285 }, { "epoch": 0.4404999879579008, "grad_norm": 1.4262351989746094, "learning_rate": 5.889869181624581e-07, "loss": 0.9333, "step": 18290 }, { "epoch": 0.4406204089496881, "grad_norm": 1.7041842937469482, "learning_rate": 5.888601561707738e-07, "loss": 0.8435, "step": 18295 }, { "epoch": 0.4407408299414754, "grad_norm": 1.5107301473617554, "learning_rate": 5.887333941790893e-07, "loss": 0.9032, "step": 18300 }, { "epoch": 0.44086125093326267, "grad_norm": 1.543336033821106, "learning_rate": 5.88606632187405e-07, "loss": 0.9219, "step": 18305 }, { "epoch": 0.44098167192505, "grad_norm": 1.5038975477218628, "learning_rate": 5.884798701957205e-07, "loss": 0.8895, "step": 18310 }, { "epoch": 0.4411020929168373, "grad_norm": 1.7226147651672363, "learning_rate": 5.88353108204036e-07, "loss": 0.917, "step": 18315 }, { "epoch": 0.44122251390862455, "grad_norm": 1.7164808511734009, "learning_rate": 5.882263462123517e-07, "loss": 0.9058, "step": 18320 }, { "epoch": 0.4413429349004118, "grad_norm": 1.4979898929595947, "learning_rate": 5.880995842206673e-07, "loss": 0.9308, "step": 18325 }, { "epoch": 0.44146335589219915, "grad_norm": 1.5202665328979492, "learning_rate": 5.879728222289828e-07, "loss": 0.9014, "step": 18330 }, { "epoch": 0.4415837768839864, "grad_norm": 1.4686599969863892, "learning_rate": 5.878460602372984e-07, "loss": 0.9461, "step": 18335 }, { "epoch": 0.4417041978757737, "grad_norm": 1.7539727687835693, "learning_rate": 5.877192982456141e-07, "loss": 0.9511, "step": 18340 }, { "epoch": 0.44182461886756097, "grad_norm": 1.5274763107299805, "learning_rate": 5.875925362539296e-07, "loss": 0.9521, "step": 18345 }, { "epoch": 0.4419450398593483, "grad_norm": 1.4581748247146606, "learning_rate": 5.874657742622452e-07, "loss": 0.9155, "step": 18350 }, { "epoch": 0.44206546085113557, "grad_norm": 1.493125319480896, "learning_rate": 5.873390122705608e-07, "loss": 0.8899, "step": 18355 }, { "epoch": 0.44218588184292285, "grad_norm": 1.47279953956604, "learning_rate": 5.872122502788763e-07, "loss": 0.8576, "step": 18360 }, { "epoch": 0.4423063028347102, "grad_norm": 1.8338985443115234, "learning_rate": 5.87085488287192e-07, "loss": 0.9497, "step": 18365 }, { "epoch": 0.44242672382649745, "grad_norm": 1.5390070676803589, "learning_rate": 5.869587262955076e-07, "loss": 0.9115, "step": 18370 }, { "epoch": 0.4425471448182847, "grad_norm": 1.5464487075805664, "learning_rate": 5.86831964303823e-07, "loss": 0.9, "step": 18375 }, { "epoch": 0.442667565810072, "grad_norm": 1.4387696981430054, "learning_rate": 5.867052023121387e-07, "loss": 0.9589, "step": 18380 }, { "epoch": 0.4427879868018593, "grad_norm": 1.3581068515777588, "learning_rate": 5.865784403204543e-07, "loss": 0.8743, "step": 18385 }, { "epoch": 0.4429084077936466, "grad_norm": 1.4792227745056152, "learning_rate": 5.864516783287699e-07, "loss": 0.9316, "step": 18390 }, { "epoch": 0.44302882878543387, "grad_norm": 1.7097699642181396, "learning_rate": 5.863249163370855e-07, "loss": 0.8889, "step": 18395 }, { "epoch": 0.44314924977722114, "grad_norm": 1.5005133152008057, "learning_rate": 5.86198154345401e-07, "loss": 0.9554, "step": 18400 }, { "epoch": 0.4432696707690085, "grad_norm": 1.667360782623291, "learning_rate": 5.860713923537166e-07, "loss": 0.8946, "step": 18405 }, { "epoch": 0.44339009176079575, "grad_norm": 1.39607572555542, "learning_rate": 5.859446303620322e-07, "loss": 0.9718, "step": 18410 }, { "epoch": 0.443510512752583, "grad_norm": 1.274971842765808, "learning_rate": 5.858178683703479e-07, "loss": 0.9119, "step": 18415 }, { "epoch": 0.4436309337443703, "grad_norm": 1.655137538909912, "learning_rate": 5.856911063786634e-07, "loss": 0.8827, "step": 18420 }, { "epoch": 0.4437513547361576, "grad_norm": 1.6791355609893799, "learning_rate": 5.85564344386979e-07, "loss": 0.9148, "step": 18425 }, { "epoch": 0.4438717757279449, "grad_norm": 1.470255732536316, "learning_rate": 5.854375823952946e-07, "loss": 0.9326, "step": 18430 }, { "epoch": 0.44399219671973217, "grad_norm": 1.4369548559188843, "learning_rate": 5.853108204036101e-07, "loss": 0.9058, "step": 18435 }, { "epoch": 0.4441126177115195, "grad_norm": 1.660200834274292, "learning_rate": 5.851840584119258e-07, "loss": 0.9008, "step": 18440 }, { "epoch": 0.44423303870330677, "grad_norm": 1.6829956769943237, "learning_rate": 5.850572964202413e-07, "loss": 0.9556, "step": 18445 }, { "epoch": 0.44435345969509404, "grad_norm": 1.4490734338760376, "learning_rate": 5.849305344285569e-07, "loss": 0.9025, "step": 18450 }, { "epoch": 0.4444738806868813, "grad_norm": 1.6446168422698975, "learning_rate": 5.848037724368725e-07, "loss": 0.9281, "step": 18455 }, { "epoch": 0.44459430167866865, "grad_norm": 1.7542520761489868, "learning_rate": 5.846770104451882e-07, "loss": 0.9345, "step": 18460 }, { "epoch": 0.4447147226704559, "grad_norm": 1.385473608970642, "learning_rate": 5.845502484535037e-07, "loss": 0.86, "step": 18465 }, { "epoch": 0.4448351436622432, "grad_norm": 1.7109313011169434, "learning_rate": 5.844234864618192e-07, "loss": 0.8704, "step": 18470 }, { "epoch": 0.44495556465403047, "grad_norm": 1.594736099243164, "learning_rate": 5.842967244701349e-07, "loss": 0.9048, "step": 18475 }, { "epoch": 0.4450759856458178, "grad_norm": 1.4663512706756592, "learning_rate": 5.841699624784504e-07, "loss": 0.9278, "step": 18480 }, { "epoch": 0.44519640663760507, "grad_norm": 1.726747751235962, "learning_rate": 5.840432004867661e-07, "loss": 0.8576, "step": 18485 }, { "epoch": 0.44531682762939234, "grad_norm": 1.5041638612747192, "learning_rate": 5.839164384950817e-07, "loss": 0.9079, "step": 18490 }, { "epoch": 0.44543724862117967, "grad_norm": 1.6056970357894897, "learning_rate": 5.837896765033971e-07, "loss": 0.9491, "step": 18495 }, { "epoch": 0.44555766961296694, "grad_norm": 1.565638780593872, "learning_rate": 5.836629145117128e-07, "loss": 0.9341, "step": 18500 }, { "epoch": 0.4456780906047542, "grad_norm": 1.3103954792022705, "learning_rate": 5.835361525200284e-07, "loss": 0.9042, "step": 18505 }, { "epoch": 0.4457985115965415, "grad_norm": 1.373991847038269, "learning_rate": 5.83409390528344e-07, "loss": 0.8608, "step": 18510 }, { "epoch": 0.4459189325883288, "grad_norm": 1.3458672761917114, "learning_rate": 5.832826285366595e-07, "loss": 0.9019, "step": 18515 }, { "epoch": 0.4460393535801161, "grad_norm": 1.499133586883545, "learning_rate": 5.831558665449751e-07, "loss": 0.902, "step": 18520 }, { "epoch": 0.44615977457190337, "grad_norm": 1.6567200422286987, "learning_rate": 5.830291045532907e-07, "loss": 0.9113, "step": 18525 }, { "epoch": 0.44628019556369064, "grad_norm": 1.7166463136672974, "learning_rate": 5.829023425616063e-07, "loss": 0.888, "step": 18530 }, { "epoch": 0.44640061655547797, "grad_norm": 1.7077884674072266, "learning_rate": 5.82775580569922e-07, "loss": 0.9058, "step": 18535 }, { "epoch": 0.44652103754726524, "grad_norm": 1.6900447607040405, "learning_rate": 5.826488185782374e-07, "loss": 0.8517, "step": 18540 }, { "epoch": 0.4466414585390525, "grad_norm": 1.6718276739120483, "learning_rate": 5.825220565865531e-07, "loss": 0.8974, "step": 18545 }, { "epoch": 0.4467618795308398, "grad_norm": 1.6190602779388428, "learning_rate": 5.823952945948687e-07, "loss": 0.9342, "step": 18550 }, { "epoch": 0.4468823005226271, "grad_norm": 1.8204940557479858, "learning_rate": 5.822685326031842e-07, "loss": 0.8827, "step": 18555 }, { "epoch": 0.4470027215144144, "grad_norm": 1.5450371503829956, "learning_rate": 5.821417706114998e-07, "loss": 0.9504, "step": 18560 }, { "epoch": 0.44712314250620167, "grad_norm": 1.7189927101135254, "learning_rate": 5.820150086198154e-07, "loss": 0.921, "step": 18565 }, { "epoch": 0.447243563497989, "grad_norm": 1.7274240255355835, "learning_rate": 5.81888246628131e-07, "loss": 0.9178, "step": 18570 }, { "epoch": 0.44736398448977627, "grad_norm": 1.5280604362487793, "learning_rate": 5.817614846364466e-07, "loss": 0.9305, "step": 18575 }, { "epoch": 0.44748440548156354, "grad_norm": 1.4250680208206177, "learning_rate": 5.816347226447623e-07, "loss": 0.8913, "step": 18580 }, { "epoch": 0.4476048264733508, "grad_norm": 1.6027624607086182, "learning_rate": 5.815079606530777e-07, "loss": 0.9009, "step": 18585 }, { "epoch": 0.44772524746513814, "grad_norm": 1.576629877090454, "learning_rate": 5.813811986613933e-07, "loss": 0.9222, "step": 18590 }, { "epoch": 0.4478456684569254, "grad_norm": 1.5910590887069702, "learning_rate": 5.81254436669709e-07, "loss": 0.9767, "step": 18595 }, { "epoch": 0.4479660894487127, "grad_norm": 1.580986499786377, "learning_rate": 5.811276746780245e-07, "loss": 0.8606, "step": 18600 }, { "epoch": 0.44808651044049996, "grad_norm": 1.5624583959579468, "learning_rate": 5.810009126863402e-07, "loss": 0.8826, "step": 18605 }, { "epoch": 0.4482069314322873, "grad_norm": 1.5432230234146118, "learning_rate": 5.808741506946557e-07, "loss": 0.9025, "step": 18610 }, { "epoch": 0.44832735242407457, "grad_norm": 1.5583560466766357, "learning_rate": 5.807473887029712e-07, "loss": 0.8883, "step": 18615 }, { "epoch": 0.44844777341586184, "grad_norm": 1.5831446647644043, "learning_rate": 5.806206267112869e-07, "loss": 0.9434, "step": 18620 }, { "epoch": 0.44856819440764917, "grad_norm": 1.3928394317626953, "learning_rate": 5.804938647196025e-07, "loss": 0.8973, "step": 18625 }, { "epoch": 0.44868861539943644, "grad_norm": 1.4622230529785156, "learning_rate": 5.80367102727918e-07, "loss": 0.8864, "step": 18630 }, { "epoch": 0.4488090363912237, "grad_norm": 1.3747080564498901, "learning_rate": 5.802403407362336e-07, "loss": 0.8946, "step": 18635 }, { "epoch": 0.448929457383011, "grad_norm": 1.7349574565887451, "learning_rate": 5.801135787445492e-07, "loss": 0.953, "step": 18640 }, { "epoch": 0.4490498783747983, "grad_norm": 1.4037730693817139, "learning_rate": 5.799868167528648e-07, "loss": 0.8879, "step": 18645 }, { "epoch": 0.4491702993665856, "grad_norm": 1.5784122943878174, "learning_rate": 5.798600547611804e-07, "loss": 0.9634, "step": 18650 }, { "epoch": 0.44929072035837286, "grad_norm": 1.639102816581726, "learning_rate": 5.79733292769496e-07, "loss": 0.9171, "step": 18655 }, { "epoch": 0.44941114135016014, "grad_norm": 1.5633991956710815, "learning_rate": 5.796065307778115e-07, "loss": 0.9123, "step": 18660 }, { "epoch": 0.44953156234194747, "grad_norm": 1.5898741483688354, "learning_rate": 5.794797687861272e-07, "loss": 0.873, "step": 18665 }, { "epoch": 0.44965198333373474, "grad_norm": 1.4367605447769165, "learning_rate": 5.793530067944428e-07, "loss": 0.846, "step": 18670 }, { "epoch": 0.449772404325522, "grad_norm": 1.4805914163589478, "learning_rate": 5.792262448027582e-07, "loss": 0.9828, "step": 18675 }, { "epoch": 0.4498928253173093, "grad_norm": 1.7379878759384155, "learning_rate": 5.790994828110739e-07, "loss": 0.9296, "step": 18680 }, { "epoch": 0.4500132463090966, "grad_norm": 1.6005162000656128, "learning_rate": 5.789727208193895e-07, "loss": 0.953, "step": 18685 }, { "epoch": 0.4501336673008839, "grad_norm": 1.7036206722259521, "learning_rate": 5.788459588277051e-07, "loss": 0.8372, "step": 18690 }, { "epoch": 0.45025408829267116, "grad_norm": 1.5740010738372803, "learning_rate": 5.787191968360207e-07, "loss": 0.9642, "step": 18695 }, { "epoch": 0.4503745092844585, "grad_norm": 1.5430411100387573, "learning_rate": 5.785924348443362e-07, "loss": 0.9323, "step": 18700 }, { "epoch": 0.45049493027624576, "grad_norm": 1.446671724319458, "learning_rate": 5.784656728526518e-07, "loss": 0.9183, "step": 18705 }, { "epoch": 0.45061535126803304, "grad_norm": 1.4905126094818115, "learning_rate": 5.783389108609674e-07, "loss": 0.897, "step": 18710 }, { "epoch": 0.4507357722598203, "grad_norm": 1.5193527936935425, "learning_rate": 5.782121488692831e-07, "loss": 0.9159, "step": 18715 }, { "epoch": 0.45085619325160764, "grad_norm": 1.6405127048492432, "learning_rate": 5.780853868775986e-07, "loss": 0.8922, "step": 18720 }, { "epoch": 0.4509766142433949, "grad_norm": 1.4062474966049194, "learning_rate": 5.779586248859141e-07, "loss": 0.9168, "step": 18725 }, { "epoch": 0.4510970352351822, "grad_norm": 1.3942046165466309, "learning_rate": 5.778318628942298e-07, "loss": 0.843, "step": 18730 }, { "epoch": 0.45121745622696946, "grad_norm": 1.5190876722335815, "learning_rate": 5.777051009025453e-07, "loss": 0.895, "step": 18735 }, { "epoch": 0.4513378772187568, "grad_norm": 1.42783784866333, "learning_rate": 5.77578338910861e-07, "loss": 0.8943, "step": 18740 }, { "epoch": 0.45145829821054406, "grad_norm": 1.7751065492630005, "learning_rate": 5.774515769191765e-07, "loss": 0.8743, "step": 18745 }, { "epoch": 0.45157871920233134, "grad_norm": 1.5588613748550415, "learning_rate": 5.773248149274921e-07, "loss": 0.9128, "step": 18750 }, { "epoch": 0.45169914019411866, "grad_norm": 1.5651437044143677, "learning_rate": 5.771980529358077e-07, "loss": 0.8912, "step": 18755 }, { "epoch": 0.45181956118590594, "grad_norm": 1.7240246534347534, "learning_rate": 5.770712909441233e-07, "loss": 0.9365, "step": 18760 }, { "epoch": 0.4519399821776932, "grad_norm": 1.4902751445770264, "learning_rate": 5.769445289524389e-07, "loss": 0.8875, "step": 18765 }, { "epoch": 0.4520604031694805, "grad_norm": 1.8112143278121948, "learning_rate": 5.768177669607544e-07, "loss": 0.9054, "step": 18770 }, { "epoch": 0.4521808241612678, "grad_norm": 2.001507520675659, "learning_rate": 5.766910049690701e-07, "loss": 0.9174, "step": 18775 }, { "epoch": 0.4523012451530551, "grad_norm": 1.568212628364563, "learning_rate": 5.765642429773856e-07, "loss": 0.9049, "step": 18780 }, { "epoch": 0.45242166614484236, "grad_norm": 1.4313302040100098, "learning_rate": 5.764374809857012e-07, "loss": 0.9175, "step": 18785 }, { "epoch": 0.45254208713662963, "grad_norm": 1.4737589359283447, "learning_rate": 5.763107189940169e-07, "loss": 0.8932, "step": 18790 }, { "epoch": 0.45266250812841696, "grad_norm": 1.3767342567443848, "learning_rate": 5.761839570023323e-07, "loss": 0.9753, "step": 18795 }, { "epoch": 0.45278292912020424, "grad_norm": 1.583419919013977, "learning_rate": 5.76057195010648e-07, "loss": 0.9356, "step": 18800 }, { "epoch": 0.4529033501119915, "grad_norm": 1.5111521482467651, "learning_rate": 5.759304330189636e-07, "loss": 0.9451, "step": 18805 }, { "epoch": 0.45302377110377884, "grad_norm": 1.6245722770690918, "learning_rate": 5.758036710272792e-07, "loss": 0.9008, "step": 18810 }, { "epoch": 0.4531441920955661, "grad_norm": 1.539433240890503, "learning_rate": 5.756769090355947e-07, "loss": 0.9434, "step": 18815 }, { "epoch": 0.4532646130873534, "grad_norm": 1.4418680667877197, "learning_rate": 5.755501470439103e-07, "loss": 0.9049, "step": 18820 }, { "epoch": 0.45338503407914066, "grad_norm": 1.708088994026184, "learning_rate": 5.754233850522259e-07, "loss": 0.9196, "step": 18825 }, { "epoch": 0.453505455070928, "grad_norm": 1.5235261917114258, "learning_rate": 5.752966230605415e-07, "loss": 0.9526, "step": 18830 }, { "epoch": 0.45362587606271526, "grad_norm": 1.585042119026184, "learning_rate": 5.751698610688572e-07, "loss": 0.8745, "step": 18835 }, { "epoch": 0.45374629705450253, "grad_norm": 1.689333200454712, "learning_rate": 5.750430990771726e-07, "loss": 0.9088, "step": 18840 }, { "epoch": 0.4538667180462898, "grad_norm": 1.4670780897140503, "learning_rate": 5.749163370854882e-07, "loss": 0.9259, "step": 18845 }, { "epoch": 0.45398713903807714, "grad_norm": 1.5117682218551636, "learning_rate": 5.747895750938039e-07, "loss": 0.8814, "step": 18850 }, { "epoch": 0.4541075600298644, "grad_norm": 1.6144859790802002, "learning_rate": 5.746628131021194e-07, "loss": 0.9731, "step": 18855 }, { "epoch": 0.4542279810216517, "grad_norm": 1.5023188591003418, "learning_rate": 5.74536051110435e-07, "loss": 0.8978, "step": 18860 }, { "epoch": 0.45434840201343896, "grad_norm": 1.6316559314727783, "learning_rate": 5.744092891187506e-07, "loss": 0.9299, "step": 18865 }, { "epoch": 0.4544688230052263, "grad_norm": 1.8118706941604614, "learning_rate": 5.742825271270662e-07, "loss": 0.8923, "step": 18870 }, { "epoch": 0.45458924399701356, "grad_norm": 1.6119071245193481, "learning_rate": 5.741557651353818e-07, "loss": 0.8898, "step": 18875 }, { "epoch": 0.45470966498880083, "grad_norm": 1.597812294960022, "learning_rate": 5.740290031436974e-07, "loss": 0.9185, "step": 18880 }, { "epoch": 0.45483008598058816, "grad_norm": 1.587369680404663, "learning_rate": 5.739022411520129e-07, "loss": 0.9109, "step": 18885 }, { "epoch": 0.45495050697237543, "grad_norm": 1.6539413928985596, "learning_rate": 5.737754791603285e-07, "loss": 0.8844, "step": 18890 }, { "epoch": 0.4550709279641627, "grad_norm": 1.4326918125152588, "learning_rate": 5.736487171686442e-07, "loss": 0.9227, "step": 18895 }, { "epoch": 0.45519134895595, "grad_norm": 1.6746175289154053, "learning_rate": 5.735219551769597e-07, "loss": 0.9478, "step": 18900 }, { "epoch": 0.4553117699477373, "grad_norm": 1.4657456874847412, "learning_rate": 5.733951931852753e-07, "loss": 0.9131, "step": 18905 }, { "epoch": 0.4554321909395246, "grad_norm": 1.613221287727356, "learning_rate": 5.732684311935909e-07, "loss": 0.9123, "step": 18910 }, { "epoch": 0.45555261193131186, "grad_norm": 1.4709984064102173, "learning_rate": 5.731416692019064e-07, "loss": 0.856, "step": 18915 }, { "epoch": 0.45567303292309913, "grad_norm": 1.6130681037902832, "learning_rate": 5.730149072102221e-07, "loss": 0.9035, "step": 18920 }, { "epoch": 0.45579345391488646, "grad_norm": 1.5863080024719238, "learning_rate": 5.728881452185377e-07, "loss": 0.9249, "step": 18925 }, { "epoch": 0.45591387490667373, "grad_norm": 1.593340516090393, "learning_rate": 5.727613832268531e-07, "loss": 0.9131, "step": 18930 }, { "epoch": 0.456034295898461, "grad_norm": 1.613885521888733, "learning_rate": 5.726346212351688e-07, "loss": 0.914, "step": 18935 }, { "epoch": 0.45615471689024834, "grad_norm": 1.6788082122802734, "learning_rate": 5.725078592434844e-07, "loss": 0.8911, "step": 18940 }, { "epoch": 0.4562751378820356, "grad_norm": 1.7046750783920288, "learning_rate": 5.723810972518e-07, "loss": 0.9332, "step": 18945 }, { "epoch": 0.4563955588738229, "grad_norm": 1.4113725423812866, "learning_rate": 5.722543352601156e-07, "loss": 0.9386, "step": 18950 }, { "epoch": 0.45651597986561016, "grad_norm": 1.8018156290054321, "learning_rate": 5.721275732684312e-07, "loss": 0.9314, "step": 18955 }, { "epoch": 0.4566364008573975, "grad_norm": 1.3819090127944946, "learning_rate": 5.720008112767467e-07, "loss": 0.9241, "step": 18960 }, { "epoch": 0.45675682184918476, "grad_norm": 1.5696793794631958, "learning_rate": 5.718740492850623e-07, "loss": 0.9194, "step": 18965 }, { "epoch": 0.45687724284097203, "grad_norm": 1.608102560043335, "learning_rate": 5.71747287293378e-07, "loss": 0.9204, "step": 18970 }, { "epoch": 0.4569976638327593, "grad_norm": 1.5484436750411987, "learning_rate": 5.716205253016934e-07, "loss": 0.9414, "step": 18975 }, { "epoch": 0.45711808482454663, "grad_norm": 1.551183819770813, "learning_rate": 5.714937633100091e-07, "loss": 0.9455, "step": 18980 }, { "epoch": 0.4572385058163339, "grad_norm": 1.6090680360794067, "learning_rate": 5.713670013183247e-07, "loss": 0.9278, "step": 18985 }, { "epoch": 0.4573589268081212, "grad_norm": 1.5775976181030273, "learning_rate": 5.712402393266402e-07, "loss": 0.9173, "step": 18990 }, { "epoch": 0.45747934779990845, "grad_norm": 1.6444989442825317, "learning_rate": 5.711134773349559e-07, "loss": 0.9548, "step": 18995 }, { "epoch": 0.4575997687916958, "grad_norm": 1.5032627582550049, "learning_rate": 5.709867153432714e-07, "loss": 0.9373, "step": 19000 }, { "epoch": 0.45772018978348306, "grad_norm": 1.5852969884872437, "learning_rate": 5.70859953351587e-07, "loss": 0.926, "step": 19005 }, { "epoch": 0.45784061077527033, "grad_norm": 1.5551520586013794, "learning_rate": 5.707331913599026e-07, "loss": 0.9585, "step": 19010 }, { "epoch": 0.45796103176705766, "grad_norm": 1.562559962272644, "learning_rate": 5.706064293682183e-07, "loss": 0.9485, "step": 19015 }, { "epoch": 0.45808145275884493, "grad_norm": 1.5225250720977783, "learning_rate": 5.704796673765337e-07, "loss": 0.8889, "step": 19020 }, { "epoch": 0.4582018737506322, "grad_norm": 1.78077232837677, "learning_rate": 5.703529053848493e-07, "loss": 0.9894, "step": 19025 }, { "epoch": 0.4583222947424195, "grad_norm": 1.660193681716919, "learning_rate": 5.70226143393165e-07, "loss": 0.9268, "step": 19030 }, { "epoch": 0.4584427157342068, "grad_norm": 1.4802565574645996, "learning_rate": 5.700993814014805e-07, "loss": 0.9387, "step": 19035 }, { "epoch": 0.4585631367259941, "grad_norm": 1.4176965951919556, "learning_rate": 5.699726194097962e-07, "loss": 0.917, "step": 19040 }, { "epoch": 0.45868355771778135, "grad_norm": 1.7834274768829346, "learning_rate": 5.698458574181117e-07, "loss": 0.9135, "step": 19045 }, { "epoch": 0.4588039787095686, "grad_norm": 1.7946574687957764, "learning_rate": 5.697190954264272e-07, "loss": 0.9362, "step": 19050 }, { "epoch": 0.45892439970135596, "grad_norm": 1.5921515226364136, "learning_rate": 5.695923334347429e-07, "loss": 0.8743, "step": 19055 }, { "epoch": 0.45904482069314323, "grad_norm": 1.4635932445526123, "learning_rate": 5.694655714430585e-07, "loss": 0.9383, "step": 19060 }, { "epoch": 0.4591652416849305, "grad_norm": 1.5557299852371216, "learning_rate": 5.693388094513741e-07, "loss": 0.9065, "step": 19065 }, { "epoch": 0.45928566267671783, "grad_norm": 1.3830097913742065, "learning_rate": 5.692120474596896e-07, "loss": 0.8941, "step": 19070 }, { "epoch": 0.4594060836685051, "grad_norm": 1.644334316253662, "learning_rate": 5.690852854680053e-07, "loss": 0.903, "step": 19075 }, { "epoch": 0.4595265046602924, "grad_norm": 1.4195899963378906, "learning_rate": 5.689585234763208e-07, "loss": 0.9194, "step": 19080 }, { "epoch": 0.45964692565207965, "grad_norm": 1.5357506275177002, "learning_rate": 5.688317614846364e-07, "loss": 0.9624, "step": 19085 }, { "epoch": 0.459767346643867, "grad_norm": 1.6892377138137817, "learning_rate": 5.68704999492952e-07, "loss": 0.9293, "step": 19090 }, { "epoch": 0.45988776763565425, "grad_norm": 1.3714441061019897, "learning_rate": 5.685782375012675e-07, "loss": 0.8866, "step": 19095 }, { "epoch": 0.4600081886274415, "grad_norm": 1.8560410737991333, "learning_rate": 5.684514755095832e-07, "loss": 0.9763, "step": 19100 }, { "epoch": 0.4601286096192288, "grad_norm": 1.7453593015670776, "learning_rate": 5.683247135178988e-07, "loss": 0.914, "step": 19105 }, { "epoch": 0.46024903061101613, "grad_norm": 1.5820306539535522, "learning_rate": 5.681979515262143e-07, "loss": 0.9245, "step": 19110 }, { "epoch": 0.4603694516028034, "grad_norm": 1.48934006690979, "learning_rate": 5.680711895345299e-07, "loss": 0.8885, "step": 19115 }, { "epoch": 0.4604898725945907, "grad_norm": 1.542831301689148, "learning_rate": 5.679444275428455e-07, "loss": 0.9049, "step": 19120 }, { "epoch": 0.46061029358637795, "grad_norm": 1.5863815546035767, "learning_rate": 5.678176655511611e-07, "loss": 0.9294, "step": 19125 }, { "epoch": 0.4607307145781653, "grad_norm": 1.705835223197937, "learning_rate": 5.676909035594767e-07, "loss": 0.9287, "step": 19130 }, { "epoch": 0.46085113556995255, "grad_norm": 1.6327176094055176, "learning_rate": 5.675641415677924e-07, "loss": 0.9219, "step": 19135 }, { "epoch": 0.4609715565617398, "grad_norm": 1.6016877889633179, "learning_rate": 5.674373795761078e-07, "loss": 0.8602, "step": 19140 }, { "epoch": 0.46109197755352715, "grad_norm": 1.5826456546783447, "learning_rate": 5.673106175844234e-07, "loss": 0.9342, "step": 19145 }, { "epoch": 0.46121239854531443, "grad_norm": 1.7967777252197266, "learning_rate": 5.671838555927391e-07, "loss": 0.8953, "step": 19150 }, { "epoch": 0.4613328195371017, "grad_norm": 1.5923316478729248, "learning_rate": 5.670570936010546e-07, "loss": 0.9272, "step": 19155 }, { "epoch": 0.461453240528889, "grad_norm": 1.5057430267333984, "learning_rate": 5.669303316093702e-07, "loss": 0.9219, "step": 19160 }, { "epoch": 0.4615736615206763, "grad_norm": 1.4920909404754639, "learning_rate": 5.668035696176858e-07, "loss": 0.8777, "step": 19165 }, { "epoch": 0.4616940825124636, "grad_norm": 1.5344513654708862, "learning_rate": 5.666768076260013e-07, "loss": 0.8931, "step": 19170 }, { "epoch": 0.46181450350425085, "grad_norm": 1.4009016752243042, "learning_rate": 5.66550045634317e-07, "loss": 0.8792, "step": 19175 }, { "epoch": 0.4619349244960381, "grad_norm": 1.6597226858139038, "learning_rate": 5.664232836426326e-07, "loss": 0.9244, "step": 19180 }, { "epoch": 0.46205534548782545, "grad_norm": 1.7540236711502075, "learning_rate": 5.662965216509481e-07, "loss": 0.8937, "step": 19185 }, { "epoch": 0.4621757664796127, "grad_norm": 1.5170707702636719, "learning_rate": 5.661697596592637e-07, "loss": 0.9194, "step": 19190 }, { "epoch": 0.4622961874714, "grad_norm": 1.8222031593322754, "learning_rate": 5.660429976675794e-07, "loss": 0.9276, "step": 19195 }, { "epoch": 0.46241660846318733, "grad_norm": 1.5705348253250122, "learning_rate": 5.659162356758949e-07, "loss": 0.9343, "step": 19200 }, { "epoch": 0.4625370294549746, "grad_norm": 1.4817367792129517, "learning_rate": 5.657894736842104e-07, "loss": 0.9395, "step": 19205 }, { "epoch": 0.4626574504467619, "grad_norm": 1.5779122114181519, "learning_rate": 5.656627116925261e-07, "loss": 0.8933, "step": 19210 }, { "epoch": 0.46277787143854915, "grad_norm": 1.3455822467803955, "learning_rate": 5.655359497008416e-07, "loss": 0.9075, "step": 19215 }, { "epoch": 0.4628982924303365, "grad_norm": 1.6021500825881958, "learning_rate": 5.654091877091573e-07, "loss": 0.9553, "step": 19220 }, { "epoch": 0.46301871342212375, "grad_norm": 1.4690455198287964, "learning_rate": 5.652824257174729e-07, "loss": 0.9146, "step": 19225 }, { "epoch": 0.463139134413911, "grad_norm": 1.4307332038879395, "learning_rate": 5.651556637257883e-07, "loss": 0.9641, "step": 19230 }, { "epoch": 0.4632595554056983, "grad_norm": 1.579603672027588, "learning_rate": 5.65028901734104e-07, "loss": 0.8914, "step": 19235 }, { "epoch": 0.4633799763974856, "grad_norm": 1.7392204999923706, "learning_rate": 5.649021397424196e-07, "loss": 0.8679, "step": 19240 }, { "epoch": 0.4635003973892729, "grad_norm": 1.5015277862548828, "learning_rate": 5.647753777507353e-07, "loss": 0.9354, "step": 19245 }, { "epoch": 0.4636208183810602, "grad_norm": 1.5179589986801147, "learning_rate": 5.646486157590508e-07, "loss": 0.9798, "step": 19250 }, { "epoch": 0.4637412393728475, "grad_norm": 1.3988590240478516, "learning_rate": 5.645218537673663e-07, "loss": 0.9186, "step": 19255 }, { "epoch": 0.4638616603646348, "grad_norm": 1.6146354675292969, "learning_rate": 5.64395091775682e-07, "loss": 0.8371, "step": 19260 }, { "epoch": 0.46398208135642205, "grad_norm": 1.4624241590499878, "learning_rate": 5.642683297839975e-07, "loss": 0.9093, "step": 19265 }, { "epoch": 0.4641025023482093, "grad_norm": 1.6220847368240356, "learning_rate": 5.641415677923132e-07, "loss": 0.8582, "step": 19270 }, { "epoch": 0.46422292333999665, "grad_norm": 1.5948646068572998, "learning_rate": 5.640148058006287e-07, "loss": 0.937, "step": 19275 }, { "epoch": 0.4643433443317839, "grad_norm": 1.5547462701797485, "learning_rate": 5.638880438089443e-07, "loss": 0.8907, "step": 19280 }, { "epoch": 0.4644637653235712, "grad_norm": 1.6174482107162476, "learning_rate": 5.637612818172599e-07, "loss": 0.9369, "step": 19285 }, { "epoch": 0.46458418631535847, "grad_norm": 1.4465388059616089, "learning_rate": 5.636345198255755e-07, "loss": 0.9182, "step": 19290 }, { "epoch": 0.4647046073071458, "grad_norm": 1.8930598497390747, "learning_rate": 5.635077578338911e-07, "loss": 0.8905, "step": 19295 }, { "epoch": 0.4648250282989331, "grad_norm": 1.3349248170852661, "learning_rate": 5.633809958422066e-07, "loss": 0.9202, "step": 19300 }, { "epoch": 0.46494544929072035, "grad_norm": 1.38579523563385, "learning_rate": 5.632542338505223e-07, "loss": 0.9545, "step": 19305 }, { "epoch": 0.4650658702825076, "grad_norm": 1.6983599662780762, "learning_rate": 5.631274718588378e-07, "loss": 0.9331, "step": 19310 }, { "epoch": 0.46518629127429495, "grad_norm": 1.6366914510726929, "learning_rate": 5.630007098671535e-07, "loss": 0.9384, "step": 19315 }, { "epoch": 0.4653067122660822, "grad_norm": 1.8956844806671143, "learning_rate": 5.628739478754691e-07, "loss": 0.8964, "step": 19320 }, { "epoch": 0.4654271332578695, "grad_norm": 1.6888880729675293, "learning_rate": 5.627471858837845e-07, "loss": 0.8892, "step": 19325 }, { "epoch": 0.4655475542496568, "grad_norm": 1.5253064632415771, "learning_rate": 5.626204238921002e-07, "loss": 0.8463, "step": 19330 }, { "epoch": 0.4656679752414441, "grad_norm": 1.632535696029663, "learning_rate": 5.624936619004158e-07, "loss": 0.886, "step": 19335 }, { "epoch": 0.46578839623323137, "grad_norm": 1.5638158321380615, "learning_rate": 5.623668999087314e-07, "loss": 0.95, "step": 19340 }, { "epoch": 0.46590881722501865, "grad_norm": 1.78264582157135, "learning_rate": 5.622401379170469e-07, "loss": 0.9205, "step": 19345 }, { "epoch": 0.466029238216806, "grad_norm": 1.6770012378692627, "learning_rate": 5.621133759253625e-07, "loss": 0.9276, "step": 19350 }, { "epoch": 0.46614965920859325, "grad_norm": 1.6042697429656982, "learning_rate": 5.619866139336781e-07, "loss": 0.8913, "step": 19355 }, { "epoch": 0.4662700802003805, "grad_norm": 1.4511951208114624, "learning_rate": 5.618598519419937e-07, "loss": 0.8983, "step": 19360 }, { "epoch": 0.4663905011921678, "grad_norm": 1.4056437015533447, "learning_rate": 5.617330899503094e-07, "loss": 0.8683, "step": 19365 }, { "epoch": 0.4665109221839551, "grad_norm": 1.493322491645813, "learning_rate": 5.616063279586248e-07, "loss": 0.916, "step": 19370 }, { "epoch": 0.4666313431757424, "grad_norm": 1.4477388858795166, "learning_rate": 5.614795659669404e-07, "loss": 0.9469, "step": 19375 }, { "epoch": 0.46675176416752967, "grad_norm": 1.498809576034546, "learning_rate": 5.613528039752561e-07, "loss": 0.9008, "step": 19380 }, { "epoch": 0.466872185159317, "grad_norm": 1.6682100296020508, "learning_rate": 5.612260419835716e-07, "loss": 0.9416, "step": 19385 }, { "epoch": 0.4669926061511043, "grad_norm": 1.6538997888565063, "learning_rate": 5.610992799918872e-07, "loss": 0.8469, "step": 19390 }, { "epoch": 0.46711302714289155, "grad_norm": 1.5839837789535522, "learning_rate": 5.609725180002028e-07, "loss": 0.8992, "step": 19395 }, { "epoch": 0.4672334481346788, "grad_norm": 1.6330792903900146, "learning_rate": 5.608457560085184e-07, "loss": 0.9026, "step": 19400 }, { "epoch": 0.46735386912646615, "grad_norm": 1.4835282564163208, "learning_rate": 5.60718994016834e-07, "loss": 0.9264, "step": 19405 }, { "epoch": 0.4674742901182534, "grad_norm": 1.6376011371612549, "learning_rate": 5.605922320251496e-07, "loss": 0.91, "step": 19410 }, { "epoch": 0.4675947111100407, "grad_norm": 1.3809139728546143, "learning_rate": 5.604654700334651e-07, "loss": 0.908, "step": 19415 }, { "epoch": 0.46771513210182797, "grad_norm": 1.5058355331420898, "learning_rate": 5.603387080417807e-07, "loss": 0.9446, "step": 19420 }, { "epoch": 0.4678355530936153, "grad_norm": 1.6111805438995361, "learning_rate": 5.602119460500964e-07, "loss": 0.8859, "step": 19425 }, { "epoch": 0.46795597408540257, "grad_norm": 1.508823037147522, "learning_rate": 5.600851840584119e-07, "loss": 0.9019, "step": 19430 }, { "epoch": 0.46807639507718984, "grad_norm": 1.765920639038086, "learning_rate": 5.599584220667275e-07, "loss": 0.8805, "step": 19435 }, { "epoch": 0.4681968160689771, "grad_norm": 1.5230178833007812, "learning_rate": 5.598316600750431e-07, "loss": 0.8594, "step": 19440 }, { "epoch": 0.46831723706076445, "grad_norm": 1.5808008909225464, "learning_rate": 5.597048980833586e-07, "loss": 0.8729, "step": 19445 }, { "epoch": 0.4684376580525517, "grad_norm": 1.6315981149673462, "learning_rate": 5.595781360916743e-07, "loss": 0.8992, "step": 19450 }, { "epoch": 0.468558079044339, "grad_norm": 1.5495814085006714, "learning_rate": 5.594513740999899e-07, "loss": 0.9098, "step": 19455 }, { "epoch": 0.4686785000361263, "grad_norm": 1.717214822769165, "learning_rate": 5.593246121083053e-07, "loss": 0.959, "step": 19460 }, { "epoch": 0.4687989210279136, "grad_norm": 1.468532919883728, "learning_rate": 5.59197850116621e-07, "loss": 0.8781, "step": 19465 }, { "epoch": 0.46891934201970087, "grad_norm": 1.4898240566253662, "learning_rate": 5.590710881249366e-07, "loss": 0.91, "step": 19470 }, { "epoch": 0.46903976301148814, "grad_norm": 1.5860544443130493, "learning_rate": 5.589443261332522e-07, "loss": 0.8995, "step": 19475 }, { "epoch": 0.46916018400327547, "grad_norm": 1.4536211490631104, "learning_rate": 5.588175641415678e-07, "loss": 0.9219, "step": 19480 }, { "epoch": 0.46928060499506274, "grad_norm": 1.3934434652328491, "learning_rate": 5.586908021498834e-07, "loss": 0.8859, "step": 19485 }, { "epoch": 0.46940102598685, "grad_norm": 1.4026354551315308, "learning_rate": 5.585640401581989e-07, "loss": 0.9145, "step": 19490 }, { "epoch": 0.4695214469786373, "grad_norm": 1.5729193687438965, "learning_rate": 5.584372781665145e-07, "loss": 0.9152, "step": 19495 }, { "epoch": 0.4696418679704246, "grad_norm": 1.5934133529663086, "learning_rate": 5.583105161748302e-07, "loss": 0.9541, "step": 19500 }, { "epoch": 0.4697622889622119, "grad_norm": 1.3092372417449951, "learning_rate": 5.581837541831456e-07, "loss": 0.9333, "step": 19505 }, { "epoch": 0.46988270995399917, "grad_norm": 1.6447803974151611, "learning_rate": 5.580569921914613e-07, "loss": 0.8509, "step": 19510 }, { "epoch": 0.4700031309457865, "grad_norm": 1.5421675443649292, "learning_rate": 5.579302301997769e-07, "loss": 0.8606, "step": 19515 }, { "epoch": 0.47012355193757377, "grad_norm": 1.5920692682266235, "learning_rate": 5.578034682080924e-07, "loss": 0.8919, "step": 19520 }, { "epoch": 0.47024397292936104, "grad_norm": 1.4933526515960693, "learning_rate": 5.576767062164081e-07, "loss": 0.946, "step": 19525 }, { "epoch": 0.4703643939211483, "grad_norm": 1.5667372941970825, "learning_rate": 5.575499442247236e-07, "loss": 0.8823, "step": 19530 }, { "epoch": 0.47048481491293564, "grad_norm": 1.3437079191207886, "learning_rate": 5.574231822330392e-07, "loss": 0.9155, "step": 19535 }, { "epoch": 0.4706052359047229, "grad_norm": 1.6043516397476196, "learning_rate": 5.572964202413548e-07, "loss": 0.9311, "step": 19540 }, { "epoch": 0.4707256568965102, "grad_norm": 1.5555745363235474, "learning_rate": 5.571696582496705e-07, "loss": 0.9066, "step": 19545 }, { "epoch": 0.47084607788829747, "grad_norm": 1.4690513610839844, "learning_rate": 5.57042896257986e-07, "loss": 0.915, "step": 19550 }, { "epoch": 0.4709664988800848, "grad_norm": 1.412894368171692, "learning_rate": 5.569161342663015e-07, "loss": 0.9205, "step": 19555 }, { "epoch": 0.47108691987187207, "grad_norm": 1.5198075771331787, "learning_rate": 5.567893722746172e-07, "loss": 0.9057, "step": 19560 }, { "epoch": 0.47120734086365934, "grad_norm": 1.4156620502471924, "learning_rate": 5.566626102829327e-07, "loss": 0.9071, "step": 19565 }, { "epoch": 0.4713277618554466, "grad_norm": 1.5950437784194946, "learning_rate": 5.565358482912484e-07, "loss": 0.926, "step": 19570 }, { "epoch": 0.47144818284723394, "grad_norm": 1.900969386100769, "learning_rate": 5.564090862995639e-07, "loss": 0.8585, "step": 19575 }, { "epoch": 0.4715686038390212, "grad_norm": 2.091481924057007, "learning_rate": 5.562823243078794e-07, "loss": 0.8834, "step": 19580 }, { "epoch": 0.4716890248308085, "grad_norm": 1.8308582305908203, "learning_rate": 5.561555623161951e-07, "loss": 0.9513, "step": 19585 }, { "epoch": 0.4718094458225958, "grad_norm": 1.667382001876831, "learning_rate": 5.560288003245107e-07, "loss": 0.8976, "step": 19590 }, { "epoch": 0.4719298668143831, "grad_norm": 1.412461757659912, "learning_rate": 5.559020383328263e-07, "loss": 0.9328, "step": 19595 }, { "epoch": 0.47205028780617037, "grad_norm": 1.6457659006118774, "learning_rate": 5.557752763411418e-07, "loss": 0.8995, "step": 19600 }, { "epoch": 0.47217070879795764, "grad_norm": 1.5809874534606934, "learning_rate": 5.556485143494575e-07, "loss": 0.9108, "step": 19605 }, { "epoch": 0.47229112978974497, "grad_norm": 1.529252290725708, "learning_rate": 5.55521752357773e-07, "loss": 0.8897, "step": 19610 }, { "epoch": 0.47241155078153224, "grad_norm": 1.672222375869751, "learning_rate": 5.553949903660886e-07, "loss": 0.8975, "step": 19615 }, { "epoch": 0.4725319717733195, "grad_norm": 1.5824137926101685, "learning_rate": 5.552682283744043e-07, "loss": 0.9066, "step": 19620 }, { "epoch": 0.4726523927651068, "grad_norm": 1.9257863759994507, "learning_rate": 5.551414663827197e-07, "loss": 0.9294, "step": 19625 }, { "epoch": 0.4727728137568941, "grad_norm": 1.6060237884521484, "learning_rate": 5.550147043910354e-07, "loss": 0.9267, "step": 19630 }, { "epoch": 0.4728932347486814, "grad_norm": 1.5399199724197388, "learning_rate": 5.54887942399351e-07, "loss": 0.8888, "step": 19635 }, { "epoch": 0.47301365574046866, "grad_norm": 1.5686084032058716, "learning_rate": 5.547611804076665e-07, "loss": 0.9176, "step": 19640 }, { "epoch": 0.473134076732256, "grad_norm": 1.4253500699996948, "learning_rate": 5.546344184159821e-07, "loss": 0.9271, "step": 19645 }, { "epoch": 0.47325449772404327, "grad_norm": 1.5969947576522827, "learning_rate": 5.545076564242977e-07, "loss": 0.9251, "step": 19650 }, { "epoch": 0.47337491871583054, "grad_norm": 1.586877465248108, "learning_rate": 5.543808944326133e-07, "loss": 0.8853, "step": 19655 }, { "epoch": 0.4734953397076178, "grad_norm": 1.579201340675354, "learning_rate": 5.542541324409289e-07, "loss": 0.8718, "step": 19660 }, { "epoch": 0.47361576069940514, "grad_norm": 1.5453004837036133, "learning_rate": 5.541273704492446e-07, "loss": 0.8859, "step": 19665 }, { "epoch": 0.4737361816911924, "grad_norm": 1.5748382806777954, "learning_rate": 5.5400060845756e-07, "loss": 0.8919, "step": 19670 }, { "epoch": 0.4738566026829797, "grad_norm": 1.6768748760223389, "learning_rate": 5.538738464658756e-07, "loss": 0.8696, "step": 19675 }, { "epoch": 0.47397702367476696, "grad_norm": 1.5177021026611328, "learning_rate": 5.537470844741913e-07, "loss": 0.8602, "step": 19680 }, { "epoch": 0.4740974446665543, "grad_norm": 1.3657416105270386, "learning_rate": 5.536203224825068e-07, "loss": 0.8537, "step": 19685 }, { "epoch": 0.47421786565834156, "grad_norm": 1.484498143196106, "learning_rate": 5.534935604908224e-07, "loss": 0.9626, "step": 19690 }, { "epoch": 0.47433828665012884, "grad_norm": 1.3851714134216309, "learning_rate": 5.53366798499138e-07, "loss": 0.9084, "step": 19695 }, { "epoch": 0.47445870764191617, "grad_norm": 1.6817436218261719, "learning_rate": 5.532400365074535e-07, "loss": 0.8925, "step": 19700 }, { "epoch": 0.47457912863370344, "grad_norm": 1.6906062364578247, "learning_rate": 5.531132745157692e-07, "loss": 0.937, "step": 19705 }, { "epoch": 0.4746995496254907, "grad_norm": 1.4443438053131104, "learning_rate": 5.529865125240848e-07, "loss": 0.9102, "step": 19710 }, { "epoch": 0.474819970617278, "grad_norm": 1.527987003326416, "learning_rate": 5.528597505324003e-07, "loss": 0.8944, "step": 19715 }, { "epoch": 0.4749403916090653, "grad_norm": 1.8593183755874634, "learning_rate": 5.527329885407159e-07, "loss": 0.8978, "step": 19720 }, { "epoch": 0.4750608126008526, "grad_norm": 1.5174137353897095, "learning_rate": 5.526062265490316e-07, "loss": 0.8817, "step": 19725 }, { "epoch": 0.47518123359263986, "grad_norm": 1.453525424003601, "learning_rate": 5.524794645573471e-07, "loss": 0.9118, "step": 19730 }, { "epoch": 0.47530165458442714, "grad_norm": 1.5764050483703613, "learning_rate": 5.523527025656627e-07, "loss": 0.9359, "step": 19735 }, { "epoch": 0.47542207557621446, "grad_norm": 1.7837824821472168, "learning_rate": 5.522259405739783e-07, "loss": 0.9578, "step": 19740 }, { "epoch": 0.47554249656800174, "grad_norm": 1.5909477472305298, "learning_rate": 5.520991785822938e-07, "loss": 0.9389, "step": 19745 }, { "epoch": 0.475662917559789, "grad_norm": 1.485216498374939, "learning_rate": 5.519724165906095e-07, "loss": 0.8666, "step": 19750 }, { "epoch": 0.4757833385515763, "grad_norm": 1.3735274076461792, "learning_rate": 5.518456545989251e-07, "loss": 0.8216, "step": 19755 }, { "epoch": 0.4759037595433636, "grad_norm": 1.5103178024291992, "learning_rate": 5.517188926072405e-07, "loss": 0.8766, "step": 19760 }, { "epoch": 0.4760241805351509, "grad_norm": 1.46016263961792, "learning_rate": 5.515921306155562e-07, "loss": 0.9262, "step": 19765 }, { "epoch": 0.47614460152693816, "grad_norm": 1.7239071130752563, "learning_rate": 5.514653686238718e-07, "loss": 0.9191, "step": 19770 }, { "epoch": 0.4762650225187255, "grad_norm": 1.5743353366851807, "learning_rate": 5.513386066321874e-07, "loss": 0.9159, "step": 19775 }, { "epoch": 0.47638544351051276, "grad_norm": 1.7983384132385254, "learning_rate": 5.51211844640503e-07, "loss": 0.925, "step": 19780 }, { "epoch": 0.47650586450230004, "grad_norm": 1.5955098867416382, "learning_rate": 5.510850826488185e-07, "loss": 0.9405, "step": 19785 }, { "epoch": 0.4766262854940873, "grad_norm": 1.530182123184204, "learning_rate": 5.509583206571341e-07, "loss": 0.9166, "step": 19790 }, { "epoch": 0.47674670648587464, "grad_norm": 1.518515944480896, "learning_rate": 5.508315586654497e-07, "loss": 0.9451, "step": 19795 }, { "epoch": 0.4768671274776619, "grad_norm": 1.643058180809021, "learning_rate": 5.507047966737654e-07, "loss": 0.8849, "step": 19800 }, { "epoch": 0.4769875484694492, "grad_norm": 1.6911545991897583, "learning_rate": 5.505780346820808e-07, "loss": 0.8837, "step": 19805 }, { "epoch": 0.47710796946123646, "grad_norm": 1.3822886943817139, "learning_rate": 5.504512726903965e-07, "loss": 0.8948, "step": 19810 }, { "epoch": 0.4772283904530238, "grad_norm": 1.6200188398361206, "learning_rate": 5.503245106987121e-07, "loss": 0.9426, "step": 19815 }, { "epoch": 0.47734881144481106, "grad_norm": 1.4352916479110718, "learning_rate": 5.501977487070276e-07, "loss": 0.8842, "step": 19820 }, { "epoch": 0.47746923243659833, "grad_norm": 1.4828038215637207, "learning_rate": 5.500709867153433e-07, "loss": 0.8714, "step": 19825 }, { "epoch": 0.47758965342838566, "grad_norm": 1.4750306606292725, "learning_rate": 5.499442247236588e-07, "loss": 0.9484, "step": 19830 }, { "epoch": 0.47771007442017294, "grad_norm": 1.624162197113037, "learning_rate": 5.498174627319744e-07, "loss": 0.912, "step": 19835 }, { "epoch": 0.4778304954119602, "grad_norm": 1.9018586874008179, "learning_rate": 5.4969070074029e-07, "loss": 0.8916, "step": 19840 }, { "epoch": 0.4779509164037475, "grad_norm": 1.5208109617233276, "learning_rate": 5.495639387486057e-07, "loss": 0.9171, "step": 19845 }, { "epoch": 0.4780713373955348, "grad_norm": 1.521644115447998, "learning_rate": 5.494371767569212e-07, "loss": 0.9251, "step": 19850 }, { "epoch": 0.4781917583873221, "grad_norm": 1.4694710969924927, "learning_rate": 5.493104147652367e-07, "loss": 0.8663, "step": 19855 }, { "epoch": 0.47831217937910936, "grad_norm": 1.6884132623672485, "learning_rate": 5.491836527735524e-07, "loss": 0.8857, "step": 19860 }, { "epoch": 0.47843260037089663, "grad_norm": 1.7484136819839478, "learning_rate": 5.490568907818679e-07, "loss": 0.9107, "step": 19865 }, { "epoch": 0.47855302136268396, "grad_norm": 1.915028691291809, "learning_rate": 5.489301287901836e-07, "loss": 0.8986, "step": 19870 }, { "epoch": 0.47867344235447123, "grad_norm": 1.6282302141189575, "learning_rate": 5.488033667984991e-07, "loss": 0.91, "step": 19875 }, { "epoch": 0.4787938633462585, "grad_norm": 1.9310438632965088, "learning_rate": 5.486766048068146e-07, "loss": 0.9392, "step": 19880 }, { "epoch": 0.4789142843380458, "grad_norm": 1.624096393585205, "learning_rate": 5.485498428151303e-07, "loss": 0.8752, "step": 19885 }, { "epoch": 0.4790347053298331, "grad_norm": 1.579325556755066, "learning_rate": 5.484230808234459e-07, "loss": 0.8679, "step": 19890 }, { "epoch": 0.4791551263216204, "grad_norm": 1.6065541505813599, "learning_rate": 5.482963188317615e-07, "loss": 0.903, "step": 19895 }, { "epoch": 0.47927554731340766, "grad_norm": 1.509092092514038, "learning_rate": 5.48169556840077e-07, "loss": 0.8833, "step": 19900 }, { "epoch": 0.479395968305195, "grad_norm": 1.625239372253418, "learning_rate": 5.480427948483926e-07, "loss": 0.8796, "step": 19905 }, { "epoch": 0.47951638929698226, "grad_norm": 1.5438594818115234, "learning_rate": 5.479160328567082e-07, "loss": 0.8598, "step": 19910 }, { "epoch": 0.47963681028876953, "grad_norm": 1.5186994075775146, "learning_rate": 5.477892708650238e-07, "loss": 0.9471, "step": 19915 }, { "epoch": 0.4797572312805568, "grad_norm": 1.6406010389328003, "learning_rate": 5.476625088733395e-07, "loss": 0.8856, "step": 19920 }, { "epoch": 0.47987765227234414, "grad_norm": 1.6568893194198608, "learning_rate": 5.475357468816549e-07, "loss": 0.9892, "step": 19925 }, { "epoch": 0.4799980732641314, "grad_norm": 1.6029942035675049, "learning_rate": 5.474089848899706e-07, "loss": 0.8746, "step": 19930 }, { "epoch": 0.4801184942559187, "grad_norm": 1.5155408382415771, "learning_rate": 5.472822228982862e-07, "loss": 0.8756, "step": 19935 }, { "epoch": 0.48023891524770596, "grad_norm": 1.720414638519287, "learning_rate": 5.471554609066017e-07, "loss": 0.8976, "step": 19940 }, { "epoch": 0.4803593362394933, "grad_norm": 1.5202049016952515, "learning_rate": 5.470286989149173e-07, "loss": 0.9006, "step": 19945 }, { "epoch": 0.48047975723128056, "grad_norm": 1.6243274211883545, "learning_rate": 5.469019369232329e-07, "loss": 0.9025, "step": 19950 }, { "epoch": 0.48060017822306783, "grad_norm": 1.5468374490737915, "learning_rate": 5.467751749315485e-07, "loss": 0.891, "step": 19955 }, { "epoch": 0.48072059921485516, "grad_norm": 1.7020221948623657, "learning_rate": 5.466484129398641e-07, "loss": 0.9049, "step": 19960 }, { "epoch": 0.48084102020664243, "grad_norm": 1.6207133531570435, "learning_rate": 5.465216509481797e-07, "loss": 0.9273, "step": 19965 }, { "epoch": 0.4809614411984297, "grad_norm": 1.7263379096984863, "learning_rate": 5.463948889564952e-07, "loss": 0.904, "step": 19970 }, { "epoch": 0.481081862190217, "grad_norm": 1.5589679479599, "learning_rate": 5.462681269648108e-07, "loss": 0.9133, "step": 19975 }, { "epoch": 0.4812022831820043, "grad_norm": 1.868116021156311, "learning_rate": 5.461413649731265e-07, "loss": 0.9224, "step": 19980 }, { "epoch": 0.4813227041737916, "grad_norm": 1.5733035802841187, "learning_rate": 5.46014602981442e-07, "loss": 0.9143, "step": 19985 }, { "epoch": 0.48144312516557886, "grad_norm": 1.7124382257461548, "learning_rate": 5.458878409897575e-07, "loss": 0.8175, "step": 19990 }, { "epoch": 0.48156354615736613, "grad_norm": 1.5227895975112915, "learning_rate": 5.457610789980732e-07, "loss": 0.8792, "step": 19995 }, { "epoch": 0.48168396714915346, "grad_norm": 1.7614829540252686, "learning_rate": 5.456343170063887e-07, "loss": 0.9365, "step": 20000 } ], "logging_steps": 5, "max_steps": 41521, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.786613589519735e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }