| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.9928673323823105, |
| "eval_steps": 500, |
| "global_step": 2625, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0019020446980504042, |
| "grad_norm": 0.9932524561882019, |
| "learning_rate": 2e-05, |
| "loss": 1.3348, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0038040893961008085, |
| "grad_norm": 0.9241018295288086, |
| "learning_rate": 4e-05, |
| "loss": 1.3131, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005706134094151213, |
| "grad_norm": 1.1556137800216675, |
| "learning_rate": 6e-05, |
| "loss": 1.5644, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.007608178792201617, |
| "grad_norm": 0.8612737059593201, |
| "learning_rate": 8e-05, |
| "loss": 1.2192, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.009510223490252021, |
| "grad_norm": 0.8998388648033142, |
| "learning_rate": 0.0001, |
| "loss": 1.3651, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.011412268188302425, |
| "grad_norm": 0.7211980819702148, |
| "learning_rate": 9.999364877738964e-05, |
| "loss": 1.2525, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01331431288635283, |
| "grad_norm": 0.44894707202911377, |
| "learning_rate": 9.998729755477931e-05, |
| "loss": 1.1999, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.015216357584403234, |
| "grad_norm": 0.4338511824607849, |
| "learning_rate": 9.998094633216895e-05, |
| "loss": 1.0147, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.017118402282453638, |
| "grad_norm": 0.5658989548683167, |
| "learning_rate": 9.99745951095586e-05, |
| "loss": 1.1997, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.019020446980504042, |
| "grad_norm": 0.4467356503009796, |
| "learning_rate": 9.996824388694824e-05, |
| "loss": 1.0424, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.020922491678554447, |
| "grad_norm": 0.3743385374546051, |
| "learning_rate": 9.996189266433789e-05, |
| "loss": 1.0902, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02282453637660485, |
| "grad_norm": 0.30667275190353394, |
| "learning_rate": 9.995554144172754e-05, |
| "loss": 0.8736, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.024726581074655255, |
| "grad_norm": 0.48634254932403564, |
| "learning_rate": 9.994919021911718e-05, |
| "loss": 0.977, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02662862577270566, |
| "grad_norm": 0.4229658246040344, |
| "learning_rate": 9.994283899650683e-05, |
| "loss": 0.9673, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.028530670470756064, |
| "grad_norm": 0.39269882440567017, |
| "learning_rate": 9.993648777389648e-05, |
| "loss": 1.0001, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.030432715168806468, |
| "grad_norm": 0.38597363233566284, |
| "learning_rate": 9.993013655128612e-05, |
| "loss": 0.9705, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03233475986685687, |
| "grad_norm": 0.40809136629104614, |
| "learning_rate": 9.992378532867577e-05, |
| "loss": 0.9246, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.034236804564907276, |
| "grad_norm": 0.4431133270263672, |
| "learning_rate": 9.991743410606542e-05, |
| "loss": 1.0409, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03613884926295768, |
| "grad_norm": 0.5659255981445312, |
| "learning_rate": 9.991108288345506e-05, |
| "loss": 1.1118, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.038040893961008085, |
| "grad_norm": 0.4943106472492218, |
| "learning_rate": 9.990473166084471e-05, |
| "loss": 0.9213, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.039942938659058486, |
| "grad_norm": 0.48820945620536804, |
| "learning_rate": 9.989838043823437e-05, |
| "loss": 0.9108, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04184498335710889, |
| "grad_norm": 0.4464576542377472, |
| "learning_rate": 9.989202921562402e-05, |
| "loss": 0.8959, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.043747028055159294, |
| "grad_norm": 0.3870016038417816, |
| "learning_rate": 9.988567799301366e-05, |
| "loss": 0.8013, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0456490727532097, |
| "grad_norm": 0.42381179332733154, |
| "learning_rate": 9.987932677040331e-05, |
| "loss": 0.8584, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0475511174512601, |
| "grad_norm": 0.37170907855033875, |
| "learning_rate": 9.987297554779296e-05, |
| "loss": 0.7849, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04945316214931051, |
| "grad_norm": 0.4516700506210327, |
| "learning_rate": 9.98666243251826e-05, |
| "loss": 0.8902, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05135520684736091, |
| "grad_norm": 0.3525027334690094, |
| "learning_rate": 9.986027310257225e-05, |
| "loss": 0.6029, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05325725154541132, |
| "grad_norm": 0.437707781791687, |
| "learning_rate": 9.98539218799619e-05, |
| "loss": 0.7387, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05515929624346172, |
| "grad_norm": 0.45205071568489075, |
| "learning_rate": 9.984757065735154e-05, |
| "loss": 0.7468, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05706134094151213, |
| "grad_norm": 0.3709086775779724, |
| "learning_rate": 9.984121943474119e-05, |
| "loss": 0.7365, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05896338563956253, |
| "grad_norm": 0.4089844822883606, |
| "learning_rate": 9.983486821213084e-05, |
| "loss": 0.6563, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.060865430337612936, |
| "grad_norm": 0.45955532789230347, |
| "learning_rate": 9.982851698952048e-05, |
| "loss": 0.8021, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06276747503566334, |
| "grad_norm": 0.5240988731384277, |
| "learning_rate": 9.982216576691013e-05, |
| "loss": 0.6933, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06466951973371374, |
| "grad_norm": 0.4703526496887207, |
| "learning_rate": 9.981581454429977e-05, |
| "loss": 0.7339, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06657156443176415, |
| "grad_norm": 0.5659805536270142, |
| "learning_rate": 9.980946332168944e-05, |
| "loss": 0.8139, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06847360912981455, |
| "grad_norm": 0.39259326457977295, |
| "learning_rate": 9.980311209907908e-05, |
| "loss": 0.5838, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07037565382786495, |
| "grad_norm": 0.4165003001689911, |
| "learning_rate": 9.979676087646871e-05, |
| "loss": 0.674, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07227769852591535, |
| "grad_norm": 0.4533802568912506, |
| "learning_rate": 9.979040965385838e-05, |
| "loss": 0.6974, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07417974322396577, |
| "grad_norm": 0.5213814973831177, |
| "learning_rate": 9.978405843124802e-05, |
| "loss": 0.7896, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07608178792201617, |
| "grad_norm": 0.3241259753704071, |
| "learning_rate": 9.977770720863767e-05, |
| "loss": 0.5895, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07798383262006657, |
| "grad_norm": 0.34446167945861816, |
| "learning_rate": 9.977135598602731e-05, |
| "loss": 0.6222, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07988587731811697, |
| "grad_norm": 0.49035167694091797, |
| "learning_rate": 9.976500476341696e-05, |
| "loss": 0.6978, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08178792201616739, |
| "grad_norm": 0.4795296788215637, |
| "learning_rate": 9.975865354080661e-05, |
| "loss": 0.7368, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08368996671421779, |
| "grad_norm": 0.44959381222724915, |
| "learning_rate": 9.975230231819625e-05, |
| "loss": 0.57, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08559201141226819, |
| "grad_norm": 0.4577605426311493, |
| "learning_rate": 9.974595109558592e-05, |
| "loss": 0.691, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08749405611031859, |
| "grad_norm": 0.41654840111732483, |
| "learning_rate": 9.973959987297555e-05, |
| "loss": 0.6346, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.089396100808369, |
| "grad_norm": 0.6599829196929932, |
| "learning_rate": 9.973324865036519e-05, |
| "loss": 0.6358, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0912981455064194, |
| "grad_norm": 0.38539162278175354, |
| "learning_rate": 9.972689742775484e-05, |
| "loss": 0.5723, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0932001902044698, |
| "grad_norm": 0.4626316428184509, |
| "learning_rate": 9.97205462051445e-05, |
| "loss": 0.6845, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0951022349025202, |
| "grad_norm": 0.348387211561203, |
| "learning_rate": 9.971419498253413e-05, |
| "loss": 0.4857, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09700427960057062, |
| "grad_norm": 0.4964020252227783, |
| "learning_rate": 9.970784375992379e-05, |
| "loss": 0.7141, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09890632429862102, |
| "grad_norm": 0.4282241463661194, |
| "learning_rate": 9.970149253731344e-05, |
| "loss": 0.6619, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10080836899667142, |
| "grad_norm": 0.35991716384887695, |
| "learning_rate": 9.969514131470309e-05, |
| "loss": 0.4727, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10271041369472182, |
| "grad_norm": 0.3936012387275696, |
| "learning_rate": 9.968879009209273e-05, |
| "loss": 0.5644, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.10461245839277224, |
| "grad_norm": 0.39267924427986145, |
| "learning_rate": 9.968243886948238e-05, |
| "loss": 0.5126, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10651450309082264, |
| "grad_norm": 0.4119136333465576, |
| "learning_rate": 9.967608764687203e-05, |
| "loss": 0.471, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.10841654778887304, |
| "grad_norm": 0.5160384178161621, |
| "learning_rate": 9.966973642426167e-05, |
| "loss": 0.6555, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11031859248692344, |
| "grad_norm": 0.4742174744606018, |
| "learning_rate": 9.966338520165132e-05, |
| "loss": 0.6093, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.11222063718497385, |
| "grad_norm": 0.3615169823169708, |
| "learning_rate": 9.965703397904097e-05, |
| "loss": 0.5527, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.11412268188302425, |
| "grad_norm": 0.5700575113296509, |
| "learning_rate": 9.965068275643061e-05, |
| "loss": 0.5713, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11602472658107466, |
| "grad_norm": 0.4825727939605713, |
| "learning_rate": 9.964433153382026e-05, |
| "loss": 0.5142, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.11792677127912506, |
| "grad_norm": 0.392088919878006, |
| "learning_rate": 9.963798031120992e-05, |
| "loss": 0.513, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11982881597717546, |
| "grad_norm": 0.35883110761642456, |
| "learning_rate": 9.963162908859957e-05, |
| "loss": 0.501, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.12173086067522587, |
| "grad_norm": 0.39946749806404114, |
| "learning_rate": 9.96252778659892e-05, |
| "loss": 0.5532, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.12363290537327627, |
| "grad_norm": 0.4191288352012634, |
| "learning_rate": 9.961892664337886e-05, |
| "loss": 0.5258, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12553495007132667, |
| "grad_norm": 0.3662487268447876, |
| "learning_rate": 9.961257542076851e-05, |
| "loss": 0.5121, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1274369947693771, |
| "grad_norm": 0.5582164525985718, |
| "learning_rate": 9.960622419815815e-05, |
| "loss": 0.6494, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.12933903946742747, |
| "grad_norm": 0.485128790140152, |
| "learning_rate": 9.959987297554779e-05, |
| "loss": 0.6022, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1312410841654779, |
| "grad_norm": 0.3816944360733032, |
| "learning_rate": 9.959352175293745e-05, |
| "loss": 0.4851, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1331431288635283, |
| "grad_norm": 0.3637336194515228, |
| "learning_rate": 9.958717053032709e-05, |
| "loss": 0.4344, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1350451735615787, |
| "grad_norm": 0.4418705105781555, |
| "learning_rate": 9.958081930771674e-05, |
| "loss": 0.6008, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1369472182596291, |
| "grad_norm": 0.44138631224632263, |
| "learning_rate": 9.95744680851064e-05, |
| "loss": 0.5319, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1388492629576795, |
| "grad_norm": 0.37523001432418823, |
| "learning_rate": 9.956811686249603e-05, |
| "loss": 0.657, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1407513076557299, |
| "grad_norm": 0.4489665627479553, |
| "learning_rate": 9.956176563988568e-05, |
| "loss": 0.5526, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.14265335235378032, |
| "grad_norm": 0.39318791031837463, |
| "learning_rate": 9.955541441727532e-05, |
| "loss": 0.6046, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1445553970518307, |
| "grad_norm": 0.4817538261413574, |
| "learning_rate": 9.954906319466499e-05, |
| "loss": 0.5149, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.14645744174988112, |
| "grad_norm": 0.4451163411140442, |
| "learning_rate": 9.954271197205463e-05, |
| "loss": 0.4892, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.14835948644793154, |
| "grad_norm": 0.29836660623550415, |
| "learning_rate": 9.953636074944426e-05, |
| "loss": 0.4005, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.15026153114598192, |
| "grad_norm": 0.3185100555419922, |
| "learning_rate": 9.953000952683393e-05, |
| "loss": 0.4168, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.15216357584403234, |
| "grad_norm": 0.26550424098968506, |
| "learning_rate": 9.952365830422357e-05, |
| "loss": 0.39, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.15406562054208273, |
| "grad_norm": 0.4328240156173706, |
| "learning_rate": 9.951730708161322e-05, |
| "loss": 0.5041, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.15596766524013314, |
| "grad_norm": 0.5178936123847961, |
| "learning_rate": 9.951095585900286e-05, |
| "loss": 0.6017, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.15786970993818356, |
| "grad_norm": 0.45657551288604736, |
| "learning_rate": 9.950460463639251e-05, |
| "loss": 0.5734, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.15977175463623394, |
| "grad_norm": 0.5482913851737976, |
| "learning_rate": 9.949825341378216e-05, |
| "loss": 0.6015, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.16167379933428436, |
| "grad_norm": 0.39362308382987976, |
| "learning_rate": 9.94919021911718e-05, |
| "loss": 0.5712, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.16357584403233477, |
| "grad_norm": 0.4381113350391388, |
| "learning_rate": 9.948555096856145e-05, |
| "loss": 0.5194, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.16547788873038516, |
| "grad_norm": 0.5021312236785889, |
| "learning_rate": 9.94791997459511e-05, |
| "loss": 0.5279, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.16737993342843557, |
| "grad_norm": 0.4364267587661743, |
| "learning_rate": 9.947284852334074e-05, |
| "loss": 0.5892, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.16928197812648596, |
| "grad_norm": 0.37873050570487976, |
| "learning_rate": 9.94664973007304e-05, |
| "loss": 0.5328, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.17118402282453637, |
| "grad_norm": 0.4768919050693512, |
| "learning_rate": 9.946014607812005e-05, |
| "loss": 0.4889, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1730860675225868, |
| "grad_norm": 0.3834541440010071, |
| "learning_rate": 9.945379485550968e-05, |
| "loss": 0.4642, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.17498811222063718, |
| "grad_norm": 0.48581764101982117, |
| "learning_rate": 9.944744363289934e-05, |
| "loss": 0.4741, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1768901569186876, |
| "grad_norm": 0.39364808797836304, |
| "learning_rate": 9.944109241028899e-05, |
| "loss": 0.5684, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.178792201616738, |
| "grad_norm": 0.4657204747200012, |
| "learning_rate": 9.943474118767864e-05, |
| "loss": 0.609, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1806942463147884, |
| "grad_norm": 0.40989887714385986, |
| "learning_rate": 9.942838996506828e-05, |
| "loss": 0.4319, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1825962910128388, |
| "grad_norm": 0.43797624111175537, |
| "learning_rate": 9.942203874245793e-05, |
| "loss": 0.4997, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1844983357108892, |
| "grad_norm": 0.3887675106525421, |
| "learning_rate": 9.941568751984758e-05, |
| "loss": 0.5548, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1864003804089396, |
| "grad_norm": 0.39017003774642944, |
| "learning_rate": 9.940933629723722e-05, |
| "loss": 0.5113, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.18830242510699002, |
| "grad_norm": 0.41409194469451904, |
| "learning_rate": 9.940298507462687e-05, |
| "loss": 0.5496, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.1902044698050404, |
| "grad_norm": 0.34578803181648254, |
| "learning_rate": 9.939663385201652e-05, |
| "loss": 0.4048, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19210651450309082, |
| "grad_norm": 0.32233092188835144, |
| "learning_rate": 9.939028262940616e-05, |
| "loss": 0.4442, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.19400855920114124, |
| "grad_norm": 0.45841965079307556, |
| "learning_rate": 9.938393140679581e-05, |
| "loss": 0.5646, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.19591060389919163, |
| "grad_norm": 0.3825596272945404, |
| "learning_rate": 9.937758018418547e-05, |
| "loss": 0.4583, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.19781264859724204, |
| "grad_norm": 0.44690102338790894, |
| "learning_rate": 9.93712289615751e-05, |
| "loss": 0.5799, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.19971469329529243, |
| "grad_norm": 0.4881773591041565, |
| "learning_rate": 9.936487773896476e-05, |
| "loss": 0.4094, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.20161673799334284, |
| "grad_norm": 0.4745669960975647, |
| "learning_rate": 9.93585265163544e-05, |
| "loss": 0.6068, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.20351878269139326, |
| "grad_norm": 0.5497081279754639, |
| "learning_rate": 9.935217529374406e-05, |
| "loss": 0.4654, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.20542082738944364, |
| "grad_norm": 0.3564707636833191, |
| "learning_rate": 9.93458240711337e-05, |
| "loss": 0.5678, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.20732287208749406, |
| "grad_norm": 0.446321964263916, |
| "learning_rate": 9.933947284852334e-05, |
| "loss": 0.4503, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.20922491678554447, |
| "grad_norm": 0.4253140389919281, |
| "learning_rate": 9.9333121625913e-05, |
| "loss": 0.538, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21112696148359486, |
| "grad_norm": 0.4123047888278961, |
| "learning_rate": 9.932677040330264e-05, |
| "loss": 0.4359, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.21302900618164528, |
| "grad_norm": 0.3887772262096405, |
| "learning_rate": 9.932041918069229e-05, |
| "loss": 0.5534, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.21493105087969566, |
| "grad_norm": 0.38153669238090515, |
| "learning_rate": 9.931406795808193e-05, |
| "loss": 0.4296, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.21683309557774608, |
| "grad_norm": 0.43017521500587463, |
| "learning_rate": 9.930771673547158e-05, |
| "loss": 0.5899, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2187351402757965, |
| "grad_norm": 0.40156394243240356, |
| "learning_rate": 9.930136551286123e-05, |
| "loss": 0.3917, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.22063718497384688, |
| "grad_norm": 0.3576590120792389, |
| "learning_rate": 9.929501429025087e-05, |
| "loss": 0.3908, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2225392296718973, |
| "grad_norm": 0.33245769143104553, |
| "learning_rate": 9.928866306764054e-05, |
| "loss": 0.4043, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2244412743699477, |
| "grad_norm": 0.43169739842414856, |
| "learning_rate": 9.928231184503018e-05, |
| "loss": 0.5569, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2263433190679981, |
| "grad_norm": 0.4004412293434143, |
| "learning_rate": 9.927596062241981e-05, |
| "loss": 0.4931, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2282453637660485, |
| "grad_norm": 0.3550797998905182, |
| "learning_rate": 9.926960939980947e-05, |
| "loss": 0.4505, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2301474084640989, |
| "grad_norm": 0.3701287508010864, |
| "learning_rate": 9.926325817719912e-05, |
| "loss": 0.4967, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2320494531621493, |
| "grad_norm": 0.4120308756828308, |
| "learning_rate": 9.925690695458876e-05, |
| "loss": 0.4408, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.23395149786019973, |
| "grad_norm": 0.4737403392791748, |
| "learning_rate": 9.925055573197841e-05, |
| "loss": 0.7221, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2358535425582501, |
| "grad_norm": 0.37103158235549927, |
| "learning_rate": 9.924420450936806e-05, |
| "loss": 0.4419, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.23775558725630053, |
| "grad_norm": 0.48644623160362244, |
| "learning_rate": 9.923785328675771e-05, |
| "loss": 0.5006, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2396576319543509, |
| "grad_norm": 0.3381918966770172, |
| "learning_rate": 9.923150206414735e-05, |
| "loss": 0.4786, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.24155967665240133, |
| "grad_norm": 0.4500490128993988, |
| "learning_rate": 9.9225150841537e-05, |
| "loss": 0.4984, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.24346172135045174, |
| "grad_norm": 0.5506143569946289, |
| "learning_rate": 9.921879961892665e-05, |
| "loss": 0.4857, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.24536376604850213, |
| "grad_norm": 0.4111080467700958, |
| "learning_rate": 9.921244839631629e-05, |
| "loss": 0.4464, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.24726581074655254, |
| "grad_norm": 0.52936851978302, |
| "learning_rate": 9.920609717370594e-05, |
| "loss": 0.5664, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24916785544460296, |
| "grad_norm": 0.465009480714798, |
| "learning_rate": 9.91997459510956e-05, |
| "loss": 0.4318, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.25106990014265335, |
| "grad_norm": 0.3044665455818176, |
| "learning_rate": 9.919339472848523e-05, |
| "loss": 0.4284, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.25297194484070373, |
| "grad_norm": 0.4849638342857361, |
| "learning_rate": 9.918704350587488e-05, |
| "loss": 0.5956, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2548739895387542, |
| "grad_norm": 0.4701893925666809, |
| "learning_rate": 9.918069228326454e-05, |
| "loss": 0.4541, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.25677603423680456, |
| "grad_norm": 0.42524924874305725, |
| "learning_rate": 9.917434106065419e-05, |
| "loss": 0.4991, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.25867807893485495, |
| "grad_norm": 0.46284592151641846, |
| "learning_rate": 9.916798983804383e-05, |
| "loss": 0.453, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2605801236329054, |
| "grad_norm": 0.40281572937965393, |
| "learning_rate": 9.916163861543348e-05, |
| "loss": 0.4771, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2624821683309558, |
| "grad_norm": 0.425214558839798, |
| "learning_rate": 9.915528739282313e-05, |
| "loss": 0.4665, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.26438421302900617, |
| "grad_norm": 0.4181045889854431, |
| "learning_rate": 9.914893617021277e-05, |
| "loss": 0.5014, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2662862577270566, |
| "grad_norm": 0.4024779498577118, |
| "learning_rate": 9.914258494760241e-05, |
| "loss": 0.5905, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.268188302425107, |
| "grad_norm": 0.3768770694732666, |
| "learning_rate": 9.913623372499207e-05, |
| "loss": 0.408, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2700903471231574, |
| "grad_norm": 0.4033905267715454, |
| "learning_rate": 9.912988250238171e-05, |
| "loss": 0.4511, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2719923918212078, |
| "grad_norm": 0.32505708932876587, |
| "learning_rate": 9.912353127977136e-05, |
| "loss": 0.4395, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2738944365192582, |
| "grad_norm": 0.3487790822982788, |
| "learning_rate": 9.9117180057161e-05, |
| "loss": 0.3601, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2757964812173086, |
| "grad_norm": 0.30558326840400696, |
| "learning_rate": 9.911082883455065e-05, |
| "loss": 0.4607, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.277698525915359, |
| "grad_norm": 0.3752080500125885, |
| "learning_rate": 9.91044776119403e-05, |
| "loss": 0.3957, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2796005706134094, |
| "grad_norm": 0.3506644368171692, |
| "learning_rate": 9.909812638932994e-05, |
| "loss": 0.366, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2815026153114598, |
| "grad_norm": 0.43430307507514954, |
| "learning_rate": 9.909177516671961e-05, |
| "loss": 0.4542, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2834046600095102, |
| "grad_norm": 0.41930171847343445, |
| "learning_rate": 9.908542394410925e-05, |
| "loss": 0.709, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.28530670470756064, |
| "grad_norm": 0.3717108964920044, |
| "learning_rate": 9.907907272149888e-05, |
| "loss": 0.4701, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28720874940561103, |
| "grad_norm": 0.4177984595298767, |
| "learning_rate": 9.907272149888854e-05, |
| "loss": 0.6189, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2891107941036614, |
| "grad_norm": 0.37706881761550903, |
| "learning_rate": 9.906637027627819e-05, |
| "loss": 0.4546, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.29101283880171186, |
| "grad_norm": 0.4210599660873413, |
| "learning_rate": 9.906001905366784e-05, |
| "loss": 0.4716, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.29291488349976225, |
| "grad_norm": 0.3707990050315857, |
| "learning_rate": 9.905366783105748e-05, |
| "loss": 0.4644, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.29481692819781263, |
| "grad_norm": 0.36913537979125977, |
| "learning_rate": 9.904731660844713e-05, |
| "loss": 0.4605, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2967189728958631, |
| "grad_norm": 0.41291072964668274, |
| "learning_rate": 9.904096538583678e-05, |
| "loss": 0.4294, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.29862101759391346, |
| "grad_norm": 0.30809640884399414, |
| "learning_rate": 9.903461416322642e-05, |
| "loss": 0.4369, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.30052306229196385, |
| "grad_norm": 0.4266267716884613, |
| "learning_rate": 9.902826294061607e-05, |
| "loss": 0.456, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3024251069900143, |
| "grad_norm": 0.37408629059791565, |
| "learning_rate": 9.902191171800572e-05, |
| "loss": 0.4359, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3043271516880647, |
| "grad_norm": 0.40199100971221924, |
| "learning_rate": 9.901556049539536e-05, |
| "loss": 0.4433, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.30622919638611507, |
| "grad_norm": 0.3430602252483368, |
| "learning_rate": 9.900920927278501e-05, |
| "loss": 0.4317, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.30813124108416545, |
| "grad_norm": 0.5091786980628967, |
| "learning_rate": 9.900285805017467e-05, |
| "loss": 0.5824, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3100332857822159, |
| "grad_norm": 0.34287527203559875, |
| "learning_rate": 9.89965068275643e-05, |
| "loss": 0.4025, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3119353304802663, |
| "grad_norm": 0.4919246733188629, |
| "learning_rate": 9.899015560495396e-05, |
| "loss": 0.5612, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.31383737517831667, |
| "grad_norm": 0.35404297709465027, |
| "learning_rate": 9.898380438234361e-05, |
| "loss": 0.4731, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3157394198763671, |
| "grad_norm": 0.3590085506439209, |
| "learning_rate": 9.897745315973326e-05, |
| "loss": 0.4365, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3176414645744175, |
| "grad_norm": 0.4132196605205536, |
| "learning_rate": 9.89711019371229e-05, |
| "loss": 0.3485, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3195435092724679, |
| "grad_norm": 0.46459728479385376, |
| "learning_rate": 9.896475071451255e-05, |
| "loss": 0.4327, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3214455539705183, |
| "grad_norm": 0.435651957988739, |
| "learning_rate": 9.89583994919022e-05, |
| "loss": 0.4684, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3233475986685687, |
| "grad_norm": 0.38278958201408386, |
| "learning_rate": 9.895204826929184e-05, |
| "loss": 0.4265, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3252496433666191, |
| "grad_norm": 0.31499558687210083, |
| "learning_rate": 9.894569704668149e-05, |
| "loss": 0.4099, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.32715168806466954, |
| "grad_norm": 0.40141284465789795, |
| "learning_rate": 9.893934582407114e-05, |
| "loss": 0.4461, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.32905373276271993, |
| "grad_norm": 0.42945384979248047, |
| "learning_rate": 9.893299460146078e-05, |
| "loss": 0.4379, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3309557774607703, |
| "grad_norm": 0.5186269283294678, |
| "learning_rate": 9.892664337885043e-05, |
| "loss": 0.5134, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.33285782215882076, |
| "grad_norm": 0.3771612048149109, |
| "learning_rate": 9.892029215624009e-05, |
| "loss": 0.4617, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.33475986685687115, |
| "grad_norm": 0.48396849632263184, |
| "learning_rate": 9.891394093362972e-05, |
| "loss": 0.4944, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.33666191155492153, |
| "grad_norm": 0.5303121209144592, |
| "learning_rate": 9.890758971101938e-05, |
| "loss": 0.4049, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3385639562529719, |
| "grad_norm": 0.33063024282455444, |
| "learning_rate": 9.890123848840901e-05, |
| "loss": 0.401, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.34046600095102236, |
| "grad_norm": 0.3764759302139282, |
| "learning_rate": 9.889488726579868e-05, |
| "loss": 0.4222, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.34236804564907275, |
| "grad_norm": 0.27206951379776, |
| "learning_rate": 9.888853604318832e-05, |
| "loss": 0.3206, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.34427009034712314, |
| "grad_norm": 0.3893122971057892, |
| "learning_rate": 9.888218482057796e-05, |
| "loss": 0.3558, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3461721350451736, |
| "grad_norm": 0.42340540885925293, |
| "learning_rate": 9.887583359796762e-05, |
| "loss": 0.3948, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.34807417974322397, |
| "grad_norm": 0.4103796184062958, |
| "learning_rate": 9.886948237535726e-05, |
| "loss": 0.4769, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.34997622444127435, |
| "grad_norm": 0.39225244522094727, |
| "learning_rate": 9.886313115274691e-05, |
| "loss": 0.441, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3518782691393248, |
| "grad_norm": 0.3774043023586273, |
| "learning_rate": 9.885677993013655e-05, |
| "loss": 0.3018, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3537803138373752, |
| "grad_norm": 0.4012366235256195, |
| "learning_rate": 9.88504287075262e-05, |
| "loss": 0.4217, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.35568235853542557, |
| "grad_norm": 0.37299972772598267, |
| "learning_rate": 9.884407748491585e-05, |
| "loss": 0.4518, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.357584403233476, |
| "grad_norm": 0.34713125228881836, |
| "learning_rate": 9.883772626230549e-05, |
| "loss": 0.3882, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3594864479315264, |
| "grad_norm": 0.4148958623409271, |
| "learning_rate": 9.883137503969516e-05, |
| "loss": 0.4979, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3613884926295768, |
| "grad_norm": 0.3979155421257019, |
| "learning_rate": 9.88250238170848e-05, |
| "loss": 0.3854, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.36329053732762717, |
| "grad_norm": 0.42723751068115234, |
| "learning_rate": 9.881867259447443e-05, |
| "loss": 0.4325, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3651925820256776, |
| "grad_norm": 0.4195951521396637, |
| "learning_rate": 9.881232137186409e-05, |
| "loss": 0.3917, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.367094626723728, |
| "grad_norm": 0.43937554955482483, |
| "learning_rate": 9.880597014925374e-05, |
| "loss": 0.3907, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3689966714217784, |
| "grad_norm": 0.3176072835922241, |
| "learning_rate": 9.879961892664338e-05, |
| "loss": 0.3581, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.37089871611982883, |
| "grad_norm": 0.39909854531288147, |
| "learning_rate": 9.879326770403303e-05, |
| "loss": 0.5881, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3728007608178792, |
| "grad_norm": 0.35058659315109253, |
| "learning_rate": 9.878691648142268e-05, |
| "loss": 0.4753, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3747028055159296, |
| "grad_norm": 0.3353765904903412, |
| "learning_rate": 9.878056525881233e-05, |
| "loss": 0.4014, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.37660485021398005, |
| "grad_norm": 0.4102007746696472, |
| "learning_rate": 9.877421403620197e-05, |
| "loss": 0.4841, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.37850689491203043, |
| "grad_norm": 0.45450812578201294, |
| "learning_rate": 9.876786281359162e-05, |
| "loss": 0.4655, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.3804089396100808, |
| "grad_norm": 0.32525572180747986, |
| "learning_rate": 9.876151159098127e-05, |
| "loss": 0.3869, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.38231098430813126, |
| "grad_norm": 0.4488207697868347, |
| "learning_rate": 9.875516036837091e-05, |
| "loss": 0.4743, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.38421302900618165, |
| "grad_norm": 0.432962030172348, |
| "learning_rate": 9.874880914576056e-05, |
| "loss": 0.4171, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.38611507370423204, |
| "grad_norm": 0.4264095723628998, |
| "learning_rate": 9.874245792315022e-05, |
| "loss": 0.4344, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3880171184022825, |
| "grad_norm": 0.43752139806747437, |
| "learning_rate": 9.873610670053985e-05, |
| "loss": 0.5248, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.38991916310033287, |
| "grad_norm": 0.42547503113746643, |
| "learning_rate": 9.87297554779295e-05, |
| "loss": 0.4011, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.39182120779838325, |
| "grad_norm": 0.34600159525871277, |
| "learning_rate": 9.872340425531916e-05, |
| "loss": 0.3444, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.39372325249643364, |
| "grad_norm": 0.3614776134490967, |
| "learning_rate": 9.871705303270881e-05, |
| "loss": 0.4784, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3956252971944841, |
| "grad_norm": 0.47591882944107056, |
| "learning_rate": 9.871070181009845e-05, |
| "loss": 0.5159, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.39752734189253447, |
| "grad_norm": 0.3321515917778015, |
| "learning_rate": 9.870435058748809e-05, |
| "loss": 0.4382, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.39942938659058486, |
| "grad_norm": 0.45849499106407166, |
| "learning_rate": 9.869799936487775e-05, |
| "loss": 0.4269, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4013314312886353, |
| "grad_norm": 0.3666900098323822, |
| "learning_rate": 9.869164814226739e-05, |
| "loss": 0.4077, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4032334759866857, |
| "grad_norm": 0.3387741446495056, |
| "learning_rate": 9.868529691965703e-05, |
| "loss": 0.4485, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4051355206847361, |
| "grad_norm": 0.3360239267349243, |
| "learning_rate": 9.86789456970467e-05, |
| "loss": 0.4042, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4070375653827865, |
| "grad_norm": 0.40923500061035156, |
| "learning_rate": 9.867259447443633e-05, |
| "loss": 0.5001, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4089396100808369, |
| "grad_norm": 0.3974573314189911, |
| "learning_rate": 9.866624325182598e-05, |
| "loss": 0.4984, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4108416547788873, |
| "grad_norm": 0.4095960557460785, |
| "learning_rate": 9.865989202921562e-05, |
| "loss": 0.3837, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.41274369947693773, |
| "grad_norm": 0.3334168493747711, |
| "learning_rate": 9.865354080660527e-05, |
| "loss": 0.3935, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4146457441749881, |
| "grad_norm": 0.5007266998291016, |
| "learning_rate": 9.864718958399493e-05, |
| "loss": 0.4443, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4165477888730385, |
| "grad_norm": 0.35881495475769043, |
| "learning_rate": 9.864083836138456e-05, |
| "loss": 0.3835, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.41844983357108895, |
| "grad_norm": 0.3785092830657959, |
| "learning_rate": 9.863448713877423e-05, |
| "loss": 0.3884, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.42035187826913933, |
| "grad_norm": 0.41435107588768005, |
| "learning_rate": 9.862813591616387e-05, |
| "loss": 0.4116, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.4222539229671897, |
| "grad_norm": 0.41338756680488586, |
| "learning_rate": 9.86217846935535e-05, |
| "loss": 0.5235, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4241559676652401, |
| "grad_norm": 0.4335710406303406, |
| "learning_rate": 9.861543347094316e-05, |
| "loss": 0.516, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.42605801236329055, |
| "grad_norm": 0.37374967336654663, |
| "learning_rate": 9.860908224833281e-05, |
| "loss": 0.4663, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.42796005706134094, |
| "grad_norm": 0.3213825821876526, |
| "learning_rate": 9.860273102572246e-05, |
| "loss": 0.3636, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4298621017593913, |
| "grad_norm": 0.41535523533821106, |
| "learning_rate": 9.85963798031121e-05, |
| "loss": 0.3677, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.43176414645744177, |
| "grad_norm": 0.3543884754180908, |
| "learning_rate": 9.859002858050175e-05, |
| "loss": 0.376, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.43366619115549215, |
| "grad_norm": 0.4012312889099121, |
| "learning_rate": 9.85836773578914e-05, |
| "loss": 0.4886, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.43556823585354254, |
| "grad_norm": 0.3928169310092926, |
| "learning_rate": 9.857732613528104e-05, |
| "loss": 0.3741, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.437470280551593, |
| "grad_norm": 0.4982980191707611, |
| "learning_rate": 9.85709749126707e-05, |
| "loss": 0.5704, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.43937232524964337, |
| "grad_norm": 0.356545090675354, |
| "learning_rate": 9.856462369006035e-05, |
| "loss": 0.3618, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.44127436994769376, |
| "grad_norm": 0.5087487697601318, |
| "learning_rate": 9.855827246744998e-05, |
| "loss": 0.4733, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4431764146457442, |
| "grad_norm": 0.3566097021102905, |
| "learning_rate": 9.855192124483964e-05, |
| "loss": 0.3771, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4450784593437946, |
| "grad_norm": 0.3210541605949402, |
| "learning_rate": 9.854557002222929e-05, |
| "loss": 0.4341, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.446980504041845, |
| "grad_norm": 0.25422924757003784, |
| "learning_rate": 9.853921879961893e-05, |
| "loss": 0.3987, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4488825487398954, |
| "grad_norm": 0.39164894819259644, |
| "learning_rate": 9.853286757700858e-05, |
| "loss": 0.4149, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4507845934379458, |
| "grad_norm": 0.37471455335617065, |
| "learning_rate": 9.852651635439823e-05, |
| "loss": 0.4471, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4526866381359962, |
| "grad_norm": 0.37678262591362, |
| "learning_rate": 9.852016513178788e-05, |
| "loss": 0.3943, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4545886828340466, |
| "grad_norm": 0.4653976857662201, |
| "learning_rate": 9.851381390917752e-05, |
| "loss": 0.4848, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.456490727532097, |
| "grad_norm": 0.46764564514160156, |
| "learning_rate": 9.850746268656717e-05, |
| "loss": 0.4624, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4583927722301474, |
| "grad_norm": 0.3803463876247406, |
| "learning_rate": 9.850111146395682e-05, |
| "loss": 0.442, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4602948169281978, |
| "grad_norm": 0.33662229776382446, |
| "learning_rate": 9.849476024134646e-05, |
| "loss": 0.4564, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.46219686162624823, |
| "grad_norm": 0.42181041836738586, |
| "learning_rate": 9.848840901873611e-05, |
| "loss": 0.4702, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4640989063242986, |
| "grad_norm": 0.40373390913009644, |
| "learning_rate": 9.848205779612576e-05, |
| "loss": 0.3745, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.466000951022349, |
| "grad_norm": 0.36634379625320435, |
| "learning_rate": 9.84757065735154e-05, |
| "loss": 0.428, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.46790299572039945, |
| "grad_norm": 0.35369235277175903, |
| "learning_rate": 9.846935535090506e-05, |
| "loss": 0.3986, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.46980504041844984, |
| "grad_norm": 0.4154004454612732, |
| "learning_rate": 9.846300412829471e-05, |
| "loss": 0.3512, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.4717070851165002, |
| "grad_norm": 0.3689868450164795, |
| "learning_rate": 9.845665290568435e-05, |
| "loss": 0.3708, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.47360912981455067, |
| "grad_norm": 0.38414841890335083, |
| "learning_rate": 9.8450301683074e-05, |
| "loss": 0.3401, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.47551117451260105, |
| "grad_norm": 0.39936143159866333, |
| "learning_rate": 9.844395046046364e-05, |
| "loss": 0.4328, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.47741321921065144, |
| "grad_norm": 0.30578187108039856, |
| "learning_rate": 9.84375992378533e-05, |
| "loss": 0.3694, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4793152639087018, |
| "grad_norm": 0.39497658610343933, |
| "learning_rate": 9.843124801524294e-05, |
| "loss": 0.3945, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.48121730860675227, |
| "grad_norm": 0.44466689229011536, |
| "learning_rate": 9.842489679263258e-05, |
| "loss": 0.4485, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.48311935330480266, |
| "grad_norm": 0.3614617586135864, |
| "learning_rate": 9.841854557002223e-05, |
| "loss": 0.3701, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.48502139800285304, |
| "grad_norm": 0.3102608621120453, |
| "learning_rate": 9.841219434741188e-05, |
| "loss": 0.3677, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4869234427009035, |
| "grad_norm": 0.36049678921699524, |
| "learning_rate": 9.840584312480153e-05, |
| "loss": 0.411, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4888254873989539, |
| "grad_norm": 0.4025668501853943, |
| "learning_rate": 9.839949190219117e-05, |
| "loss": 0.433, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.49072753209700426, |
| "grad_norm": 0.4131562113761902, |
| "learning_rate": 9.839314067958082e-05, |
| "loss": 0.4818, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4926295767950547, |
| "grad_norm": 0.481468141078949, |
| "learning_rate": 9.838678945697047e-05, |
| "loss": 0.5226, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.4945316214931051, |
| "grad_norm": 0.2845190167427063, |
| "learning_rate": 9.838043823436011e-05, |
| "loss": 0.3323, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4964336661911555, |
| "grad_norm": 0.40381497144699097, |
| "learning_rate": 9.837408701174976e-05, |
| "loss": 0.4025, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.4983357108892059, |
| "grad_norm": 0.4109043478965759, |
| "learning_rate": 9.836773578913942e-05, |
| "loss": 0.4429, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5002377555872562, |
| "grad_norm": 0.4256783425807953, |
| "learning_rate": 9.836138456652906e-05, |
| "loss": 0.3994, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5021398002853067, |
| "grad_norm": 0.35044407844543457, |
| "learning_rate": 9.835503334391871e-05, |
| "loss": 0.4431, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5040418449833571, |
| "grad_norm": 0.4456939697265625, |
| "learning_rate": 9.834868212130836e-05, |
| "loss": 0.5424, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5059438896814075, |
| "grad_norm": 0.36340197920799255, |
| "learning_rate": 9.8342330898698e-05, |
| "loss": 0.4199, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5078459343794579, |
| "grad_norm": 0.4018803536891937, |
| "learning_rate": 9.833597967608765e-05, |
| "loss": 0.4132, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5097479790775084, |
| "grad_norm": 0.3372616469860077, |
| "learning_rate": 9.83296284534773e-05, |
| "loss": 0.3239, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5116500237755587, |
| "grad_norm": 0.4497722387313843, |
| "learning_rate": 9.832327723086695e-05, |
| "loss": 0.4019, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5135520684736091, |
| "grad_norm": 0.422269344329834, |
| "learning_rate": 9.831692600825659e-05, |
| "loss": 0.45, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5154541131716596, |
| "grad_norm": 0.4167305529117584, |
| "learning_rate": 9.831057478564624e-05, |
| "loss": 0.4172, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5173561578697099, |
| "grad_norm": 0.4340919554233551, |
| "learning_rate": 9.83042235630359e-05, |
| "loss": 0.5042, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5192582025677603, |
| "grad_norm": 0.4179072380065918, |
| "learning_rate": 9.829787234042553e-05, |
| "loss": 0.3499, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5211602472658108, |
| "grad_norm": 0.39216554164886475, |
| "learning_rate": 9.829152111781518e-05, |
| "loss": 0.4729, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5230622919638611, |
| "grad_norm": 0.4485825002193451, |
| "learning_rate": 9.828516989520484e-05, |
| "loss": 0.4449, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5249643366619116, |
| "grad_norm": 0.3843270242214203, |
| "learning_rate": 9.827881867259447e-05, |
| "loss": 0.5416, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.526866381359962, |
| "grad_norm": 0.30829140543937683, |
| "learning_rate": 9.827246744998413e-05, |
| "loss": 0.4004, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5287684260580123, |
| "grad_norm": 0.2905525863170624, |
| "learning_rate": 9.826611622737378e-05, |
| "loss": 0.3574, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5306704707560628, |
| "grad_norm": 0.3848637342453003, |
| "learning_rate": 9.825976500476343e-05, |
| "loss": 0.4021, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5325725154541132, |
| "grad_norm": 0.32691988348960876, |
| "learning_rate": 9.825341378215307e-05, |
| "loss": 0.4317, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5344745601521635, |
| "grad_norm": 0.3506065011024475, |
| "learning_rate": 9.824706255954271e-05, |
| "loss": 0.329, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.536376604850214, |
| "grad_norm": 0.3102387487888336, |
| "learning_rate": 9.824071133693237e-05, |
| "loss": 0.3695, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5382786495482644, |
| "grad_norm": 0.45750680565834045, |
| "learning_rate": 9.823436011432201e-05, |
| "loss": 0.4232, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5401806942463148, |
| "grad_norm": 0.297134131193161, |
| "learning_rate": 9.822800889171165e-05, |
| "loss": 0.4137, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5420827389443652, |
| "grad_norm": 0.3696708679199219, |
| "learning_rate": 9.822165766910131e-05, |
| "loss": 0.4598, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5439847836424156, |
| "grad_norm": 0.31236112117767334, |
| "learning_rate": 9.821530644649095e-05, |
| "loss": 0.314, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.545886828340466, |
| "grad_norm": 0.3596087694168091, |
| "learning_rate": 9.82089552238806e-05, |
| "loss": 0.4164, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5477888730385164, |
| "grad_norm": 0.33347079157829285, |
| "learning_rate": 9.820260400127024e-05, |
| "loss": 0.3915, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5496909177365669, |
| "grad_norm": 0.37818920612335205, |
| "learning_rate": 9.81962527786599e-05, |
| "loss": 0.3994, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5515929624346172, |
| "grad_norm": 0.3968106806278229, |
| "learning_rate": 9.818990155604955e-05, |
| "loss": 0.3611, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5534950071326676, |
| "grad_norm": 0.34991270303726196, |
| "learning_rate": 9.818355033343918e-05, |
| "loss": 0.3703, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.555397051830718, |
| "grad_norm": 0.4046263098716736, |
| "learning_rate": 9.817719911082885e-05, |
| "loss": 0.3302, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5572990965287684, |
| "grad_norm": 0.35804587602615356, |
| "learning_rate": 9.817084788821849e-05, |
| "loss": 0.373, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5592011412268189, |
| "grad_norm": 0.3538301885128021, |
| "learning_rate": 9.816449666560813e-05, |
| "loss": 0.3482, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5611031859248692, |
| "grad_norm": 0.36835455894470215, |
| "learning_rate": 9.815814544299778e-05, |
| "loss": 0.3393, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5630052306229196, |
| "grad_norm": 0.48919835686683655, |
| "learning_rate": 9.815179422038743e-05, |
| "loss": 0.4213, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5649072753209701, |
| "grad_norm": 0.3472330570220947, |
| "learning_rate": 9.814544299777708e-05, |
| "loss": 0.3996, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5668093200190204, |
| "grad_norm": 0.428611159324646, |
| "learning_rate": 9.813909177516672e-05, |
| "loss": 0.4524, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5687113647170708, |
| "grad_norm": 0.4176979959011078, |
| "learning_rate": 9.813274055255637e-05, |
| "loss": 0.3787, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5706134094151213, |
| "grad_norm": 0.41548797488212585, |
| "learning_rate": 9.812638932994602e-05, |
| "loss": 0.4758, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5725154541131716, |
| "grad_norm": 0.3926902413368225, |
| "learning_rate": 9.812003810733566e-05, |
| "loss": 0.434, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.5744174988112221, |
| "grad_norm": 0.392846018075943, |
| "learning_rate": 9.811368688472531e-05, |
| "loss": 0.3928, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5763195435092725, |
| "grad_norm": 0.36347585916519165, |
| "learning_rate": 9.810733566211497e-05, |
| "loss": 0.4264, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5782215882073228, |
| "grad_norm": 0.4314410090446472, |
| "learning_rate": 9.81009844395046e-05, |
| "loss": 0.4199, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5801236329053733, |
| "grad_norm": 0.337494820356369, |
| "learning_rate": 9.809463321689426e-05, |
| "loss": 0.4181, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5820256776034237, |
| "grad_norm": 0.27786335349082947, |
| "learning_rate": 9.808828199428391e-05, |
| "loss": 0.3, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.583927722301474, |
| "grad_norm": 0.37235599756240845, |
| "learning_rate": 9.808193077167355e-05, |
| "loss": 0.3927, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5858297669995245, |
| "grad_norm": 0.37353670597076416, |
| "learning_rate": 9.80755795490632e-05, |
| "loss": 0.4146, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5877318116975749, |
| "grad_norm": 0.3919946551322937, |
| "learning_rate": 9.806922832645285e-05, |
| "loss": 0.5055, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.5896338563956253, |
| "grad_norm": 0.45411062240600586, |
| "learning_rate": 9.80628771038425e-05, |
| "loss": 0.5347, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5915359010936757, |
| "grad_norm": 0.4087005853652954, |
| "learning_rate": 9.805652588123214e-05, |
| "loss": 0.3732, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.5934379457917262, |
| "grad_norm": 0.313297837972641, |
| "learning_rate": 9.805017465862178e-05, |
| "loss": 0.3093, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5953399904897765, |
| "grad_norm": 0.40149226784706116, |
| "learning_rate": 9.804382343601144e-05, |
| "loss": 0.4404, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5972420351878269, |
| "grad_norm": 0.34245574474334717, |
| "learning_rate": 9.803747221340108e-05, |
| "loss": 0.4036, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5991440798858774, |
| "grad_norm": 0.38059449195861816, |
| "learning_rate": 9.803112099079073e-05, |
| "loss": 0.3763, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6010461245839277, |
| "grad_norm": 0.4539381265640259, |
| "learning_rate": 9.802476976818039e-05, |
| "loss": 0.4551, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6029481692819781, |
| "grad_norm": 0.4077235460281372, |
| "learning_rate": 9.801841854557002e-05, |
| "loss": 0.4641, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6048502139800286, |
| "grad_norm": 0.3426643908023834, |
| "learning_rate": 9.801206732295968e-05, |
| "loss": 0.3684, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6067522586780789, |
| "grad_norm": 0.3042270839214325, |
| "learning_rate": 9.800571610034931e-05, |
| "loss": 0.373, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6086543033761294, |
| "grad_norm": 0.4373973309993744, |
| "learning_rate": 9.799936487773897e-05, |
| "loss": 0.5442, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6105563480741797, |
| "grad_norm": 0.385797917842865, |
| "learning_rate": 9.799301365512862e-05, |
| "loss": 0.4218, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6124583927722301, |
| "grad_norm": 0.33210891485214233, |
| "learning_rate": 9.798666243251826e-05, |
| "loss": 0.3062, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6143604374702806, |
| "grad_norm": 0.3997063636779785, |
| "learning_rate": 9.798031120990792e-05, |
| "loss": 0.4104, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6162624821683309, |
| "grad_norm": 0.4837460219860077, |
| "learning_rate": 9.797395998729756e-05, |
| "loss": 0.5271, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6181645268663813, |
| "grad_norm": 0.36420971155166626, |
| "learning_rate": 9.79676087646872e-05, |
| "loss": 0.4033, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6200665715644318, |
| "grad_norm": 0.33610865473747253, |
| "learning_rate": 9.796125754207685e-05, |
| "loss": 0.3992, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6219686162624821, |
| "grad_norm": 0.28999099135398865, |
| "learning_rate": 9.79549063194665e-05, |
| "loss": 0.3675, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6238706609605326, |
| "grad_norm": 0.359401673078537, |
| "learning_rate": 9.794855509685615e-05, |
| "loss": 0.4363, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.625772705658583, |
| "grad_norm": 0.3948569595813751, |
| "learning_rate": 9.794220387424579e-05, |
| "loss": 0.3698, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6276747503566333, |
| "grad_norm": 0.3753513991832733, |
| "learning_rate": 9.793585265163544e-05, |
| "loss": 0.4397, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6295767950546838, |
| "grad_norm": 0.32612451910972595, |
| "learning_rate": 9.79295014290251e-05, |
| "loss": 0.3846, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6314788397527342, |
| "grad_norm": 0.40796539187431335, |
| "learning_rate": 9.792315020641473e-05, |
| "loss": 0.371, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6333808844507846, |
| "grad_norm": 0.4358294904232025, |
| "learning_rate": 9.791679898380439e-05, |
| "loss": 0.4052, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.635282929148835, |
| "grad_norm": 0.39615437388420105, |
| "learning_rate": 9.791044776119404e-05, |
| "loss": 0.3686, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6371849738468854, |
| "grad_norm": 0.32977715134620667, |
| "learning_rate": 9.790409653858368e-05, |
| "loss": 0.4404, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6390870185449358, |
| "grad_norm": 0.38361093401908875, |
| "learning_rate": 9.789774531597333e-05, |
| "loss": 0.3709, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6409890632429862, |
| "grad_norm": 0.40280988812446594, |
| "learning_rate": 9.789139409336298e-05, |
| "loss": 0.3322, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6428911079410367, |
| "grad_norm": 0.3682766854763031, |
| "learning_rate": 9.788504287075262e-05, |
| "loss": 0.4144, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.644793152639087, |
| "grad_norm": 0.39864271879196167, |
| "learning_rate": 9.787869164814227e-05, |
| "loss": 0.4404, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6466951973371374, |
| "grad_norm": 0.3244321048259735, |
| "learning_rate": 9.787234042553192e-05, |
| "loss": 0.3541, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6485972420351879, |
| "grad_norm": 0.323403924703598, |
| "learning_rate": 9.786598920292157e-05, |
| "loss": 0.3374, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6504992867332382, |
| "grad_norm": 0.3881044387817383, |
| "learning_rate": 9.785963798031121e-05, |
| "loss": 0.4415, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6524013314312886, |
| "grad_norm": 0.35189467668533325, |
| "learning_rate": 9.785328675770086e-05, |
| "loss": 0.401, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6543033761293391, |
| "grad_norm": 0.3553767800331116, |
| "learning_rate": 9.784693553509052e-05, |
| "loss": 0.456, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6562054208273894, |
| "grad_norm": 0.3302605152130127, |
| "learning_rate": 9.784058431248015e-05, |
| "loss": 0.472, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6581074655254399, |
| "grad_norm": 0.4526873826980591, |
| "learning_rate": 9.78342330898698e-05, |
| "loss": 0.3908, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6600095102234903, |
| "grad_norm": 0.3232348561286926, |
| "learning_rate": 9.782788186725946e-05, |
| "loss": 0.3421, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6619115549215406, |
| "grad_norm": 0.38508203625679016, |
| "learning_rate": 9.78215306446491e-05, |
| "loss": 0.4093, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6638135996195911, |
| "grad_norm": 0.3187748193740845, |
| "learning_rate": 9.781517942203875e-05, |
| "loss": 0.4319, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.6657156443176415, |
| "grad_norm": 0.2614807188510895, |
| "learning_rate": 9.78088281994284e-05, |
| "loss": 0.314, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6676176890156919, |
| "grad_norm": 0.40218180418014526, |
| "learning_rate": 9.780247697681805e-05, |
| "loss": 0.4404, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6695197337137423, |
| "grad_norm": 0.4016517996788025, |
| "learning_rate": 9.779612575420769e-05, |
| "loss": 0.5063, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6714217784117926, |
| "grad_norm": 0.3333278000354767, |
| "learning_rate": 9.778977453159733e-05, |
| "loss": 0.2966, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6733238231098431, |
| "grad_norm": 0.4535547196865082, |
| "learning_rate": 9.778342330898699e-05, |
| "loss": 0.4077, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6752258678078935, |
| "grad_norm": 0.4180653393268585, |
| "learning_rate": 9.777707208637663e-05, |
| "loss": 0.4554, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6771279125059438, |
| "grad_norm": 0.43454670906066895, |
| "learning_rate": 9.777072086376627e-05, |
| "loss": 0.4403, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6790299572039943, |
| "grad_norm": 0.45290321111679077, |
| "learning_rate": 9.776436964115594e-05, |
| "loss": 0.4037, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6809320019020447, |
| "grad_norm": 0.34165212512016296, |
| "learning_rate": 9.775801841854557e-05, |
| "loss": 0.3044, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6828340466000951, |
| "grad_norm": 0.435138463973999, |
| "learning_rate": 9.775166719593523e-05, |
| "loss": 0.4293, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6847360912981455, |
| "grad_norm": 0.36061882972717285, |
| "learning_rate": 9.774531597332486e-05, |
| "loss": 0.4052, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6866381359961959, |
| "grad_norm": 0.4023354947566986, |
| "learning_rate": 9.773896475071452e-05, |
| "loss": 0.4232, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.6885401806942463, |
| "grad_norm": 0.39200109243392944, |
| "learning_rate": 9.773261352810417e-05, |
| "loss": 0.3882, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6904422253922967, |
| "grad_norm": 0.34504035115242004, |
| "learning_rate": 9.77262623054938e-05, |
| "loss": 0.4063, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.6923442700903472, |
| "grad_norm": 0.31081900000572205, |
| "learning_rate": 9.771991108288346e-05, |
| "loss": 0.251, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6942463147883975, |
| "grad_norm": 0.3800300061702728, |
| "learning_rate": 9.771355986027311e-05, |
| "loss": 0.3722, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6961483594864479, |
| "grad_norm": 0.3476494550704956, |
| "learning_rate": 9.770720863766275e-05, |
| "loss": 0.382, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6980504041844984, |
| "grad_norm": 0.38069918751716614, |
| "learning_rate": 9.77008574150524e-05, |
| "loss": 0.4329, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6999524488825487, |
| "grad_norm": 0.4034759998321533, |
| "learning_rate": 9.769450619244205e-05, |
| "loss": 0.4112, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7018544935805991, |
| "grad_norm": 0.4232093393802643, |
| "learning_rate": 9.76881549698317e-05, |
| "loss": 0.4524, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7037565382786496, |
| "grad_norm": 0.40627321600914, |
| "learning_rate": 9.768180374722134e-05, |
| "loss": 0.388, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7056585829766999, |
| "grad_norm": 0.41021519899368286, |
| "learning_rate": 9.767545252461099e-05, |
| "loss": 0.3741, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7075606276747504, |
| "grad_norm": 0.3615809679031372, |
| "learning_rate": 9.766910130200065e-05, |
| "loss": 0.4432, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7094626723728008, |
| "grad_norm": 0.3088645935058594, |
| "learning_rate": 9.766275007939028e-05, |
| "loss": 0.3343, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7113647170708511, |
| "grad_norm": 0.380659818649292, |
| "learning_rate": 9.765639885677994e-05, |
| "loss": 0.4092, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7132667617689016, |
| "grad_norm": 0.28462380170822144, |
| "learning_rate": 9.765004763416959e-05, |
| "loss": 0.31, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.715168806466952, |
| "grad_norm": 0.3215513229370117, |
| "learning_rate": 9.764369641155923e-05, |
| "loss": 0.4115, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7170708511650024, |
| "grad_norm": 0.397651731967926, |
| "learning_rate": 9.763734518894888e-05, |
| "loss": 0.4369, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7189728958630528, |
| "grad_norm": 0.31436121463775635, |
| "learning_rate": 9.763099396633853e-05, |
| "loss": 0.4339, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7208749405611032, |
| "grad_norm": 0.4024806320667267, |
| "learning_rate": 9.762464274372817e-05, |
| "loss": 0.4252, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7227769852591536, |
| "grad_norm": 0.37994107604026794, |
| "learning_rate": 9.761829152111782e-05, |
| "loss": 0.3483, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.724679029957204, |
| "grad_norm": 0.44616061449050903, |
| "learning_rate": 9.761194029850747e-05, |
| "loss": 0.3809, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7265810746552543, |
| "grad_norm": 0.3396744728088379, |
| "learning_rate": 9.760558907589712e-05, |
| "loss": 0.3382, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7284831193533048, |
| "grad_norm": 0.334839791059494, |
| "learning_rate": 9.759923785328676e-05, |
| "loss": 0.3465, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7303851640513552, |
| "grad_norm": 0.417478084564209, |
| "learning_rate": 9.75928866306764e-05, |
| "loss": 0.3191, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7322872087494056, |
| "grad_norm": 0.30790823698043823, |
| "learning_rate": 9.758653540806606e-05, |
| "loss": 0.3139, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.734189253447456, |
| "grad_norm": 0.4008057415485382, |
| "learning_rate": 9.75801841854557e-05, |
| "loss": 0.419, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7360912981455064, |
| "grad_norm": 0.42966723442077637, |
| "learning_rate": 9.757383296284535e-05, |
| "loss": 0.3634, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7379933428435568, |
| "grad_norm": 0.33789002895355225, |
| "learning_rate": 9.7567481740235e-05, |
| "loss": 0.3966, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7398953875416072, |
| "grad_norm": 0.35244229435920715, |
| "learning_rate": 9.756113051762464e-05, |
| "loss": 0.3991, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7417974322396577, |
| "grad_norm": 0.3581864833831787, |
| "learning_rate": 9.75547792950143e-05, |
| "loss": 0.347, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.743699476937708, |
| "grad_norm": 0.30788975954055786, |
| "learning_rate": 9.754842807240394e-05, |
| "loss": 0.3485, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7456015216357584, |
| "grad_norm": 0.5155593156814575, |
| "learning_rate": 9.754207684979359e-05, |
| "loss": 0.4793, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7475035663338089, |
| "grad_norm": 0.4183029532432556, |
| "learning_rate": 9.753572562718324e-05, |
| "loss": 0.4064, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7494056110318592, |
| "grad_norm": 0.36132046580314636, |
| "learning_rate": 9.752937440457288e-05, |
| "loss": 0.3539, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7513076557299097, |
| "grad_norm": 0.4269217252731323, |
| "learning_rate": 9.752302318196254e-05, |
| "loss": 0.4358, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7532097004279601, |
| "grad_norm": 0.38872459530830383, |
| "learning_rate": 9.751667195935218e-05, |
| "loss": 0.3238, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7551117451260104, |
| "grad_norm": 0.4668743312358856, |
| "learning_rate": 9.751032073674182e-05, |
| "loss": 0.4218, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7570137898240609, |
| "grad_norm": 0.3817143738269806, |
| "learning_rate": 9.750396951413147e-05, |
| "loss": 0.4332, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7589158345221113, |
| "grad_norm": 0.4089401960372925, |
| "learning_rate": 9.749761829152112e-05, |
| "loss": 0.319, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7608178792201616, |
| "grad_norm": 0.36516866087913513, |
| "learning_rate": 9.749126706891077e-05, |
| "loss": 0.3858, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7627199239182121, |
| "grad_norm": 0.3843027949333191, |
| "learning_rate": 9.748491584630041e-05, |
| "loss": 0.4682, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.7646219686162625, |
| "grad_norm": 0.36987295746803284, |
| "learning_rate": 9.747856462369006e-05, |
| "loss": 0.3328, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7665240133143129, |
| "grad_norm": 0.4972301721572876, |
| "learning_rate": 9.747221340107972e-05, |
| "loss": 0.3939, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.7684260580123633, |
| "grad_norm": 0.4319972097873688, |
| "learning_rate": 9.746586217846935e-05, |
| "loss": 0.3918, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7703281027104137, |
| "grad_norm": 0.364364892244339, |
| "learning_rate": 9.7459510955859e-05, |
| "loss": 0.3871, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7722301474084641, |
| "grad_norm": 0.43767908215522766, |
| "learning_rate": 9.745315973324866e-05, |
| "loss": 0.3973, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7741321921065145, |
| "grad_norm": 0.44734928011894226, |
| "learning_rate": 9.74468085106383e-05, |
| "loss": 0.3884, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.776034236804565, |
| "grad_norm": 0.3817954957485199, |
| "learning_rate": 9.744045728802795e-05, |
| "loss": 0.3647, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7779362815026153, |
| "grad_norm": 0.3619462251663208, |
| "learning_rate": 9.74341060654176e-05, |
| "loss": 0.4994, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7798383262006657, |
| "grad_norm": 0.38225993514060974, |
| "learning_rate": 9.742775484280724e-05, |
| "loss": 0.4116, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7817403708987162, |
| "grad_norm": 0.39784252643585205, |
| "learning_rate": 9.742140362019689e-05, |
| "loss": 0.3729, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.7836424155967665, |
| "grad_norm": 0.3188072443008423, |
| "learning_rate": 9.741505239758654e-05, |
| "loss": 0.3767, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.785544460294817, |
| "grad_norm": 0.4509223401546478, |
| "learning_rate": 9.74087011749762e-05, |
| "loss": 0.4595, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7874465049928673, |
| "grad_norm": 0.40249937772750854, |
| "learning_rate": 9.740234995236583e-05, |
| "loss": 0.3761, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7893485496909177, |
| "grad_norm": 0.3387410044670105, |
| "learning_rate": 9.739599872975547e-05, |
| "loss": 0.401, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7912505943889682, |
| "grad_norm": 0.47670629620552063, |
| "learning_rate": 9.738964750714514e-05, |
| "loss": 0.3656, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7931526390870185, |
| "grad_norm": 0.37239211797714233, |
| "learning_rate": 9.738329628453477e-05, |
| "loss": 0.4885, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7950546837850689, |
| "grad_norm": 0.3347351849079132, |
| "learning_rate": 9.737694506192443e-05, |
| "loss": 0.291, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7969567284831194, |
| "grad_norm": 0.3727717399597168, |
| "learning_rate": 9.737059383931408e-05, |
| "loss": 0.3506, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7988587731811697, |
| "grad_norm": 0.3866841793060303, |
| "learning_rate": 9.736424261670372e-05, |
| "loss": 0.4355, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8007608178792202, |
| "grad_norm": 0.39670372009277344, |
| "learning_rate": 9.735789139409337e-05, |
| "loss": 0.4041, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8026628625772706, |
| "grad_norm": 0.35946765542030334, |
| "learning_rate": 9.7351540171483e-05, |
| "loss": 0.3378, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8045649072753209, |
| "grad_norm": 0.24180381000041962, |
| "learning_rate": 9.734518894887267e-05, |
| "loss": 0.3133, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8064669519733714, |
| "grad_norm": 0.4238085150718689, |
| "learning_rate": 9.733883772626231e-05, |
| "loss": 0.3968, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8083689966714218, |
| "grad_norm": 0.35451412200927734, |
| "learning_rate": 9.733248650365195e-05, |
| "loss": 0.3456, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8102710413694721, |
| "grad_norm": 0.49277418851852417, |
| "learning_rate": 9.732613528104161e-05, |
| "loss": 0.3916, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8121730860675226, |
| "grad_norm": 0.34536874294281006, |
| "learning_rate": 9.731978405843125e-05, |
| "loss": 0.537, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.814075130765573, |
| "grad_norm": 0.3002311885356903, |
| "learning_rate": 9.731343283582089e-05, |
| "loss": 0.3842, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8159771754636234, |
| "grad_norm": 0.29766812920570374, |
| "learning_rate": 9.730708161321054e-05, |
| "loss": 0.2979, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8178792201616738, |
| "grad_norm": 0.34347230195999146, |
| "learning_rate": 9.73007303906002e-05, |
| "loss": 0.3996, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8197812648597242, |
| "grad_norm": 0.42430102825164795, |
| "learning_rate": 9.729437916798985e-05, |
| "loss": 0.4677, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8216833095577746, |
| "grad_norm": 0.3375668227672577, |
| "learning_rate": 9.728802794537948e-05, |
| "loss": 0.4257, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.823585354255825, |
| "grad_norm": 0.3718586266040802, |
| "learning_rate": 9.728167672276914e-05, |
| "loss": 0.3555, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8254873989538755, |
| "grad_norm": 0.4310496151447296, |
| "learning_rate": 9.727532550015879e-05, |
| "loss": 0.4026, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8273894436519258, |
| "grad_norm": 0.43832001090049744, |
| "learning_rate": 9.726897427754843e-05, |
| "loss": 0.4421, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8292914883499762, |
| "grad_norm": 0.42209911346435547, |
| "learning_rate": 9.726262305493808e-05, |
| "loss": 0.397, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8311935330480267, |
| "grad_norm": 0.4297396242618561, |
| "learning_rate": 9.725627183232773e-05, |
| "loss": 0.4244, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.833095577746077, |
| "grad_norm": 0.40587079524993896, |
| "learning_rate": 9.724992060971737e-05, |
| "loss": 0.3753, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8349976224441275, |
| "grad_norm": 0.4127040505409241, |
| "learning_rate": 9.724356938710702e-05, |
| "loss": 0.3926, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8368996671421779, |
| "grad_norm": 0.3734678030014038, |
| "learning_rate": 9.723721816449667e-05, |
| "loss": 0.3338, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8388017118402282, |
| "grad_norm": 0.38152286410331726, |
| "learning_rate": 9.723086694188632e-05, |
| "loss": 0.3893, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8407037565382787, |
| "grad_norm": 0.4234791398048401, |
| "learning_rate": 9.722451571927596e-05, |
| "loss": 0.3104, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.842605801236329, |
| "grad_norm": 0.49204525351524353, |
| "learning_rate": 9.721816449666561e-05, |
| "loss": 0.3698, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8445078459343794, |
| "grad_norm": 0.40980932116508484, |
| "learning_rate": 9.721181327405527e-05, |
| "loss": 0.3901, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8464098906324299, |
| "grad_norm": 0.3330426514148712, |
| "learning_rate": 9.72054620514449e-05, |
| "loss": 0.3118, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8483119353304802, |
| "grad_norm": 0.3042624890804291, |
| "learning_rate": 9.719911082883456e-05, |
| "loss": 0.3003, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8502139800285307, |
| "grad_norm": 0.34576475620269775, |
| "learning_rate": 9.719275960622421e-05, |
| "loss": 0.3332, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8521160247265811, |
| "grad_norm": 0.2980082035064697, |
| "learning_rate": 9.718640838361385e-05, |
| "loss": 0.3285, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8540180694246314, |
| "grad_norm": 0.31439459323883057, |
| "learning_rate": 9.71800571610035e-05, |
| "loss": 0.3178, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8559201141226819, |
| "grad_norm": 0.37447845935821533, |
| "learning_rate": 9.717370593839315e-05, |
| "loss": 0.3861, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8578221588207323, |
| "grad_norm": 0.4261024594306946, |
| "learning_rate": 9.716735471578279e-05, |
| "loss": 0.4377, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8597242035187826, |
| "grad_norm": 0.3328630328178406, |
| "learning_rate": 9.716100349317244e-05, |
| "loss": 0.2791, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8616262482168331, |
| "grad_norm": 0.41943463683128357, |
| "learning_rate": 9.715465227056209e-05, |
| "loss": 0.4693, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8635282929148835, |
| "grad_norm": 0.4295640289783478, |
| "learning_rate": 9.714830104795174e-05, |
| "loss": 0.4105, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8654303376129339, |
| "grad_norm": 0.3548508882522583, |
| "learning_rate": 9.714194982534138e-05, |
| "loss": 0.3024, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8673323823109843, |
| "grad_norm": 0.5577777624130249, |
| "learning_rate": 9.713559860273102e-05, |
| "loss": 0.3961, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8692344270090347, |
| "grad_norm": 0.4119040071964264, |
| "learning_rate": 9.712924738012069e-05, |
| "loss": 0.3143, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8711364717070851, |
| "grad_norm": 0.40272560715675354, |
| "learning_rate": 9.712289615751032e-05, |
| "loss": 0.3452, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8730385164051355, |
| "grad_norm": 0.456386536359787, |
| "learning_rate": 9.711654493489998e-05, |
| "loss": 0.403, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.874940561103186, |
| "grad_norm": 0.3982544541358948, |
| "learning_rate": 9.711019371228963e-05, |
| "loss": 0.4498, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8768426058012363, |
| "grad_norm": 0.29361623525619507, |
| "learning_rate": 9.710384248967927e-05, |
| "loss": 0.3724, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.8787446504992867, |
| "grad_norm": 0.3854773938655853, |
| "learning_rate": 9.709749126706892e-05, |
| "loss": 0.4162, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8806466951973372, |
| "grad_norm": 0.3760225474834442, |
| "learning_rate": 9.709114004445856e-05, |
| "loss": 0.4335, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8825487398953875, |
| "grad_norm": 0.4936290383338928, |
| "learning_rate": 9.708478882184821e-05, |
| "loss": 0.3522, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.884450784593438, |
| "grad_norm": 0.3584468364715576, |
| "learning_rate": 9.707843759923786e-05, |
| "loss": 0.552, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8863528292914884, |
| "grad_norm": 0.3523949086666107, |
| "learning_rate": 9.70720863766275e-05, |
| "loss": 0.3498, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8882548739895387, |
| "grad_norm": 0.42082804441452026, |
| "learning_rate": 9.706573515401716e-05, |
| "loss": 0.4863, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.8901569186875892, |
| "grad_norm": 0.4284763038158417, |
| "learning_rate": 9.70593839314068e-05, |
| "loss": 0.4737, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8920589633856396, |
| "grad_norm": 0.3609261214733124, |
| "learning_rate": 9.705303270879644e-05, |
| "loss": 0.3208, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.89396100808369, |
| "grad_norm": 0.31832849979400635, |
| "learning_rate": 9.704668148618609e-05, |
| "loss": 0.2545, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8958630527817404, |
| "grad_norm": 0.38202738761901855, |
| "learning_rate": 9.704033026357574e-05, |
| "loss": 0.3952, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8977650974797908, |
| "grad_norm": 0.347649484872818, |
| "learning_rate": 9.70339790409654e-05, |
| "loss": 0.3776, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8996671421778412, |
| "grad_norm": 0.41626760363578796, |
| "learning_rate": 9.702762781835503e-05, |
| "loss": 0.4152, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9015691868758916, |
| "grad_norm": 0.4042579233646393, |
| "learning_rate": 9.702127659574469e-05, |
| "loss": 0.3813, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9034712315739419, |
| "grad_norm": 0.38196825981140137, |
| "learning_rate": 9.701492537313434e-05, |
| "loss": 0.4398, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9053732762719924, |
| "grad_norm": 0.3867753744125366, |
| "learning_rate": 9.700857415052398e-05, |
| "loss": 0.4995, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9072753209700428, |
| "grad_norm": 0.34228166937828064, |
| "learning_rate": 9.700222292791363e-05, |
| "loss": 0.284, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9091773656680932, |
| "grad_norm": 0.3962937593460083, |
| "learning_rate": 9.699587170530328e-05, |
| "loss": 0.3501, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9110794103661436, |
| "grad_norm": 0.3665268123149872, |
| "learning_rate": 9.698952048269292e-05, |
| "loss": 0.2737, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.912981455064194, |
| "grad_norm": 0.3775653839111328, |
| "learning_rate": 9.698316926008257e-05, |
| "loss": 0.3173, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9148834997622444, |
| "grad_norm": 0.3584369421005249, |
| "learning_rate": 9.697681803747222e-05, |
| "loss": 0.3055, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9167855444602948, |
| "grad_norm": 0.3510100245475769, |
| "learning_rate": 9.697046681486186e-05, |
| "loss": 0.3278, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.9186875891583453, |
| "grad_norm": 0.33394765853881836, |
| "learning_rate": 9.696411559225151e-05, |
| "loss": 0.2954, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9205896338563956, |
| "grad_norm": 0.437014102935791, |
| "learning_rate": 9.695776436964116e-05, |
| "loss": 0.3797, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.922491678554446, |
| "grad_norm": 0.37421244382858276, |
| "learning_rate": 9.695141314703082e-05, |
| "loss": 0.3521, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9243937232524965, |
| "grad_norm": 0.37696099281311035, |
| "learning_rate": 9.694506192442045e-05, |
| "loss": 0.3455, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9262957679505468, |
| "grad_norm": 0.5452500581741333, |
| "learning_rate": 9.693871070181009e-05, |
| "loss": 0.3624, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9281978126485972, |
| "grad_norm": 0.4049624502658844, |
| "learning_rate": 9.693235947919976e-05, |
| "loss": 0.4017, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9300998573466477, |
| "grad_norm": 0.32757866382598877, |
| "learning_rate": 9.69260082565894e-05, |
| "loss": 0.3536, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.932001902044698, |
| "grad_norm": 0.298367977142334, |
| "learning_rate": 9.691965703397905e-05, |
| "loss": 0.3374, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9339039467427485, |
| "grad_norm": 0.22035005688667297, |
| "learning_rate": 9.69133058113687e-05, |
| "loss": 0.2855, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9358059914407989, |
| "grad_norm": 0.43000441789627075, |
| "learning_rate": 9.690695458875834e-05, |
| "loss": 0.4544, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9377080361388492, |
| "grad_norm": 0.28024253249168396, |
| "learning_rate": 9.690060336614799e-05, |
| "loss": 0.308, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.9396100808368997, |
| "grad_norm": 0.53145432472229, |
| "learning_rate": 9.689425214353763e-05, |
| "loss": 0.4569, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9415121255349501, |
| "grad_norm": 0.4006127715110779, |
| "learning_rate": 9.688790092092729e-05, |
| "loss": 0.419, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9434141702330004, |
| "grad_norm": 0.4057261645793915, |
| "learning_rate": 9.688154969831693e-05, |
| "loss": 0.3553, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.9453162149310509, |
| "grad_norm": 0.40803465247154236, |
| "learning_rate": 9.687519847570657e-05, |
| "loss": 0.3735, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.9472182596291013, |
| "grad_norm": 0.34222155809402466, |
| "learning_rate": 9.686884725309623e-05, |
| "loss": 0.367, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.9491203043271517, |
| "grad_norm": 0.40403544902801514, |
| "learning_rate": 9.686249603048587e-05, |
| "loss": 0.416, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.9510223490252021, |
| "grad_norm": 0.33636951446533203, |
| "learning_rate": 9.685614480787551e-05, |
| "loss": 0.3423, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9529243937232525, |
| "grad_norm": 0.3394258916378021, |
| "learning_rate": 9.684979358526516e-05, |
| "loss": 0.3282, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.9548264384213029, |
| "grad_norm": 0.3682473599910736, |
| "learning_rate": 9.684344236265482e-05, |
| "loss": 0.406, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9567284831193533, |
| "grad_norm": 0.35073623061180115, |
| "learning_rate": 9.683709114004447e-05, |
| "loss": 0.376, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9586305278174037, |
| "grad_norm": 0.36000022292137146, |
| "learning_rate": 9.68307399174341e-05, |
| "loss": 0.3969, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9605325725154541, |
| "grad_norm": 0.361158162355423, |
| "learning_rate": 9.682438869482376e-05, |
| "loss": 0.347, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9624346172135045, |
| "grad_norm": 0.3075178265571594, |
| "learning_rate": 9.681803747221341e-05, |
| "loss": 0.4362, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9643366619115549, |
| "grad_norm": 0.30084747076034546, |
| "learning_rate": 9.681168624960305e-05, |
| "loss": 0.3563, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.9662387066096053, |
| "grad_norm": 0.3221014440059662, |
| "learning_rate": 9.68053350269927e-05, |
| "loss": 0.3366, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9681407513076558, |
| "grad_norm": 0.36464688181877136, |
| "learning_rate": 9.679898380438235e-05, |
| "loss": 0.3992, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.9700427960057061, |
| "grad_norm": 0.32443803548812866, |
| "learning_rate": 9.679263258177199e-05, |
| "loss": 0.3293, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9719448407037565, |
| "grad_norm": 0.3689454197883606, |
| "learning_rate": 9.678628135916164e-05, |
| "loss": 0.3546, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.973846885401807, |
| "grad_norm": 0.3754975199699402, |
| "learning_rate": 9.677993013655129e-05, |
| "loss": 0.3856, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9757489300998573, |
| "grad_norm": 0.3642953634262085, |
| "learning_rate": 9.677357891394094e-05, |
| "loss": 0.4326, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.9776509747979077, |
| "grad_norm": 0.43278223276138306, |
| "learning_rate": 9.676722769133058e-05, |
| "loss": 0.3964, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9795530194959582, |
| "grad_norm": 0.43771886825561523, |
| "learning_rate": 9.676087646872023e-05, |
| "loss": 0.3861, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9814550641940085, |
| "grad_norm": 0.34908977150917053, |
| "learning_rate": 9.675452524610989e-05, |
| "loss": 0.3981, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.983357108892059, |
| "grad_norm": 0.35733312368392944, |
| "learning_rate": 9.674817402349953e-05, |
| "loss": 0.3636, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.9852591535901094, |
| "grad_norm": 0.3636298179626465, |
| "learning_rate": 9.674182280088918e-05, |
| "loss": 0.4336, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9871611982881597, |
| "grad_norm": 0.32771605253219604, |
| "learning_rate": 9.673547157827883e-05, |
| "loss": 0.3481, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.9890632429862102, |
| "grad_norm": 0.40213117003440857, |
| "learning_rate": 9.672912035566847e-05, |
| "loss": 0.3707, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9909652876842606, |
| "grad_norm": 0.3386654257774353, |
| "learning_rate": 9.672276913305812e-05, |
| "loss": 0.3384, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.992867332382311, |
| "grad_norm": 0.3965696096420288, |
| "learning_rate": 9.671641791044777e-05, |
| "loss": 0.3595, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9947693770803614, |
| "grad_norm": 0.38238459825515747, |
| "learning_rate": 9.671006668783741e-05, |
| "loss": 0.3714, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.9966714217784118, |
| "grad_norm": 0.3248405456542969, |
| "learning_rate": 9.670371546522706e-05, |
| "loss": 0.394, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9985734664764622, |
| "grad_norm": 0.3902266323566437, |
| "learning_rate": 9.66973642426167e-05, |
| "loss": 0.4115, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.0004755111745125, |
| "grad_norm": 0.4164808392524719, |
| "learning_rate": 9.669101302000636e-05, |
| "loss": 0.2972, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.002377555872563, |
| "grad_norm": 0.33123117685317993, |
| "learning_rate": 9.6684661797396e-05, |
| "loss": 0.3211, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.0042796005706134, |
| "grad_norm": 0.322803258895874, |
| "learning_rate": 9.667831057478564e-05, |
| "loss": 0.3424, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0061816452686638, |
| "grad_norm": 0.29135918617248535, |
| "learning_rate": 9.66719593521753e-05, |
| "loss": 0.2882, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.0080836899667143, |
| "grad_norm": 0.3367983400821686, |
| "learning_rate": 9.666560812956494e-05, |
| "loss": 0.2776, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.0099857346647647, |
| "grad_norm": 0.304070383310318, |
| "learning_rate": 9.66592569069546e-05, |
| "loss": 0.249, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.011887779362815, |
| "grad_norm": 0.3832727372646332, |
| "learning_rate": 9.665290568434423e-05, |
| "loss": 0.3118, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.0137898240608654, |
| "grad_norm": 0.3365418612957001, |
| "learning_rate": 9.664655446173389e-05, |
| "loss": 0.197, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.0156918687589158, |
| "grad_norm": 0.4367881119251251, |
| "learning_rate": 9.664020323912354e-05, |
| "loss": 0.3121, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.0175939134569663, |
| "grad_norm": 0.43158653378486633, |
| "learning_rate": 9.663385201651318e-05, |
| "loss": 0.3543, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.0194959581550167, |
| "grad_norm": 0.43556904792785645, |
| "learning_rate": 9.662750079390283e-05, |
| "loss": 0.3121, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.0213980028530671, |
| "grad_norm": 0.31828534603118896, |
| "learning_rate": 9.662114957129248e-05, |
| "loss": 0.24, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.0233000475511174, |
| "grad_norm": 0.3935330808162689, |
| "learning_rate": 9.661479834868212e-05, |
| "loss": 0.2548, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.0252020922491678, |
| "grad_norm": 0.3288602828979492, |
| "learning_rate": 9.660844712607177e-05, |
| "loss": 0.2219, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.0271041369472182, |
| "grad_norm": 0.36314669251441956, |
| "learning_rate": 9.660209590346142e-05, |
| "loss": 0.2817, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.0290061816452687, |
| "grad_norm": 0.3528159558773041, |
| "learning_rate": 9.659574468085106e-05, |
| "loss": 0.2989, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.0309082263433191, |
| "grad_norm": 0.3235621750354767, |
| "learning_rate": 9.658939345824071e-05, |
| "loss": 0.2443, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.0328102710413696, |
| "grad_norm": 0.3819037675857544, |
| "learning_rate": 9.658304223563036e-05, |
| "loss": 0.3494, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.0347123157394198, |
| "grad_norm": 0.3885079324245453, |
| "learning_rate": 9.657669101302002e-05, |
| "loss": 0.3033, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.0366143604374702, |
| "grad_norm": 0.3339099884033203, |
| "learning_rate": 9.657033979040965e-05, |
| "loss": 0.2673, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.0385164051355207, |
| "grad_norm": 0.37009695172309875, |
| "learning_rate": 9.65639885677993e-05, |
| "loss": 0.3715, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.0404184498335711, |
| "grad_norm": 0.3462003171443939, |
| "learning_rate": 9.655763734518896e-05, |
| "loss": 0.2664, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.0423204945316216, |
| "grad_norm": 0.3916226327419281, |
| "learning_rate": 9.65512861225786e-05, |
| "loss": 0.3804, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.044222539229672, |
| "grad_norm": 0.3801763951778412, |
| "learning_rate": 9.654493489996825e-05, |
| "loss": 0.2672, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.0461245839277222, |
| "grad_norm": 0.37406545877456665, |
| "learning_rate": 9.65385836773579e-05, |
| "loss": 0.6203, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.0480266286257727, |
| "grad_norm": 0.43677276372909546, |
| "learning_rate": 9.653223245474754e-05, |
| "loss": 0.3866, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.0499286733238231, |
| "grad_norm": 0.26939406991004944, |
| "learning_rate": 9.652588123213719e-05, |
| "loss": 0.2169, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.0518307180218736, |
| "grad_norm": 0.41554608941078186, |
| "learning_rate": 9.651953000952684e-05, |
| "loss": 0.3705, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.053732762719924, |
| "grad_norm": 0.3090009391307831, |
| "learning_rate": 9.651317878691648e-05, |
| "loss": 0.2471, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.0556348074179742, |
| "grad_norm": 0.36705514788627625, |
| "learning_rate": 9.650682756430613e-05, |
| "loss": 0.2764, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.0575368521160247, |
| "grad_norm": 0.39900127053260803, |
| "learning_rate": 9.650047634169578e-05, |
| "loss": 0.2836, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.059438896814075, |
| "grad_norm": 0.31405431032180786, |
| "learning_rate": 9.649412511908544e-05, |
| "loss": 0.2464, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.0613409415121255, |
| "grad_norm": 0.39795488119125366, |
| "learning_rate": 9.648777389647507e-05, |
| "loss": 0.283, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.063242986210176, |
| "grad_norm": 0.36270254850387573, |
| "learning_rate": 9.648142267386471e-05, |
| "loss": 0.26, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.0651450309082264, |
| "grad_norm": 0.42650437355041504, |
| "learning_rate": 9.647507145125438e-05, |
| "loss": 0.2693, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0670470756062767, |
| "grad_norm": 0.3075532019138336, |
| "learning_rate": 9.646872022864402e-05, |
| "loss": 0.2941, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.068949120304327, |
| "grad_norm": 0.4509059190750122, |
| "learning_rate": 9.646236900603367e-05, |
| "loss": 0.3525, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.0708511650023775, |
| "grad_norm": 0.3420471251010895, |
| "learning_rate": 9.645601778342332e-05, |
| "loss": 0.2601, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.072753209700428, |
| "grad_norm": 0.422493577003479, |
| "learning_rate": 9.644966656081296e-05, |
| "loss": 0.3441, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.0746552543984784, |
| "grad_norm": 0.3960445821285248, |
| "learning_rate": 9.644331533820261e-05, |
| "loss": 0.3049, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.0765572990965289, |
| "grad_norm": 0.32367074489593506, |
| "learning_rate": 9.643696411559225e-05, |
| "loss": 0.2694, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.078459343794579, |
| "grad_norm": 0.3480624258518219, |
| "learning_rate": 9.643061289298191e-05, |
| "loss": 0.2667, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.0803613884926295, |
| "grad_norm": 0.37603023648262024, |
| "learning_rate": 9.642426167037155e-05, |
| "loss": 0.2875, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.08226343319068, |
| "grad_norm": 0.391438752412796, |
| "learning_rate": 9.641791044776119e-05, |
| "loss": 0.2844, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.0841654778887304, |
| "grad_norm": 0.42726075649261475, |
| "learning_rate": 9.641155922515086e-05, |
| "loss": 0.3092, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0860675225867809, |
| "grad_norm": 0.4007676839828491, |
| "learning_rate": 9.64052080025405e-05, |
| "loss": 0.2405, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.0879695672848313, |
| "grad_norm": 0.401592493057251, |
| "learning_rate": 9.639885677993013e-05, |
| "loss": 0.297, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.0898716119828815, |
| "grad_norm": 0.3883298635482788, |
| "learning_rate": 9.639250555731978e-05, |
| "loss": 0.3201, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.091773656680932, |
| "grad_norm": 0.41852253675460815, |
| "learning_rate": 9.638615433470944e-05, |
| "loss": 0.259, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.0936757013789824, |
| "grad_norm": 0.4559331238269806, |
| "learning_rate": 9.637980311209909e-05, |
| "loss": 0.3204, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.0955777460770328, |
| "grad_norm": 0.4163438379764557, |
| "learning_rate": 9.637345188948873e-05, |
| "loss": 0.267, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.0974797907750833, |
| "grad_norm": 0.38813936710357666, |
| "learning_rate": 9.636710066687838e-05, |
| "loss": 0.2653, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.0993818354731335, |
| "grad_norm": 0.373047798871994, |
| "learning_rate": 9.636074944426803e-05, |
| "loss": 0.2995, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.101283880171184, |
| "grad_norm": 0.39488789439201355, |
| "learning_rate": 9.635439822165767e-05, |
| "loss": 0.2972, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.1031859248692344, |
| "grad_norm": 0.37775856256484985, |
| "learning_rate": 9.634804699904732e-05, |
| "loss": 0.2833, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.1050879695672848, |
| "grad_norm": 0.3843298554420471, |
| "learning_rate": 9.634169577643697e-05, |
| "loss": 0.3413, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.1069900142653353, |
| "grad_norm": 0.3834189176559448, |
| "learning_rate": 9.633534455382661e-05, |
| "loss": 0.2792, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.1088920589633857, |
| "grad_norm": 0.37232789397239685, |
| "learning_rate": 9.632899333121626e-05, |
| "loss": 0.2724, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.1107941036614362, |
| "grad_norm": 0.2608899772167206, |
| "learning_rate": 9.632264210860591e-05, |
| "loss": 0.1966, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.1126961483594864, |
| "grad_norm": 0.2676723301410675, |
| "learning_rate": 9.631629088599557e-05, |
| "loss": 0.2149, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.1145981930575368, |
| "grad_norm": 0.40126022696495056, |
| "learning_rate": 9.63099396633852e-05, |
| "loss": 0.2937, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.1165002377555873, |
| "grad_norm": 0.3493163287639618, |
| "learning_rate": 9.630358844077486e-05, |
| "loss": 0.2461, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.1184022824536377, |
| "grad_norm": 0.39294591546058655, |
| "learning_rate": 9.629723721816451e-05, |
| "loss": 0.2922, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.1203043271516882, |
| "grad_norm": 0.3855053186416626, |
| "learning_rate": 9.629088599555415e-05, |
| "loss": 0.2541, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.1222063718497384, |
| "grad_norm": 0.3388477861881256, |
| "learning_rate": 9.628453477294378e-05, |
| "loss": 0.2234, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.1241084165477888, |
| "grad_norm": 0.3856431841850281, |
| "learning_rate": 9.627818355033345e-05, |
| "loss": 0.2836, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.1260104612458393, |
| "grad_norm": 0.39824768900871277, |
| "learning_rate": 9.627183232772309e-05, |
| "loss": 0.2562, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.1279125059438897, |
| "grad_norm": 0.44484448432922363, |
| "learning_rate": 9.626548110511274e-05, |
| "loss": 0.2685, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.1298145506419401, |
| "grad_norm": 0.4581182599067688, |
| "learning_rate": 9.625912988250239e-05, |
| "loss": 0.3208, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.1317165953399906, |
| "grad_norm": 0.3560565412044525, |
| "learning_rate": 9.625277865989203e-05, |
| "loss": 0.2834, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.1336186400380408, |
| "grad_norm": 0.4423635005950928, |
| "learning_rate": 9.624642743728168e-05, |
| "loss": 0.3154, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.1355206847360912, |
| "grad_norm": 0.3797377943992615, |
| "learning_rate": 9.624007621467132e-05, |
| "loss": 0.28, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.1374227294341417, |
| "grad_norm": 0.29780030250549316, |
| "learning_rate": 9.623372499206099e-05, |
| "loss": 0.2209, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.1393247741321921, |
| "grad_norm": 0.3372732996940613, |
| "learning_rate": 9.622737376945062e-05, |
| "loss": 0.2502, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.1412268188302426, |
| "grad_norm": 0.36365967988967896, |
| "learning_rate": 9.622102254684026e-05, |
| "loss": 0.2804, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1431288635282928, |
| "grad_norm": 0.40790894627571106, |
| "learning_rate": 9.621467132422993e-05, |
| "loss": 0.3633, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.1450309082263432, |
| "grad_norm": 0.35693496465682983, |
| "learning_rate": 9.620832010161957e-05, |
| "loss": 0.3193, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.1469329529243937, |
| "grad_norm": 0.3701719045639038, |
| "learning_rate": 9.620196887900922e-05, |
| "loss": 0.2937, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.1488349976224441, |
| "grad_norm": 0.4299123287200928, |
| "learning_rate": 9.619561765639886e-05, |
| "loss": 0.2732, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.1507370423204946, |
| "grad_norm": 0.4082129895687103, |
| "learning_rate": 9.618926643378851e-05, |
| "loss": 0.2867, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.152639087018545, |
| "grad_norm": 0.49353981018066406, |
| "learning_rate": 9.618291521117816e-05, |
| "loss": 0.266, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.1545411317165954, |
| "grad_norm": 0.3889831006526947, |
| "learning_rate": 9.61765639885678e-05, |
| "loss": 0.2732, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.1564431764146457, |
| "grad_norm": 0.3464524745941162, |
| "learning_rate": 9.617021276595745e-05, |
| "loss": 0.2616, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.158345221112696, |
| "grad_norm": 0.3498656153678894, |
| "learning_rate": 9.61638615433471e-05, |
| "loss": 0.2538, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.1602472658107466, |
| "grad_norm": 0.31552717089653015, |
| "learning_rate": 9.615751032073674e-05, |
| "loss": 0.2283, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.162149310508797, |
| "grad_norm": 0.3225223422050476, |
| "learning_rate": 9.615115909812639e-05, |
| "loss": 0.2428, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.1640513552068474, |
| "grad_norm": 0.3108568489551544, |
| "learning_rate": 9.614480787551604e-05, |
| "loss": 0.2207, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.1659533999048977, |
| "grad_norm": 0.42909371852874756, |
| "learning_rate": 9.613845665290568e-05, |
| "loss": 0.3285, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.167855444602948, |
| "grad_norm": 0.3831368088722229, |
| "learning_rate": 9.613210543029533e-05, |
| "loss": 0.2425, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.1697574893009985, |
| "grad_norm": 0.3891592025756836, |
| "learning_rate": 9.612575420768499e-05, |
| "loss": 0.2849, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.171659533999049, |
| "grad_norm": 0.5383257865905762, |
| "learning_rate": 9.611940298507464e-05, |
| "loss": 0.3444, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.1735615786970994, |
| "grad_norm": 0.4203440845012665, |
| "learning_rate": 9.611305176246428e-05, |
| "loss": 0.3198, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.1754636233951499, |
| "grad_norm": 0.42422881722450256, |
| "learning_rate": 9.610670053985393e-05, |
| "loss": 0.3873, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.1773656680932003, |
| "grad_norm": 0.34799742698669434, |
| "learning_rate": 9.610034931724358e-05, |
| "loss": 0.2645, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.1792677127912505, |
| "grad_norm": 0.37579119205474854, |
| "learning_rate": 9.609399809463322e-05, |
| "loss": 0.3379, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.181169757489301, |
| "grad_norm": 0.3958894610404968, |
| "learning_rate": 9.608764687202287e-05, |
| "loss": 0.2792, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.1830718021873514, |
| "grad_norm": 0.30366870760917664, |
| "learning_rate": 9.608129564941252e-05, |
| "loss": 0.1871, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.1849738468854019, |
| "grad_norm": 0.39878007769584656, |
| "learning_rate": 9.607494442680216e-05, |
| "loss": 0.2675, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.1868758915834523, |
| "grad_norm": 0.35332080721855164, |
| "learning_rate": 9.606859320419181e-05, |
| "loss": 0.2856, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.1887779362815025, |
| "grad_norm": 0.3391731381416321, |
| "learning_rate": 9.606224198158146e-05, |
| "loss": 0.254, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.190679980979553, |
| "grad_norm": 0.39363861083984375, |
| "learning_rate": 9.60558907589711e-05, |
| "loss": 0.2447, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.1925820256776034, |
| "grad_norm": 0.4773564040660858, |
| "learning_rate": 9.604953953636075e-05, |
| "loss": 0.3447, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.1944840703756539, |
| "grad_norm": 0.34327152371406555, |
| "learning_rate": 9.60431883137504e-05, |
| "loss": 0.2353, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.1963861150737043, |
| "grad_norm": 0.37386631965637207, |
| "learning_rate": 9.603683709114006e-05, |
| "loss": 0.2792, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.1982881597717547, |
| "grad_norm": 0.4061308801174164, |
| "learning_rate": 9.60304858685297e-05, |
| "loss": 0.3216, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.200190204469805, |
| "grad_norm": 0.3440467417240143, |
| "learning_rate": 9.602413464591933e-05, |
| "loss": 0.2653, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.2020922491678554, |
| "grad_norm": 0.36648881435394287, |
| "learning_rate": 9.6017783423309e-05, |
| "loss": 0.2471, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.2039942938659058, |
| "grad_norm": 0.3737157881259918, |
| "learning_rate": 9.601143220069864e-05, |
| "loss": 0.3255, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.2058963385639563, |
| "grad_norm": 0.3840744197368622, |
| "learning_rate": 9.600508097808829e-05, |
| "loss": 0.2457, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.2077983832620067, |
| "grad_norm": 0.34374961256980896, |
| "learning_rate": 9.599872975547793e-05, |
| "loss": 0.2705, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.209700427960057, |
| "grad_norm": 0.3460882306098938, |
| "learning_rate": 9.599237853286758e-05, |
| "loss": 0.2308, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.2116024726581074, |
| "grad_norm": 0.33316507935523987, |
| "learning_rate": 9.598602731025723e-05, |
| "loss": 0.2562, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.2135045173561578, |
| "grad_norm": 0.3132528066635132, |
| "learning_rate": 9.597967608764687e-05, |
| "loss": 0.2331, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.2154065620542083, |
| "grad_norm": 0.3329333961009979, |
| "learning_rate": 9.597332486503653e-05, |
| "loss": 0.2224, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.2173086067522587, |
| "grad_norm": 0.35949432849884033, |
| "learning_rate": 9.596697364242617e-05, |
| "loss": 0.2337, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.2192106514503092, |
| "grad_norm": 0.33591121435165405, |
| "learning_rate": 9.596062241981581e-05, |
| "loss": 0.2441, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.2211126961483596, |
| "grad_norm": 0.38212794065475464, |
| "learning_rate": 9.595427119720546e-05, |
| "loss": 0.2569, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.2230147408464098, |
| "grad_norm": 0.4124354124069214, |
| "learning_rate": 9.594791997459512e-05, |
| "loss": 0.3143, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.2249167855444603, |
| "grad_norm": 0.4712159037590027, |
| "learning_rate": 9.594156875198475e-05, |
| "loss": 0.3153, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.2268188302425107, |
| "grad_norm": 0.3652181923389435, |
| "learning_rate": 9.59352175293744e-05, |
| "loss": 0.2448, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.2287208749405611, |
| "grad_norm": 0.40058213472366333, |
| "learning_rate": 9.592886630676406e-05, |
| "loss": 0.304, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.2306229196386116, |
| "grad_norm": 0.4105280041694641, |
| "learning_rate": 9.592251508415371e-05, |
| "loss": 0.251, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.2325249643366618, |
| "grad_norm": 0.3609527349472046, |
| "learning_rate": 9.591616386154335e-05, |
| "loss": 0.2311, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.2344270090347123, |
| "grad_norm": 0.3686671257019043, |
| "learning_rate": 9.5909812638933e-05, |
| "loss": 0.2214, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.2363290537327627, |
| "grad_norm": 0.27986517548561096, |
| "learning_rate": 9.590346141632265e-05, |
| "loss": 0.2531, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.2382310984308131, |
| "grad_norm": 0.4477519690990448, |
| "learning_rate": 9.589711019371229e-05, |
| "loss": 0.3039, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.2401331431288636, |
| "grad_norm": 0.33017873764038086, |
| "learning_rate": 9.589075897110194e-05, |
| "loss": 0.205, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.242035187826914, |
| "grad_norm": 0.31245800852775574, |
| "learning_rate": 9.588440774849159e-05, |
| "loss": 0.2493, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.2439372325249642, |
| "grad_norm": 0.33620285987854004, |
| "learning_rate": 9.587805652588123e-05, |
| "loss": 0.2629, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.2458392772230147, |
| "grad_norm": 0.34820401668548584, |
| "learning_rate": 9.587170530327088e-05, |
| "loss": 0.2446, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.2477413219210651, |
| "grad_norm": 0.4110179543495178, |
| "learning_rate": 9.586535408066053e-05, |
| "loss": 0.3345, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.2496433666191156, |
| "grad_norm": 0.3637439012527466, |
| "learning_rate": 9.585900285805019e-05, |
| "loss": 0.2052, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.251545411317166, |
| "grad_norm": 0.39023682475090027, |
| "learning_rate": 9.585265163543982e-05, |
| "loss": 0.2841, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.2534474560152162, |
| "grad_norm": 0.3623685836791992, |
| "learning_rate": 9.584630041282948e-05, |
| "loss": 0.2286, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.2553495007132667, |
| "grad_norm": 0.38151344656944275, |
| "learning_rate": 9.583994919021913e-05, |
| "loss": 0.2357, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.2572515454113171, |
| "grad_norm": 0.38236725330352783, |
| "learning_rate": 9.583359796760877e-05, |
| "loss": 0.2966, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.2591535901093676, |
| "grad_norm": 0.38568076491355896, |
| "learning_rate": 9.58272467449984e-05, |
| "loss": 0.3018, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.261055634807418, |
| "grad_norm": 0.3488738238811493, |
| "learning_rate": 9.582089552238807e-05, |
| "loss": 0.354, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.2629576795054684, |
| "grad_norm": 0.352860689163208, |
| "learning_rate": 9.581454429977771e-05, |
| "loss": 0.2143, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.2648597242035189, |
| "grad_norm": 0.3734944760799408, |
| "learning_rate": 9.580819307716736e-05, |
| "loss": 0.3486, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.266761768901569, |
| "grad_norm": 0.4024759531021118, |
| "learning_rate": 9.580184185455701e-05, |
| "loss": 0.2922, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.2686638135996195, |
| "grad_norm": 0.37389662861824036, |
| "learning_rate": 9.579549063194665e-05, |
| "loss": 0.2545, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.27056585829767, |
| "grad_norm": 0.42338186502456665, |
| "learning_rate": 9.57891394093363e-05, |
| "loss": 0.2961, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.2724679029957204, |
| "grad_norm": 0.3795355260372162, |
| "learning_rate": 9.578278818672594e-05, |
| "loss": 0.2777, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.2743699476937709, |
| "grad_norm": 0.3439030945301056, |
| "learning_rate": 9.57764369641156e-05, |
| "loss": 0.2179, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.276271992391821, |
| "grad_norm": 0.39637741446495056, |
| "learning_rate": 9.577008574150524e-05, |
| "loss": 0.2701, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.2781740370898715, |
| "grad_norm": 0.3348701298236847, |
| "learning_rate": 9.576373451889488e-05, |
| "loss": 0.2632, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.280076081787922, |
| "grad_norm": 0.3696272671222687, |
| "learning_rate": 9.575738329628455e-05, |
| "loss": 0.2228, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.2819781264859724, |
| "grad_norm": 0.3261694610118866, |
| "learning_rate": 9.575103207367419e-05, |
| "loss": 0.2589, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.2838801711840229, |
| "grad_norm": 0.39266085624694824, |
| "learning_rate": 9.574468085106384e-05, |
| "loss": 0.2893, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.2857822158820733, |
| "grad_norm": 0.4356357157230377, |
| "learning_rate": 9.573832962845348e-05, |
| "loss": 0.3249, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.2876842605801238, |
| "grad_norm": 0.38992395997047424, |
| "learning_rate": 9.573197840584313e-05, |
| "loss": 0.2697, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.289586305278174, |
| "grad_norm": 0.35415610671043396, |
| "learning_rate": 9.572562718323278e-05, |
| "loss": 0.2538, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.2914883499762244, |
| "grad_norm": 0.38410142064094543, |
| "learning_rate": 9.571927596062242e-05, |
| "loss": 0.2325, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.2933903946742749, |
| "grad_norm": 0.36036771535873413, |
| "learning_rate": 9.571292473801207e-05, |
| "loss": 0.242, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.2952924393723253, |
| "grad_norm": 0.3901429772377014, |
| "learning_rate": 9.570657351540172e-05, |
| "loss": 0.3141, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.2971944840703755, |
| "grad_norm": 0.3684573769569397, |
| "learning_rate": 9.570022229279136e-05, |
| "loss": 0.2725, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.299096528768426, |
| "grad_norm": 0.44199153780937195, |
| "learning_rate": 9.569387107018101e-05, |
| "loss": 0.2938, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.3009985734664764, |
| "grad_norm": 0.4435335695743561, |
| "learning_rate": 9.568751984757066e-05, |
| "loss": 0.3454, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.3029006181645268, |
| "grad_norm": 0.3713487386703491, |
| "learning_rate": 9.56811686249603e-05, |
| "loss": 0.25, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.3048026628625773, |
| "grad_norm": 0.394452840089798, |
| "learning_rate": 9.567481740234995e-05, |
| "loss": 0.3062, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.3067047075606277, |
| "grad_norm": 0.47593292593955994, |
| "learning_rate": 9.56684661797396e-05, |
| "loss": 0.3131, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.3086067522586782, |
| "grad_norm": 0.39060479402542114, |
| "learning_rate": 9.566211495712926e-05, |
| "loss": 0.3267, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.3105087969567286, |
| "grad_norm": 0.40931451320648193, |
| "learning_rate": 9.56557637345189e-05, |
| "loss": 0.2979, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.3124108416547788, |
| "grad_norm": 0.3557567000389099, |
| "learning_rate": 9.564941251190855e-05, |
| "loss": 0.213, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.3143128863528293, |
| "grad_norm": 0.43843701481819153, |
| "learning_rate": 9.56430612892982e-05, |
| "loss": 0.2835, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.3162149310508797, |
| "grad_norm": 0.33530867099761963, |
| "learning_rate": 9.563671006668784e-05, |
| "loss": 0.2392, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.3181169757489302, |
| "grad_norm": 0.35071656107902527, |
| "learning_rate": 9.563035884407749e-05, |
| "loss": 0.1916, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.3200190204469804, |
| "grad_norm": 0.3808371126651764, |
| "learning_rate": 9.562400762146714e-05, |
| "loss": 0.2426, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.3219210651450308, |
| "grad_norm": 0.46641990542411804, |
| "learning_rate": 9.561765639885678e-05, |
| "loss": 0.3399, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.3238231098430813, |
| "grad_norm": 0.4153888523578644, |
| "learning_rate": 9.561130517624643e-05, |
| "loss": 0.4152, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.3257251545411317, |
| "grad_norm": 0.4004898965358734, |
| "learning_rate": 9.560495395363608e-05, |
| "loss": 0.3637, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.3276271992391822, |
| "grad_norm": 0.421058714389801, |
| "learning_rate": 9.559860273102572e-05, |
| "loss": 0.2625, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.3295292439372326, |
| "grad_norm": 0.39722004532814026, |
| "learning_rate": 9.559225150841537e-05, |
| "loss": 0.3563, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.331431288635283, |
| "grad_norm": 0.3793489634990692, |
| "learning_rate": 9.558590028580501e-05, |
| "loss": 0.2306, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.43592244386672974, |
| "learning_rate": 9.557954906319468e-05, |
| "loss": 0.4354, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.3352353780313837, |
| "grad_norm": 0.30159738659858704, |
| "learning_rate": 9.557319784058432e-05, |
| "loss": 0.2062, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.3371374227294341, |
| "grad_norm": 0.34011465311050415, |
| "learning_rate": 9.556684661797395e-05, |
| "loss": 0.2363, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.3390394674274846, |
| "grad_norm": 0.41224443912506104, |
| "learning_rate": 9.556049539536362e-05, |
| "loss": 0.2913, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.340941512125535, |
| "grad_norm": 0.4105536937713623, |
| "learning_rate": 9.555414417275326e-05, |
| "loss": 0.2459, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.3428435568235852, |
| "grad_norm": 0.3158798813819885, |
| "learning_rate": 9.554779295014291e-05, |
| "loss": 0.1921, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.3447456015216357, |
| "grad_norm": 0.4023972451686859, |
| "learning_rate": 9.554144172753255e-05, |
| "loss": 0.2406, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.3466476462196861, |
| "grad_norm": 0.4204084277153015, |
| "learning_rate": 9.55350905049222e-05, |
| "loss": 0.2977, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.3485496909177366, |
| "grad_norm": 0.4853519797325134, |
| "learning_rate": 9.552873928231185e-05, |
| "loss": 0.3871, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.350451735615787, |
| "grad_norm": 0.3755006194114685, |
| "learning_rate": 9.552238805970149e-05, |
| "loss": 0.2399, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.3523537803138375, |
| "grad_norm": 0.37587347626686096, |
| "learning_rate": 9.551603683709116e-05, |
| "loss": 0.3029, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.354255825011888, |
| "grad_norm": 0.4257625937461853, |
| "learning_rate": 9.55096856144808e-05, |
| "loss": 0.2541, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.3561578697099381, |
| "grad_norm": 0.29570913314819336, |
| "learning_rate": 9.550333439187043e-05, |
| "loss": 0.1668, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.3580599144079886, |
| "grad_norm": 0.5089273452758789, |
| "learning_rate": 9.549698316926008e-05, |
| "loss": 0.4006, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.359961959106039, |
| "grad_norm": 0.43584999442100525, |
| "learning_rate": 9.549063194664974e-05, |
| "loss": 0.2996, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.3618640038040895, |
| "grad_norm": 0.4071057140827179, |
| "learning_rate": 9.548428072403937e-05, |
| "loss": 0.308, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.3637660485021397, |
| "grad_norm": 0.37772196531295776, |
| "learning_rate": 9.547792950142903e-05, |
| "loss": 0.2235, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.3656680932001901, |
| "grad_norm": 0.44488438963890076, |
| "learning_rate": 9.547157827881868e-05, |
| "loss": 0.2748, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.3675701378982406, |
| "grad_norm": 0.3227798640727997, |
| "learning_rate": 9.546522705620833e-05, |
| "loss": 0.2609, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.369472182596291, |
| "grad_norm": 0.3742448389530182, |
| "learning_rate": 9.545887583359797e-05, |
| "loss": 0.2417, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.3713742272943414, |
| "grad_norm": 0.3582020699977875, |
| "learning_rate": 9.545252461098762e-05, |
| "loss": 0.2688, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.3732762719923919, |
| "grad_norm": 0.3762567341327667, |
| "learning_rate": 9.544617338837727e-05, |
| "loss": 0.2939, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.3751783166904423, |
| "grad_norm": 0.38103973865509033, |
| "learning_rate": 9.543982216576691e-05, |
| "loss": 0.3335, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.3770803613884925, |
| "grad_norm": 0.3109844923019409, |
| "learning_rate": 9.543347094315656e-05, |
| "loss": 0.2094, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.378982406086543, |
| "grad_norm": 0.3642789125442505, |
| "learning_rate": 9.542711972054621e-05, |
| "loss": 0.2879, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.3808844507845934, |
| "grad_norm": 0.3879150152206421, |
| "learning_rate": 9.542076849793585e-05, |
| "loss": 0.2567, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.3827864954826439, |
| "grad_norm": 0.3364320993423462, |
| "learning_rate": 9.54144172753255e-05, |
| "loss": 0.2773, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.3846885401806943, |
| "grad_norm": 0.5071269273757935, |
| "learning_rate": 9.540806605271516e-05, |
| "loss": 0.2916, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.3865905848787445, |
| "grad_norm": 0.425793319940567, |
| "learning_rate": 9.540171483010481e-05, |
| "loss": 0.2948, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.388492629576795, |
| "grad_norm": 0.38478776812553406, |
| "learning_rate": 9.539536360749445e-05, |
| "loss": 0.2493, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3903946742748454, |
| "grad_norm": 0.4016847014427185, |
| "learning_rate": 9.53890123848841e-05, |
| "loss": 0.3038, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.3922967189728959, |
| "grad_norm": 0.2799355983734131, |
| "learning_rate": 9.538266116227375e-05, |
| "loss": 0.2964, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.3941987636709463, |
| "grad_norm": 0.3720659613609314, |
| "learning_rate": 9.537630993966339e-05, |
| "loss": 0.2528, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.3961008083689967, |
| "grad_norm": 0.2954385578632355, |
| "learning_rate": 9.536995871705303e-05, |
| "loss": 0.2119, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.3980028530670472, |
| "grad_norm": 0.35636264085769653, |
| "learning_rate": 9.536360749444269e-05, |
| "loss": 0.3042, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.3999048977650974, |
| "grad_norm": 0.3219160735607147, |
| "learning_rate": 9.535725627183233e-05, |
| "loss": 0.2977, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.4018069424631479, |
| "grad_norm": 0.32340940833091736, |
| "learning_rate": 9.535090504922198e-05, |
| "loss": 0.2295, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.4037089871611983, |
| "grad_norm": 0.3884155750274658, |
| "learning_rate": 9.534455382661163e-05, |
| "loss": 0.2367, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.4056110318592487, |
| "grad_norm": 0.3708769381046295, |
| "learning_rate": 9.533820260400127e-05, |
| "loss": 0.2807, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.407513076557299, |
| "grad_norm": 0.3377797603607178, |
| "learning_rate": 9.533185138139092e-05, |
| "loss": 0.2459, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.4094151212553494, |
| "grad_norm": 0.542662501335144, |
| "learning_rate": 9.532550015878056e-05, |
| "loss": 0.3883, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.4113171659533998, |
| "grad_norm": 0.36908188462257385, |
| "learning_rate": 9.531914893617023e-05, |
| "loss": 0.2239, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.4132192106514503, |
| "grad_norm": 0.2898438572883606, |
| "learning_rate": 9.531279771355987e-05, |
| "loss": 0.1929, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.4151212553495007, |
| "grad_norm": 0.361965537071228, |
| "learning_rate": 9.53064464909495e-05, |
| "loss": 0.2758, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.4170233000475512, |
| "grad_norm": 0.42736831307411194, |
| "learning_rate": 9.530009526833916e-05, |
| "loss": 0.3103, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.4189253447456016, |
| "grad_norm": 0.3411954641342163, |
| "learning_rate": 9.529374404572881e-05, |
| "loss": 0.2498, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.420827389443652, |
| "grad_norm": 0.3671089708805084, |
| "learning_rate": 9.528739282311846e-05, |
| "loss": 0.2961, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.4227294341417023, |
| "grad_norm": 0.35021135210990906, |
| "learning_rate": 9.52810416005081e-05, |
| "loss": 0.2422, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.4246314788397527, |
| "grad_norm": 0.3203287422657013, |
| "learning_rate": 9.527469037789775e-05, |
| "loss": 0.2377, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.4265335235378032, |
| "grad_norm": 0.32512807846069336, |
| "learning_rate": 9.52683391552874e-05, |
| "loss": 0.2533, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.4284355682358536, |
| "grad_norm": 0.39963454008102417, |
| "learning_rate": 9.526198793267704e-05, |
| "loss": 0.3191, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.4303376129339038, |
| "grad_norm": 0.3722153306007385, |
| "learning_rate": 9.525563671006669e-05, |
| "loss": 0.2134, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.4322396576319543, |
| "grad_norm": 0.3429708182811737, |
| "learning_rate": 9.524928548745634e-05, |
| "loss": 0.2221, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.4341417023300047, |
| "grad_norm": 0.4014436602592468, |
| "learning_rate": 9.524293426484598e-05, |
| "loss": 0.2638, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.4360437470280552, |
| "grad_norm": 0.38329729437828064, |
| "learning_rate": 9.523658304223563e-05, |
| "loss": 0.25, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.4379457917261056, |
| "grad_norm": 0.37710002064704895, |
| "learning_rate": 9.523023181962529e-05, |
| "loss": 0.2623, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.439847836424156, |
| "grad_norm": 0.4223197102546692, |
| "learning_rate": 9.522388059701492e-05, |
| "loss": 0.408, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.4417498811222065, |
| "grad_norm": 0.45707425475120544, |
| "learning_rate": 9.521752937440458e-05, |
| "loss": 0.3491, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.4436519258202567, |
| "grad_norm": 0.39775991439819336, |
| "learning_rate": 9.521117815179423e-05, |
| "loss": 0.2498, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.4455539705183071, |
| "grad_norm": 0.3113288879394531, |
| "learning_rate": 9.520482692918388e-05, |
| "loss": 0.2191, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.4474560152163576, |
| "grad_norm": 0.35126394033432007, |
| "learning_rate": 9.519847570657352e-05, |
| "loss": 0.2689, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.449358059914408, |
| "grad_norm": 0.42121708393096924, |
| "learning_rate": 9.519212448396317e-05, |
| "loss": 0.2859, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.4512601046124585, |
| "grad_norm": 0.37913796305656433, |
| "learning_rate": 9.518577326135282e-05, |
| "loss": 0.2676, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.4531621493105087, |
| "grad_norm": 0.3767364025115967, |
| "learning_rate": 9.517942203874246e-05, |
| "loss": 0.2298, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.4550641940085591, |
| "grad_norm": 0.3317908048629761, |
| "learning_rate": 9.517307081613211e-05, |
| "loss": 0.2439, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.4569662387066096, |
| "grad_norm": 0.28014522790908813, |
| "learning_rate": 9.516671959352176e-05, |
| "loss": 0.207, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.45886828340466, |
| "grad_norm": 0.4119054675102234, |
| "learning_rate": 9.51603683709114e-05, |
| "loss": 0.2969, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.4607703281027105, |
| "grad_norm": 0.3351030647754669, |
| "learning_rate": 9.515401714830105e-05, |
| "loss": 0.2925, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.462672372800761, |
| "grad_norm": 0.5204692482948303, |
| "learning_rate": 9.51476659256907e-05, |
| "loss": 0.3546, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.4645744174988113, |
| "grad_norm": 0.42994043231010437, |
| "learning_rate": 9.514131470308034e-05, |
| "loss": 0.3284, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.4664764621968616, |
| "grad_norm": 0.3580436408519745, |
| "learning_rate": 9.513496348047e-05, |
| "loss": 0.2639, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.468378506894912, |
| "grad_norm": 0.37151291966438293, |
| "learning_rate": 9.512861225785963e-05, |
| "loss": 0.2556, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.4702805515929624, |
| "grad_norm": 0.33122384548187256, |
| "learning_rate": 9.51222610352493e-05, |
| "loss": 0.2565, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.472182596291013, |
| "grad_norm": 0.3718935251235962, |
| "learning_rate": 9.511590981263894e-05, |
| "loss": 0.2348, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.474084640989063, |
| "grad_norm": 0.3752667009830475, |
| "learning_rate": 9.510955859002858e-05, |
| "loss": 0.2933, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.4759866856871136, |
| "grad_norm": 0.44539371132850647, |
| "learning_rate": 9.510320736741824e-05, |
| "loss": 0.2699, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.477888730385164, |
| "grad_norm": 0.5468220114707947, |
| "learning_rate": 9.509685614480788e-05, |
| "loss": 0.4141, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.4797907750832144, |
| "grad_norm": 0.5036222338676453, |
| "learning_rate": 9.509050492219753e-05, |
| "loss": 0.3463, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.4816928197812649, |
| "grad_norm": 0.3742172420024872, |
| "learning_rate": 9.508415369958717e-05, |
| "loss": 0.3104, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.4835948644793153, |
| "grad_norm": 0.38696351647377014, |
| "learning_rate": 9.507780247697682e-05, |
| "loss": 0.2406, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.4854969091773658, |
| "grad_norm": 0.43431171774864197, |
| "learning_rate": 9.507145125436647e-05, |
| "loss": 0.307, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.4873989538754162, |
| "grad_norm": 0.3814404606819153, |
| "learning_rate": 9.506510003175611e-05, |
| "loss": 0.2681, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.4893009985734664, |
| "grad_norm": 0.350359708070755, |
| "learning_rate": 9.505874880914578e-05, |
| "loss": 0.2408, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.4912030432715169, |
| "grad_norm": 0.4443821609020233, |
| "learning_rate": 9.505239758653541e-05, |
| "loss": 0.3358, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.4931050879695673, |
| "grad_norm": 0.2963017225265503, |
| "learning_rate": 9.504604636392505e-05, |
| "loss": 0.2085, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.4950071326676178, |
| "grad_norm": 0.4765385389328003, |
| "learning_rate": 9.50396951413147e-05, |
| "loss": 0.396, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.496909177365668, |
| "grad_norm": 0.3389003574848175, |
| "learning_rate": 9.503334391870436e-05, |
| "loss": 0.327, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.4988112220637184, |
| "grad_norm": 0.42218640446662903, |
| "learning_rate": 9.5026992696094e-05, |
| "loss": 0.3078, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.5007132667617689, |
| "grad_norm": 0.4693278670310974, |
| "learning_rate": 9.502064147348365e-05, |
| "loss": 0.2853, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.5026153114598193, |
| "grad_norm": 0.3891851305961609, |
| "learning_rate": 9.50142902508733e-05, |
| "loss": 0.2493, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.5045173561578697, |
| "grad_norm": 0.3862535357475281, |
| "learning_rate": 9.500793902826295e-05, |
| "loss": 0.2673, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.5064194008559202, |
| "grad_norm": 0.34803205728530884, |
| "learning_rate": 9.500158780565259e-05, |
| "loss": 0.2814, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.5083214455539706, |
| "grad_norm": 0.3963899314403534, |
| "learning_rate": 9.499523658304224e-05, |
| "loss": 0.3018, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.510223490252021, |
| "grad_norm": 0.4004577398300171, |
| "learning_rate": 9.498888536043189e-05, |
| "loss": 0.313, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.5121255349500713, |
| "grad_norm": 0.32212579250335693, |
| "learning_rate": 9.498253413782153e-05, |
| "loss": 0.2081, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.5140275796481217, |
| "grad_norm": 0.32745805382728577, |
| "learning_rate": 9.497618291521118e-05, |
| "loss": 0.231, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.5159296243461722, |
| "grad_norm": 0.40773364901542664, |
| "learning_rate": 9.496983169260083e-05, |
| "loss": 0.2804, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.5178316690442224, |
| "grad_norm": 0.3848927319049835, |
| "learning_rate": 9.496348046999047e-05, |
| "loss": 0.288, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.5197337137422728, |
| "grad_norm": 0.317124605178833, |
| "learning_rate": 9.495712924738012e-05, |
| "loss": 0.2202, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.5216357584403233, |
| "grad_norm": 0.3564606010913849, |
| "learning_rate": 9.495077802476978e-05, |
| "loss": 0.2594, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.5235378031383737, |
| "grad_norm": 0.3151964545249939, |
| "learning_rate": 9.494442680215943e-05, |
| "loss": 0.2138, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.5254398478364242, |
| "grad_norm": 0.4009242057800293, |
| "learning_rate": 9.493807557954907e-05, |
| "loss": 0.3157, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.5273418925344746, |
| "grad_norm": 0.36916011571884155, |
| "learning_rate": 9.49317243569387e-05, |
| "loss": 0.2478, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.529243937232525, |
| "grad_norm": 0.372277170419693, |
| "learning_rate": 9.492537313432837e-05, |
| "loss": 0.2912, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.5311459819305755, |
| "grad_norm": 0.42100057005882263, |
| "learning_rate": 9.491902191171801e-05, |
| "loss": 0.2938, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.533048026628626, |
| "grad_norm": 0.3528178334236145, |
| "learning_rate": 9.491267068910765e-05, |
| "loss": 0.2519, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.5349500713266762, |
| "grad_norm": 0.3655840754508972, |
| "learning_rate": 9.490631946649731e-05, |
| "loss": 0.2685, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.5368521160247266, |
| "grad_norm": 0.34080174565315247, |
| "learning_rate": 9.489996824388695e-05, |
| "loss": 0.2339, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.5387541607227768, |
| "grad_norm": 0.3532484173774719, |
| "learning_rate": 9.48936170212766e-05, |
| "loss": 0.2448, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.5406562054208273, |
| "grad_norm": 0.33115965127944946, |
| "learning_rate": 9.488726579866624e-05, |
| "loss": 0.2549, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.5425582501188777, |
| "grad_norm": 0.40624433755874634, |
| "learning_rate": 9.488091457605589e-05, |
| "loss": 0.2847, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.5444602948169281, |
| "grad_norm": 0.35374221205711365, |
| "learning_rate": 9.487456335344554e-05, |
| "loss": 0.2704, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.5463623395149786, |
| "grad_norm": 0.3859337568283081, |
| "learning_rate": 9.486821213083518e-05, |
| "loss": 0.2969, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.548264384213029, |
| "grad_norm": 0.37984946370124817, |
| "learning_rate": 9.486186090822485e-05, |
| "loss": 0.2908, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.5501664289110795, |
| "grad_norm": 0.34984755516052246, |
| "learning_rate": 9.485550968561449e-05, |
| "loss": 0.2247, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.55206847360913, |
| "grad_norm": 0.32592761516571045, |
| "learning_rate": 9.484915846300412e-05, |
| "loss": 0.1985, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.5539705183071804, |
| "grad_norm": 0.4273107945919037, |
| "learning_rate": 9.484280724039378e-05, |
| "loss": 0.2875, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.5558725630052306, |
| "grad_norm": 0.35476601123809814, |
| "learning_rate": 9.483645601778343e-05, |
| "loss": 0.2721, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.557774607703281, |
| "grad_norm": 0.30542057752609253, |
| "learning_rate": 9.483010479517308e-05, |
| "loss": 0.1966, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.5596766524013315, |
| "grad_norm": 0.44310665130615234, |
| "learning_rate": 9.482375357256272e-05, |
| "loss": 0.2533, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.5615786970993817, |
| "grad_norm": 0.39837488532066345, |
| "learning_rate": 9.481740234995237e-05, |
| "loss": 0.3045, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.5634807417974321, |
| "grad_norm": 0.33650925755500793, |
| "learning_rate": 9.481105112734202e-05, |
| "loss": 0.3626, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.5653827864954826, |
| "grad_norm": 0.39762622117996216, |
| "learning_rate": 9.480469990473166e-05, |
| "loss": 0.2862, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.567284831193533, |
| "grad_norm": 0.36138975620269775, |
| "learning_rate": 9.479834868212131e-05, |
| "loss": 0.2434, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.5691868758915835, |
| "grad_norm": 0.37878358364105225, |
| "learning_rate": 9.479199745951096e-05, |
| "loss": 0.2421, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.571088920589634, |
| "grad_norm": 0.4009093642234802, |
| "learning_rate": 9.47856462369006e-05, |
| "loss": 0.2561, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.5729909652876843, |
| "grad_norm": 0.3085389733314514, |
| "learning_rate": 9.477929501429025e-05, |
| "loss": 0.2293, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.5748930099857348, |
| "grad_norm": 0.48082223534584045, |
| "learning_rate": 9.47729437916799e-05, |
| "loss": 0.3193, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.5767950546837852, |
| "grad_norm": 0.42938464879989624, |
| "learning_rate": 9.476659256906954e-05, |
| "loss": 0.3319, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.5786970993818354, |
| "grad_norm": 0.32788941264152527, |
| "learning_rate": 9.47602413464592e-05, |
| "loss": 0.2432, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.5805991440798859, |
| "grad_norm": 0.38157737255096436, |
| "learning_rate": 9.475389012384885e-05, |
| "loss": 0.3165, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.5825011887779363, |
| "grad_norm": 0.38666632771492004, |
| "learning_rate": 9.47475389012385e-05, |
| "loss": 0.2554, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.5844032334759865, |
| "grad_norm": 0.3475115895271301, |
| "learning_rate": 9.474118767862814e-05, |
| "loss": 0.2679, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.586305278174037, |
| "grad_norm": 0.35684680938720703, |
| "learning_rate": 9.473483645601779e-05, |
| "loss": 0.2574, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.5882073228720874, |
| "grad_norm": 0.5205959677696228, |
| "learning_rate": 9.472848523340744e-05, |
| "loss": 0.3646, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.5901093675701379, |
| "grad_norm": 0.37549740076065063, |
| "learning_rate": 9.472213401079708e-05, |
| "loss": 0.2741, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.5920114122681883, |
| "grad_norm": 0.5251928567886353, |
| "learning_rate": 9.471578278818673e-05, |
| "loss": 0.3799, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.5939134569662388, |
| "grad_norm": 0.42622271180152893, |
| "learning_rate": 9.470943156557638e-05, |
| "loss": 0.2991, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.5958155016642892, |
| "grad_norm": 0.3737063407897949, |
| "learning_rate": 9.470308034296602e-05, |
| "loss": 0.288, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.5977175463623396, |
| "grad_norm": 0.4851538836956024, |
| "learning_rate": 9.469672912035567e-05, |
| "loss": 0.3293, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5996195910603899, |
| "grad_norm": 0.3662918508052826, |
| "learning_rate": 9.469037789774533e-05, |
| "loss": 0.2338, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.6015216357584403, |
| "grad_norm": 0.3263486325740814, |
| "learning_rate": 9.468402667513496e-05, |
| "loss": 0.2228, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.6034236804564908, |
| "grad_norm": 0.4000779092311859, |
| "learning_rate": 9.467767545252462e-05, |
| "loss": 0.2635, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.605325725154541, |
| "grad_norm": 0.4274492859840393, |
| "learning_rate": 9.467132422991425e-05, |
| "loss": 0.3063, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.6072277698525914, |
| "grad_norm": 0.4486158490180969, |
| "learning_rate": 9.466497300730392e-05, |
| "loss": 0.3039, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.6091298145506419, |
| "grad_norm": 0.48109135031700134, |
| "learning_rate": 9.465862178469356e-05, |
| "loss": 0.3471, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.6110318592486923, |
| "grad_norm": 0.41299277544021606, |
| "learning_rate": 9.46522705620832e-05, |
| "loss": 0.2896, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.6129339039467427, |
| "grad_norm": 0.4177182614803314, |
| "learning_rate": 9.464591933947286e-05, |
| "loss": 0.2519, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.6148359486447932, |
| "grad_norm": 0.36468592286109924, |
| "learning_rate": 9.46395681168625e-05, |
| "loss": 0.275, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.6167379933428436, |
| "grad_norm": 0.33025646209716797, |
| "learning_rate": 9.463321689425215e-05, |
| "loss": 0.234, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.618640038040894, |
| "grad_norm": 0.4377218186855316, |
| "learning_rate": 9.462686567164179e-05, |
| "loss": 0.2939, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.6205420827389445, |
| "grad_norm": 0.34059834480285645, |
| "learning_rate": 9.462051444903144e-05, |
| "loss": 0.2559, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.6224441274369947, |
| "grad_norm": 0.36525094509124756, |
| "learning_rate": 9.46141632264211e-05, |
| "loss": 0.2638, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.6243461721350452, |
| "grad_norm": 0.344927042722702, |
| "learning_rate": 9.460781200381073e-05, |
| "loss": 0.1906, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.6262482168330956, |
| "grad_norm": 0.4097568988800049, |
| "learning_rate": 9.460146078120038e-05, |
| "loss": 0.3143, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.6281502615311458, |
| "grad_norm": 0.32290300726890564, |
| "learning_rate": 9.459510955859004e-05, |
| "loss": 0.2734, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.6300523062291963, |
| "grad_norm": 0.3865107595920563, |
| "learning_rate": 9.458875833597967e-05, |
| "loss": 0.3012, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.6319543509272467, |
| "grad_norm": 0.3034641444683075, |
| "learning_rate": 9.458240711336933e-05, |
| "loss": 0.2164, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.6338563956252972, |
| "grad_norm": 0.3896719217300415, |
| "learning_rate": 9.457605589075898e-05, |
| "loss": 0.2577, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.6357584403233476, |
| "grad_norm": 0.35619622468948364, |
| "learning_rate": 9.456970466814862e-05, |
| "loss": 0.3076, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.637660485021398, |
| "grad_norm": 0.39600345492362976, |
| "learning_rate": 9.456335344553827e-05, |
| "loss": 0.4003, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.6395625297194485, |
| "grad_norm": 0.3511577248573303, |
| "learning_rate": 9.455700222292792e-05, |
| "loss": 0.2603, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.641464574417499, |
| "grad_norm": 0.44329899549484253, |
| "learning_rate": 9.455065100031757e-05, |
| "loss": 0.2921, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.6433666191155494, |
| "grad_norm": 0.3798992931842804, |
| "learning_rate": 9.454429977770721e-05, |
| "loss": 0.2897, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.6452686638135996, |
| "grad_norm": 0.38711193203926086, |
| "learning_rate": 9.453794855509686e-05, |
| "loss": 0.2791, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.64717070851165, |
| "grad_norm": 0.3537624478340149, |
| "learning_rate": 9.453159733248651e-05, |
| "loss": 0.2207, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.6490727532097005, |
| "grad_norm": 0.350455641746521, |
| "learning_rate": 9.452524610987615e-05, |
| "loss": 0.2595, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.6509747979077507, |
| "grad_norm": 0.35781386494636536, |
| "learning_rate": 9.45188948872658e-05, |
| "loss": 0.2618, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.6528768426058011, |
| "grad_norm": 0.4823295772075653, |
| "learning_rate": 9.451254366465546e-05, |
| "loss": 0.3174, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.6547788873038516, |
| "grad_norm": 0.31698495149612427, |
| "learning_rate": 9.45061924420451e-05, |
| "loss": 0.2165, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.656680932001902, |
| "grad_norm": 0.4576948583126068, |
| "learning_rate": 9.449984121943475e-05, |
| "loss": 0.2937, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.6585829766999525, |
| "grad_norm": 0.4196888506412506, |
| "learning_rate": 9.44934899968244e-05, |
| "loss": 0.2876, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.660485021398003, |
| "grad_norm": 0.48588597774505615, |
| "learning_rate": 9.448713877421405e-05, |
| "loss": 0.3433, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.6623870660960534, |
| "grad_norm": 0.427946537733078, |
| "learning_rate": 9.448078755160369e-05, |
| "loss": 0.3184, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.6642891107941038, |
| "grad_norm": 0.4138951897621155, |
| "learning_rate": 9.447443632899333e-05, |
| "loss": 0.2738, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.666191155492154, |
| "grad_norm": 0.36560842394828796, |
| "learning_rate": 9.446808510638299e-05, |
| "loss": 0.3029, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.6680932001902045, |
| "grad_norm": 0.42942315340042114, |
| "learning_rate": 9.446173388377263e-05, |
| "loss": 0.2888, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.669995244888255, |
| "grad_norm": 0.21167854964733124, |
| "learning_rate": 9.445538266116227e-05, |
| "loss": 0.1919, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.6718972895863051, |
| "grad_norm": 0.41339564323425293, |
| "learning_rate": 9.444903143855193e-05, |
| "loss": 0.2482, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.6737993342843556, |
| "grad_norm": 0.47189727425575256, |
| "learning_rate": 9.444268021594157e-05, |
| "loss": 0.328, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.675701378982406, |
| "grad_norm": 0.32868659496307373, |
| "learning_rate": 9.443632899333122e-05, |
| "loss": 0.1985, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.6776034236804565, |
| "grad_norm": 0.3501724898815155, |
| "learning_rate": 9.442997777072086e-05, |
| "loss": 0.2733, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.679505468378507, |
| "grad_norm": 0.37144583463668823, |
| "learning_rate": 9.442362654811051e-05, |
| "loss": 0.2293, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.6814075130765573, |
| "grad_norm": 0.36318424344062805, |
| "learning_rate": 9.441727532550017e-05, |
| "loss": 0.3521, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.6833095577746078, |
| "grad_norm": 0.4295286238193512, |
| "learning_rate": 9.44109241028898e-05, |
| "loss": 0.3113, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.6852116024726582, |
| "grad_norm": 0.3312181830406189, |
| "learning_rate": 9.440457288027947e-05, |
| "loss": 0.2818, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.6871136471707087, |
| "grad_norm": 0.3743634819984436, |
| "learning_rate": 9.439822165766911e-05, |
| "loss": 0.245, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.6890156918687589, |
| "grad_norm": 0.5934861898422241, |
| "learning_rate": 9.439187043505875e-05, |
| "loss": 0.3654, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.6909177365668093, |
| "grad_norm": 0.4149317741394043, |
| "learning_rate": 9.43855192124484e-05, |
| "loss": 0.2584, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.6928197812648598, |
| "grad_norm": 0.40615764260292053, |
| "learning_rate": 9.437916798983805e-05, |
| "loss": 0.2986, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.69472182596291, |
| "grad_norm": 0.37536385655403137, |
| "learning_rate": 9.43728167672277e-05, |
| "loss": 0.2813, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.6966238706609604, |
| "grad_norm": 0.41415923833847046, |
| "learning_rate": 9.436646554461734e-05, |
| "loss": 0.3333, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.6985259153590109, |
| "grad_norm": 0.30747082829475403, |
| "learning_rate": 9.436011432200699e-05, |
| "loss": 0.2143, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.7004279600570613, |
| "grad_norm": 0.44593873620033264, |
| "learning_rate": 9.435376309939664e-05, |
| "loss": 0.2834, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.7023300047551118, |
| "grad_norm": 0.3417704403400421, |
| "learning_rate": 9.434741187678628e-05, |
| "loss": 0.2265, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.7042320494531622, |
| "grad_norm": 0.3436511754989624, |
| "learning_rate": 9.434106065417593e-05, |
| "loss": 0.249, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.7061340941512126, |
| "grad_norm": 0.4569544494152069, |
| "learning_rate": 9.433470943156559e-05, |
| "loss": 0.3271, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.708036138849263, |
| "grad_norm": 0.3883751630783081, |
| "learning_rate": 9.432835820895522e-05, |
| "loss": 0.2673, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.7099381835473135, |
| "grad_norm": 0.3915776014328003, |
| "learning_rate": 9.432200698634488e-05, |
| "loss": 0.2313, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.7118402282453637, |
| "grad_norm": 0.3450072407722473, |
| "learning_rate": 9.431565576373453e-05, |
| "loss": 0.2726, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.7137422729434142, |
| "grad_norm": 0.3894912004470825, |
| "learning_rate": 9.430930454112417e-05, |
| "loss": 0.2607, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.7156443176414644, |
| "grad_norm": 0.3509180545806885, |
| "learning_rate": 9.430295331851382e-05, |
| "loss": 0.2781, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.7175463623395149, |
| "grad_norm": 0.5164948105812073, |
| "learning_rate": 9.429660209590347e-05, |
| "loss": 0.3619, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.7194484070375653, |
| "grad_norm": 0.4074023962020874, |
| "learning_rate": 9.429025087329312e-05, |
| "loss": 0.3116, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.7213504517356157, |
| "grad_norm": 0.4034394323825836, |
| "learning_rate": 9.428389965068276e-05, |
| "loss": 0.3155, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.7232524964336662, |
| "grad_norm": 0.32292982935905457, |
| "learning_rate": 9.427754842807241e-05, |
| "loss": 0.2171, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.7251545411317166, |
| "grad_norm": 0.368856817483902, |
| "learning_rate": 9.427119720546206e-05, |
| "loss": 0.3021, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.727056585829767, |
| "grad_norm": 0.34953123331069946, |
| "learning_rate": 9.42648459828517e-05, |
| "loss": 0.2701, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.7289586305278175, |
| "grad_norm": 0.37510743737220764, |
| "learning_rate": 9.425849476024135e-05, |
| "loss": 0.3216, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.730860675225868, |
| "grad_norm": 0.31331393122673035, |
| "learning_rate": 9.4252143537631e-05, |
| "loss": 0.2855, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.7327627199239182, |
| "grad_norm": 0.3806105852127075, |
| "learning_rate": 9.424579231502064e-05, |
| "loss": 0.3216, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.7346647646219686, |
| "grad_norm": 0.3693408668041229, |
| "learning_rate": 9.42394410924103e-05, |
| "loss": 0.2473, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.736566809320019, |
| "grad_norm": 0.2931939959526062, |
| "learning_rate": 9.423308986979993e-05, |
| "loss": 0.1873, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.7384688540180693, |
| "grad_norm": 0.4330272972583771, |
| "learning_rate": 9.422673864718959e-05, |
| "loss": 0.3078, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.7403708987161197, |
| "grad_norm": 0.4881534278392792, |
| "learning_rate": 9.422038742457924e-05, |
| "loss": 0.3771, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.7422729434141702, |
| "grad_norm": 0.3158344328403473, |
| "learning_rate": 9.421403620196888e-05, |
| "loss": 0.2813, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.7441749881122206, |
| "grad_norm": 0.4482041299343109, |
| "learning_rate": 9.420768497935854e-05, |
| "loss": 0.3872, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.746077032810271, |
| "grad_norm": 0.3493407070636749, |
| "learning_rate": 9.420133375674818e-05, |
| "loss": 0.2284, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.7479790775083215, |
| "grad_norm": 0.3753608763217926, |
| "learning_rate": 9.419498253413782e-05, |
| "loss": 0.254, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.749881122206372, |
| "grad_norm": 0.4550943374633789, |
| "learning_rate": 9.418863131152747e-05, |
| "loss": 0.3073, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.7517831669044224, |
| "grad_norm": 0.3239607810974121, |
| "learning_rate": 9.418228008891712e-05, |
| "loss": 0.2087, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.7536852116024728, |
| "grad_norm": 0.4610382616519928, |
| "learning_rate": 9.417592886630677e-05, |
| "loss": 0.3104, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.755587256300523, |
| "grad_norm": 0.4382965862751007, |
| "learning_rate": 9.416957764369641e-05, |
| "loss": 0.2583, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.7574893009985735, |
| "grad_norm": 0.31299924850463867, |
| "learning_rate": 9.416322642108606e-05, |
| "loss": 0.2033, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.759391345696624, |
| "grad_norm": 0.33872106671333313, |
| "learning_rate": 9.415687519847571e-05, |
| "loss": 0.2366, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.7612933903946741, |
| "grad_norm": 0.33771976828575134, |
| "learning_rate": 9.415052397586535e-05, |
| "loss": 0.3062, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.7631954350927246, |
| "grad_norm": 0.32810178399086, |
| "learning_rate": 9.4144172753255e-05, |
| "loss": 0.2264, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.765097479790775, |
| "grad_norm": 0.41518697142601013, |
| "learning_rate": 9.413782153064466e-05, |
| "loss": 0.2747, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.7669995244888255, |
| "grad_norm": 0.43647775053977966, |
| "learning_rate": 9.41314703080343e-05, |
| "loss": 0.3439, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.768901569186876, |
| "grad_norm": 0.2905902564525604, |
| "learning_rate": 9.412511908542395e-05, |
| "loss": 0.2327, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.7708036138849264, |
| "grad_norm": 0.38527336716651917, |
| "learning_rate": 9.41187678628136e-05, |
| "loss": 0.264, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.7727056585829768, |
| "grad_norm": 0.4135185182094574, |
| "learning_rate": 9.411241664020324e-05, |
| "loss": 0.3075, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.7746077032810272, |
| "grad_norm": 0.30278775095939636, |
| "learning_rate": 9.410606541759289e-05, |
| "loss": 0.1831, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.7765097479790775, |
| "grad_norm": 0.3687085509300232, |
| "learning_rate": 9.409971419498254e-05, |
| "loss": 0.2862, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.778411792677128, |
| "grad_norm": 0.3217594623565674, |
| "learning_rate": 9.409336297237219e-05, |
| "loss": 0.1975, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.7803138373751783, |
| "grad_norm": 0.3583223223686218, |
| "learning_rate": 9.408701174976183e-05, |
| "loss": 0.2345, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.7822158820732286, |
| "grad_norm": 0.4119435250759125, |
| "learning_rate": 9.408066052715148e-05, |
| "loss": 0.2916, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.784117926771279, |
| "grad_norm": 0.400728315114975, |
| "learning_rate": 9.407430930454113e-05, |
| "loss": 0.4505, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.7860199714693294, |
| "grad_norm": 0.3988611698150635, |
| "learning_rate": 9.406795808193077e-05, |
| "loss": 0.286, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.78792201616738, |
| "grad_norm": 0.4544796347618103, |
| "learning_rate": 9.406160685932042e-05, |
| "loss": 0.3268, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.7898240608654303, |
| "grad_norm": 0.3785744905471802, |
| "learning_rate": 9.405525563671008e-05, |
| "loss": 0.2532, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.7917261055634808, |
| "grad_norm": 0.4459128975868225, |
| "learning_rate": 9.404890441409971e-05, |
| "loss": 0.3348, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.7936281502615312, |
| "grad_norm": 0.3253449499607086, |
| "learning_rate": 9.404255319148937e-05, |
| "loss": 0.1945, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.7955301949595817, |
| "grad_norm": 0.4977390468120575, |
| "learning_rate": 9.403620196887902e-05, |
| "loss": 0.3, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.797432239657632, |
| "grad_norm": 0.46191859245300293, |
| "learning_rate": 9.402985074626867e-05, |
| "loss": 0.3638, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.7993342843556823, |
| "grad_norm": 0.38492342829704285, |
| "learning_rate": 9.402349952365831e-05, |
| "loss": 0.2566, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.8012363290537328, |
| "grad_norm": 0.34863540530204773, |
| "learning_rate": 9.401714830104795e-05, |
| "loss": 0.2321, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.8031383737517832, |
| "grad_norm": 0.3839346766471863, |
| "learning_rate": 9.401079707843761e-05, |
| "loss": 0.2751, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.8050404184498334, |
| "grad_norm": 0.36121171712875366, |
| "learning_rate": 9.400444585582725e-05, |
| "loss": 0.2492, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.8069424631478839, |
| "grad_norm": 0.3479311466217041, |
| "learning_rate": 9.399809463321689e-05, |
| "loss": 0.2436, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.8088445078459343, |
| "grad_norm": 0.35279884934425354, |
| "learning_rate": 9.399174341060655e-05, |
| "loss": 0.2718, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.8107465525439848, |
| "grad_norm": 0.43152448534965515, |
| "learning_rate": 9.398539218799619e-05, |
| "loss": 0.2739, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.8126485972420352, |
| "grad_norm": 0.3631283938884735, |
| "learning_rate": 9.397904096538584e-05, |
| "loss": 0.2239, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.8145506419400856, |
| "grad_norm": 0.4698762595653534, |
| "learning_rate": 9.397268974277548e-05, |
| "loss": 0.3247, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.816452686638136, |
| "grad_norm": 0.36629432439804077, |
| "learning_rate": 9.396633852016513e-05, |
| "loss": 0.2778, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.8183547313361865, |
| "grad_norm": 0.34220409393310547, |
| "learning_rate": 9.395998729755479e-05, |
| "loss": 0.2466, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.820256776034237, |
| "grad_norm": 0.3768969178199768, |
| "learning_rate": 9.395363607494442e-05, |
| "loss": 0.334, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.8221588207322872, |
| "grad_norm": 0.2891027629375458, |
| "learning_rate": 9.394728485233409e-05, |
| "loss": 0.206, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.8240608654303376, |
| "grad_norm": 0.2802363634109497, |
| "learning_rate": 9.394093362972373e-05, |
| "loss": 0.2566, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.825962910128388, |
| "grad_norm": 0.38722601532936096, |
| "learning_rate": 9.393458240711337e-05, |
| "loss": 0.2615, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.8278649548264383, |
| "grad_norm": 0.45663881301879883, |
| "learning_rate": 9.392823118450302e-05, |
| "loss": 0.3521, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.8297669995244887, |
| "grad_norm": 0.36096152663230896, |
| "learning_rate": 9.392187996189267e-05, |
| "loss": 0.2429, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.8316690442225392, |
| "grad_norm": 0.3237638473510742, |
| "learning_rate": 9.391552873928232e-05, |
| "loss": 0.2874, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.8335710889205896, |
| "grad_norm": 0.379863440990448, |
| "learning_rate": 9.390917751667196e-05, |
| "loss": 0.2504, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.83547313361864, |
| "grad_norm": 0.40816691517829895, |
| "learning_rate": 9.390282629406161e-05, |
| "loss": 0.2614, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.8373751783166905, |
| "grad_norm": 0.38382720947265625, |
| "learning_rate": 9.389647507145126e-05, |
| "loss": 0.2282, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.839277223014741, |
| "grad_norm": 0.328861266374588, |
| "learning_rate": 9.38901238488409e-05, |
| "loss": 0.1763, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.8411792677127914, |
| "grad_norm": 0.3471934497356415, |
| "learning_rate": 9.388377262623055e-05, |
| "loss": 0.2348, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.8430813124108416, |
| "grad_norm": 0.44112637639045715, |
| "learning_rate": 9.38774214036202e-05, |
| "loss": 0.3496, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.844983357108892, |
| "grad_norm": 0.4357364773750305, |
| "learning_rate": 9.387107018100984e-05, |
| "loss": 0.2832, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.8468854018069425, |
| "grad_norm": 0.4502738118171692, |
| "learning_rate": 9.38647189583995e-05, |
| "loss": 0.2862, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.8487874465049927, |
| "grad_norm": 0.3577602505683899, |
| "learning_rate": 9.385836773578915e-05, |
| "loss": 0.2019, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.8506894912030432, |
| "grad_norm": 0.36250707507133484, |
| "learning_rate": 9.385201651317879e-05, |
| "loss": 0.2936, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.8525915359010936, |
| "grad_norm": 0.44027233123779297, |
| "learning_rate": 9.384566529056844e-05, |
| "loss": 0.3004, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.854493580599144, |
| "grad_norm": 0.4500497877597809, |
| "learning_rate": 9.383931406795809e-05, |
| "loss": 0.3, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.8563956252971945, |
| "grad_norm": 0.3777524530887604, |
| "learning_rate": 9.383296284534774e-05, |
| "loss": 0.2535, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.858297669995245, |
| "grad_norm": 0.3377416431903839, |
| "learning_rate": 9.382661162273738e-05, |
| "loss": 0.2767, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.8601997146932954, |
| "grad_norm": 0.34563374519348145, |
| "learning_rate": 9.382026040012702e-05, |
| "loss": 0.1923, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.8621017593913458, |
| "grad_norm": 0.3025479018688202, |
| "learning_rate": 9.381390917751668e-05, |
| "loss": 0.2214, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.8640038040893963, |
| "grad_norm": 0.3614577054977417, |
| "learning_rate": 9.380755795490632e-05, |
| "loss": 0.299, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.8659058487874465, |
| "grad_norm": 0.34508028626441956, |
| "learning_rate": 9.380120673229597e-05, |
| "loss": 0.2201, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.867807893485497, |
| "grad_norm": 0.33169567584991455, |
| "learning_rate": 9.379485550968563e-05, |
| "loss": 0.2298, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.8697099381835474, |
| "grad_norm": 0.4361656904220581, |
| "learning_rate": 9.378850428707526e-05, |
| "loss": 0.3109, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.8716119828815976, |
| "grad_norm": 0.3832654654979706, |
| "learning_rate": 9.378215306446492e-05, |
| "loss": 0.2877, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.873514027579648, |
| "grad_norm": 0.3991541862487793, |
| "learning_rate": 9.377580184185455e-05, |
| "loss": 0.2755, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.8754160722776985, |
| "grad_norm": 0.6057716012001038, |
| "learning_rate": 9.37694506192442e-05, |
| "loss": 0.3665, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.877318116975749, |
| "grad_norm": 0.2887308895587921, |
| "learning_rate": 9.376309939663386e-05, |
| "loss": 0.2414, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.8792201616737993, |
| "grad_norm": 0.28379005193710327, |
| "learning_rate": 9.37567481740235e-05, |
| "loss": 0.1895, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.8811222063718498, |
| "grad_norm": 0.36071258783340454, |
| "learning_rate": 9.375039695141316e-05, |
| "loss": 0.2855, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.8830242510699002, |
| "grad_norm": 0.3872823119163513, |
| "learning_rate": 9.37440457288028e-05, |
| "loss": 0.3112, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.8849262957679507, |
| "grad_norm": 0.3761101961135864, |
| "learning_rate": 9.373769450619244e-05, |
| "loss": 0.2291, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.886828340466001, |
| "grad_norm": 0.404000461101532, |
| "learning_rate": 9.373134328358209e-05, |
| "loss": 0.2349, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.8887303851640513, |
| "grad_norm": 0.4787864089012146, |
| "learning_rate": 9.372499206097174e-05, |
| "loss": 0.3447, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.8906324298621018, |
| "grad_norm": 0.4898964762687683, |
| "learning_rate": 9.37186408383614e-05, |
| "loss": 0.3306, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.892534474560152, |
| "grad_norm": 0.3915330767631531, |
| "learning_rate": 9.371228961575103e-05, |
| "loss": 0.2896, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.8944365192582024, |
| "grad_norm": 0.4643494486808777, |
| "learning_rate": 9.370593839314068e-05, |
| "loss": 0.3131, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.8963385639562529, |
| "grad_norm": 0.39880135655403137, |
| "learning_rate": 9.369958717053034e-05, |
| "loss": 0.2598, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.8982406086543033, |
| "grad_norm": 0.3153114318847656, |
| "learning_rate": 9.369323594791997e-05, |
| "loss": 0.2429, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.9001426533523538, |
| "grad_norm": 0.4997500479221344, |
| "learning_rate": 9.368688472530963e-05, |
| "loss": 0.4179, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.9020446980504042, |
| "grad_norm": 0.3919009566307068, |
| "learning_rate": 9.368053350269928e-05, |
| "loss": 0.2468, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.9039467427484547, |
| "grad_norm": 0.48444265127182007, |
| "learning_rate": 9.367418228008892e-05, |
| "loss": 0.3191, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.905848787446505, |
| "grad_norm": 0.38168856501579285, |
| "learning_rate": 9.366783105747857e-05, |
| "loss": 0.2658, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.9077508321445555, |
| "grad_norm": 0.47058162093162537, |
| "learning_rate": 9.366147983486822e-05, |
| "loss": 0.3392, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.9096528768426058, |
| "grad_norm": 0.40145471692085266, |
| "learning_rate": 9.365512861225786e-05, |
| "loss": 0.2619, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.9115549215406562, |
| "grad_norm": 0.6980530619621277, |
| "learning_rate": 9.364877738964751e-05, |
| "loss": 0.3111, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.9134569662387066, |
| "grad_norm": 0.35878410935401917, |
| "learning_rate": 9.364242616703716e-05, |
| "loss": 0.3026, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.9153590109367569, |
| "grad_norm": 0.3291071653366089, |
| "learning_rate": 9.363607494442681e-05, |
| "loss": 0.2813, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.9172610556348073, |
| "grad_norm": 0.4286592900753021, |
| "learning_rate": 9.362972372181645e-05, |
| "loss": 0.2921, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.9191631003328578, |
| "grad_norm": 0.2965177893638611, |
| "learning_rate": 9.36233724992061e-05, |
| "loss": 0.2373, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.9210651450309082, |
| "grad_norm": 0.3153838515281677, |
| "learning_rate": 9.361702127659576e-05, |
| "loss": 0.2195, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.9229671897289586, |
| "grad_norm": 0.4827108085155487, |
| "learning_rate": 9.36106700539854e-05, |
| "loss": 0.3127, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.924869234427009, |
| "grad_norm": 0.43089860677719116, |
| "learning_rate": 9.360431883137505e-05, |
| "loss": 0.2687, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.9267712791250595, |
| "grad_norm": 0.43147915601730347, |
| "learning_rate": 9.35979676087647e-05, |
| "loss": 0.3953, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.92867332382311, |
| "grad_norm": 0.37924453616142273, |
| "learning_rate": 9.359161638615434e-05, |
| "loss": 0.2522, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.9305753685211604, |
| "grad_norm": 0.34664931893348694, |
| "learning_rate": 9.358526516354399e-05, |
| "loss": 0.2048, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.9324774132192106, |
| "grad_norm": 0.2877664566040039, |
| "learning_rate": 9.357891394093364e-05, |
| "loss": 0.1794, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.934379457917261, |
| "grad_norm": 0.4924784302711487, |
| "learning_rate": 9.357256271832329e-05, |
| "loss": 0.2737, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.9362815026153115, |
| "grad_norm": 0.36828553676605225, |
| "learning_rate": 9.356621149571293e-05, |
| "loss": 0.2761, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.9381835473133617, |
| "grad_norm": 0.355372816324234, |
| "learning_rate": 9.355986027310257e-05, |
| "loss": 0.2647, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.9400855920114122, |
| "grad_norm": 0.37469297647476196, |
| "learning_rate": 9.355350905049223e-05, |
| "loss": 0.2347, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.9419876367094626, |
| "grad_norm": 0.44890064001083374, |
| "learning_rate": 9.354715782788187e-05, |
| "loss": 0.2581, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.943889681407513, |
| "grad_norm": 0.355234295129776, |
| "learning_rate": 9.354080660527151e-05, |
| "loss": 0.2467, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.9457917261055635, |
| "grad_norm": 0.463871568441391, |
| "learning_rate": 9.353445538266116e-05, |
| "loss": 0.2338, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.947693770803614, |
| "grad_norm": 0.38206830620765686, |
| "learning_rate": 9.352810416005081e-05, |
| "loss": 0.2353, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.9495958155016644, |
| "grad_norm": 0.37627413868904114, |
| "learning_rate": 9.352175293744047e-05, |
| "loss": 0.2375, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.9514978601997148, |
| "grad_norm": 0.4191925823688507, |
| "learning_rate": 9.35154017148301e-05, |
| "loss": 0.2444, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.953399904897765, |
| "grad_norm": 0.41149812936782837, |
| "learning_rate": 9.350905049221976e-05, |
| "loss": 0.2905, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.9553019495958155, |
| "grad_norm": 0.329313725233078, |
| "learning_rate": 9.350269926960941e-05, |
| "loss": 0.2293, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.957203994293866, |
| "grad_norm": 0.4160427749156952, |
| "learning_rate": 9.349634804699905e-05, |
| "loss": 0.2512, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.9591060389919162, |
| "grad_norm": 0.4005848467350006, |
| "learning_rate": 9.34899968243887e-05, |
| "loss": 0.2446, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.9610080836899666, |
| "grad_norm": 0.4497627019882202, |
| "learning_rate": 9.348364560177835e-05, |
| "loss": 0.3265, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.962910128388017, |
| "grad_norm": 0.4275449216365814, |
| "learning_rate": 9.347729437916799e-05, |
| "loss": 0.302, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.9648121730860675, |
| "grad_norm": 0.33947649598121643, |
| "learning_rate": 9.347094315655764e-05, |
| "loss": 0.1903, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.966714217784118, |
| "grad_norm": 0.38422051072120667, |
| "learning_rate": 9.346459193394729e-05, |
| "loss": 0.2595, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.9686162624821684, |
| "grad_norm": 0.35371389985084534, |
| "learning_rate": 9.345824071133694e-05, |
| "loss": 0.2284, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.9705183071802188, |
| "grad_norm": 0.38803884387016296, |
| "learning_rate": 9.345188948872658e-05, |
| "loss": 0.3021, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.9724203518782693, |
| "grad_norm": 0.38203269243240356, |
| "learning_rate": 9.344553826611623e-05, |
| "loss": 0.2863, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.9743223965763197, |
| "grad_norm": 0.3267860412597656, |
| "learning_rate": 9.343918704350588e-05, |
| "loss": 0.226, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.97622444127437, |
| "grad_norm": 0.39556884765625, |
| "learning_rate": 9.343283582089552e-05, |
| "loss": 0.2727, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.9781264859724204, |
| "grad_norm": 0.4278768301010132, |
| "learning_rate": 9.342648459828517e-05, |
| "loss": 0.2723, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.9800285306704708, |
| "grad_norm": 0.37279701232910156, |
| "learning_rate": 9.342013337567483e-05, |
| "loss": 0.2685, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.981930575368521, |
| "grad_norm": 0.4421425759792328, |
| "learning_rate": 9.341378215306447e-05, |
| "loss": 0.2793, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.9838326200665715, |
| "grad_norm": 0.4341887831687927, |
| "learning_rate": 9.340743093045412e-05, |
| "loss": 0.2752, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.985734664764622, |
| "grad_norm": 0.42935600876808167, |
| "learning_rate": 9.340107970784377e-05, |
| "loss": 0.3127, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.9876367094626723, |
| "grad_norm": 0.29476839303970337, |
| "learning_rate": 9.339472848523341e-05, |
| "loss": 0.1855, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.9895387541607228, |
| "grad_norm": 0.43286338448524475, |
| "learning_rate": 9.338837726262306e-05, |
| "loss": 0.3109, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.9914407988587732, |
| "grad_norm": 0.35097062587738037, |
| "learning_rate": 9.338202604001271e-05, |
| "loss": 0.2178, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.9933428435568237, |
| "grad_norm": 0.3497145175933838, |
| "learning_rate": 9.337567481740236e-05, |
| "loss": 0.2372, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.9952448882548741, |
| "grad_norm": 0.4399060904979706, |
| "learning_rate": 9.3369323594792e-05, |
| "loss": 0.3065, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.9971469329529246, |
| "grad_norm": 0.43642693758010864, |
| "learning_rate": 9.336297237218164e-05, |
| "loss": 0.3099, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.9990489776509748, |
| "grad_norm": 0.42969372868537903, |
| "learning_rate": 9.33566211495713e-05, |
| "loss": 0.2899, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.000951022349025, |
| "grad_norm": 0.324709951877594, |
| "learning_rate": 9.335026992696094e-05, |
| "loss": 0.1977, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.0028530670470754, |
| "grad_norm": 0.2254759967327118, |
| "learning_rate": 9.33439187043506e-05, |
| "loss": 0.1513, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.004755111745126, |
| "grad_norm": 0.29324305057525635, |
| "learning_rate": 9.333756748174025e-05, |
| "loss": 0.1739, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.0066571564431763, |
| "grad_norm": 0.2934301495552063, |
| "learning_rate": 9.333121625912988e-05, |
| "loss": 0.1788, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.0085592011412268, |
| "grad_norm": 0.3355758786201477, |
| "learning_rate": 9.332486503651954e-05, |
| "loss": 0.1829, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.010461245839277, |
| "grad_norm": 0.4047424793243408, |
| "learning_rate": 9.331851381390917e-05, |
| "loss": 0.2256, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.0123632905373277, |
| "grad_norm": 0.38155117630958557, |
| "learning_rate": 9.331216259129883e-05, |
| "loss": 0.1992, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.014265335235378, |
| "grad_norm": 0.4122423827648163, |
| "learning_rate": 9.330581136868848e-05, |
| "loss": 0.2222, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.0161673799334285, |
| "grad_norm": 0.4098420739173889, |
| "learning_rate": 9.329946014607812e-05, |
| "loss": 0.1495, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.018069424631479, |
| "grad_norm": 0.37494683265686035, |
| "learning_rate": 9.329310892346778e-05, |
| "loss": 0.1955, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.0199714693295294, |
| "grad_norm": 0.4210919439792633, |
| "learning_rate": 9.328675770085742e-05, |
| "loss": 0.1851, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.02187351402758, |
| "grad_norm": 0.415770560503006, |
| "learning_rate": 9.328040647824706e-05, |
| "loss": 0.209, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.02377555872563, |
| "grad_norm": 0.38957807421684265, |
| "learning_rate": 9.327405525563671e-05, |
| "loss": 0.1597, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.0256776034236803, |
| "grad_norm": 0.3568849563598633, |
| "learning_rate": 9.326770403302636e-05, |
| "loss": 0.1564, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.0275796481217307, |
| "grad_norm": 0.4151419699192047, |
| "learning_rate": 9.326135281041601e-05, |
| "loss": 0.2213, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.029481692819781, |
| "grad_norm": 0.437418669462204, |
| "learning_rate": 9.325500158780565e-05, |
| "loss": 0.2091, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.0313837375178316, |
| "grad_norm": 0.45977523922920227, |
| "learning_rate": 9.32486503651953e-05, |
| "loss": 0.2044, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.033285782215882, |
| "grad_norm": 0.3634967803955078, |
| "learning_rate": 9.324229914258496e-05, |
| "loss": 0.1575, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.0351878269139325, |
| "grad_norm": 0.4348776638507843, |
| "learning_rate": 9.32359479199746e-05, |
| "loss": 0.1892, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.037089871611983, |
| "grad_norm": 0.39220520853996277, |
| "learning_rate": 9.322959669736425e-05, |
| "loss": 0.1962, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.0389919163100334, |
| "grad_norm": 0.4379669725894928, |
| "learning_rate": 9.32232454747539e-05, |
| "loss": 0.2201, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.040893961008084, |
| "grad_norm": 0.31880828738212585, |
| "learning_rate": 9.321689425214354e-05, |
| "loss": 0.1471, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.0427960057061343, |
| "grad_norm": 0.31966346502304077, |
| "learning_rate": 9.321054302953319e-05, |
| "loss": 0.1688, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.0446980504041843, |
| "grad_norm": 0.38291382789611816, |
| "learning_rate": 9.320419180692284e-05, |
| "loss": 0.1797, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.0466000951022347, |
| "grad_norm": 0.3871828615665436, |
| "learning_rate": 9.319784058431248e-05, |
| "loss": 0.2201, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.048502139800285, |
| "grad_norm": 0.35201162099838257, |
| "learning_rate": 9.319148936170213e-05, |
| "loss": 0.1759, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.0504041844983356, |
| "grad_norm": 0.32999902963638306, |
| "learning_rate": 9.318513813909178e-05, |
| "loss": 0.1676, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.052306229196386, |
| "grad_norm": 0.38137802481651306, |
| "learning_rate": 9.317878691648143e-05, |
| "loss": 0.181, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.0542082738944365, |
| "grad_norm": 0.28507858514785767, |
| "learning_rate": 9.317243569387107e-05, |
| "loss": 0.1333, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.056110318592487, |
| "grad_norm": 0.511489987373352, |
| "learning_rate": 9.316608447126071e-05, |
| "loss": 0.271, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.0580123632905374, |
| "grad_norm": 0.37042170763015747, |
| "learning_rate": 9.315973324865038e-05, |
| "loss": 0.2733, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.059914407988588, |
| "grad_norm": 0.3986508548259735, |
| "learning_rate": 9.315338202604001e-05, |
| "loss": 0.1964, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.0618164526866383, |
| "grad_norm": 0.37804266810417175, |
| "learning_rate": 9.314703080342967e-05, |
| "loss": 0.1601, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.0637184973846887, |
| "grad_norm": 0.32077136635780334, |
| "learning_rate": 9.314067958081932e-05, |
| "loss": 0.1462, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.065620542082739, |
| "grad_norm": 0.2813294231891632, |
| "learning_rate": 9.313432835820896e-05, |
| "loss": 0.1321, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.067522586780789, |
| "grad_norm": 0.40840163826942444, |
| "learning_rate": 9.312797713559861e-05, |
| "loss": 0.1892, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.0694246314788396, |
| "grad_norm": 0.3264133334159851, |
| "learning_rate": 9.312162591298825e-05, |
| "loss": 0.1415, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.07132667617689, |
| "grad_norm": 0.4274674952030182, |
| "learning_rate": 9.311527469037791e-05, |
| "loss": 0.1813, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.0732287208749405, |
| "grad_norm": 0.37283292412757874, |
| "learning_rate": 9.310892346776755e-05, |
| "loss": 0.1753, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.075130765572991, |
| "grad_norm": 0.32638901472091675, |
| "learning_rate": 9.310257224515719e-05, |
| "loss": 0.1731, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.0770328102710414, |
| "grad_norm": 0.3295043408870697, |
| "learning_rate": 9.309622102254685e-05, |
| "loss": 0.1934, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.078934854969092, |
| "grad_norm": 0.34605681896209717, |
| "learning_rate": 9.308986979993649e-05, |
| "loss": 0.2556, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.0808368996671422, |
| "grad_norm": 0.35646018385887146, |
| "learning_rate": 9.308351857732613e-05, |
| "loss": 0.1508, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.0827389443651927, |
| "grad_norm": 0.3224691152572632, |
| "learning_rate": 9.307716735471578e-05, |
| "loss": 0.1592, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.084640989063243, |
| "grad_norm": 0.3692566156387329, |
| "learning_rate": 9.307081613210543e-05, |
| "loss": 0.1555, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.0865430337612936, |
| "grad_norm": 0.46436119079589844, |
| "learning_rate": 9.306446490949509e-05, |
| "loss": 0.2176, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.088445078459344, |
| "grad_norm": 0.3176686465740204, |
| "learning_rate": 9.305811368688472e-05, |
| "loss": 0.1763, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.090347123157394, |
| "grad_norm": 0.29192522168159485, |
| "learning_rate": 9.305176246427438e-05, |
| "loss": 0.1485, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.0922491678554445, |
| "grad_norm": 0.34905532002449036, |
| "learning_rate": 9.304541124166403e-05, |
| "loss": 0.1657, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.094151212553495, |
| "grad_norm": 0.4198562800884247, |
| "learning_rate": 9.303906001905367e-05, |
| "loss": 0.2077, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.0960532572515453, |
| "grad_norm": 0.35974305868148804, |
| "learning_rate": 9.303270879644332e-05, |
| "loss": 0.1776, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.097955301949596, |
| "grad_norm": 0.35371047258377075, |
| "learning_rate": 9.302635757383297e-05, |
| "loss": 0.1887, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.0998573466476462, |
| "grad_norm": 0.30068957805633545, |
| "learning_rate": 9.302000635122261e-05, |
| "loss": 0.14, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.1017593913456967, |
| "grad_norm": 0.31092819571495056, |
| "learning_rate": 9.301365512861226e-05, |
| "loss": 0.1603, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.103661436043747, |
| "grad_norm": 0.3615265190601349, |
| "learning_rate": 9.300730390600191e-05, |
| "loss": 0.1791, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.1055634807417976, |
| "grad_norm": 0.2767830491065979, |
| "learning_rate": 9.300095268339156e-05, |
| "loss": 0.1243, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.107465525439848, |
| "grad_norm": 0.36988285183906555, |
| "learning_rate": 9.29946014607812e-05, |
| "loss": 0.1619, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.1093675701378984, |
| "grad_norm": 0.6014404892921448, |
| "learning_rate": 9.298825023817085e-05, |
| "loss": 0.2635, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.1112696148359484, |
| "grad_norm": 0.3621249794960022, |
| "learning_rate": 9.29818990155605e-05, |
| "loss": 0.1749, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.113171659533999, |
| "grad_norm": 0.2977392077445984, |
| "learning_rate": 9.297554779295014e-05, |
| "loss": 0.1582, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.1150737042320493, |
| "grad_norm": 0.3253994286060333, |
| "learning_rate": 9.29691965703398e-05, |
| "loss": 0.1787, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.1169757489300998, |
| "grad_norm": 0.34662213921546936, |
| "learning_rate": 9.296284534772945e-05, |
| "loss": 0.1923, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.11887779362815, |
| "grad_norm": 0.416458398103714, |
| "learning_rate": 9.295649412511909e-05, |
| "loss": 0.1941, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.1207798383262007, |
| "grad_norm": 0.36649563908576965, |
| "learning_rate": 9.295014290250874e-05, |
| "loss": 0.2233, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.122681883024251, |
| "grad_norm": 0.3445313274860382, |
| "learning_rate": 9.294379167989839e-05, |
| "loss": 0.1701, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.1245839277223015, |
| "grad_norm": 0.38747549057006836, |
| "learning_rate": 9.293744045728803e-05, |
| "loss": 0.1707, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.126485972420352, |
| "grad_norm": 0.4027896225452423, |
| "learning_rate": 9.293108923467768e-05, |
| "loss": 0.2086, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.1283880171184024, |
| "grad_norm": 0.3629845976829529, |
| "learning_rate": 9.292473801206733e-05, |
| "loss": 0.1743, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.130290061816453, |
| "grad_norm": 0.39419326186180115, |
| "learning_rate": 9.291838678945698e-05, |
| "loss": 0.1907, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.132192106514503, |
| "grad_norm": 0.36944523453712463, |
| "learning_rate": 9.291203556684662e-05, |
| "loss": 0.1631, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.1340941512125533, |
| "grad_norm": 0.4214774966239929, |
| "learning_rate": 9.290568434423626e-05, |
| "loss": 0.2397, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.1359961959106037, |
| "grad_norm": 0.3092084228992462, |
| "learning_rate": 9.289933312162593e-05, |
| "loss": 0.1396, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.137898240608654, |
| "grad_norm": 0.3649998605251312, |
| "learning_rate": 9.289298189901556e-05, |
| "loss": 0.1677, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.1398002853067046, |
| "grad_norm": 0.4131282567977905, |
| "learning_rate": 9.288663067640522e-05, |
| "loss": 0.2049, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.141702330004755, |
| "grad_norm": 0.4324544668197632, |
| "learning_rate": 9.288027945379485e-05, |
| "loss": 0.1757, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.1436043747028055, |
| "grad_norm": 0.4258798658847809, |
| "learning_rate": 9.28739282311845e-05, |
| "loss": 0.199, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.145506419400856, |
| "grad_norm": 0.4244062602519989, |
| "learning_rate": 9.286757700857416e-05, |
| "loss": 0.2006, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.1474084640989064, |
| "grad_norm": 0.4003104865550995, |
| "learning_rate": 9.28612257859638e-05, |
| "loss": 0.2098, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.149310508796957, |
| "grad_norm": 0.36191633343696594, |
| "learning_rate": 9.285487456335345e-05, |
| "loss": 0.1821, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.1512125534950073, |
| "grad_norm": 0.47675448656082153, |
| "learning_rate": 9.28485233407431e-05, |
| "loss": 0.2083, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.1531145981930577, |
| "grad_norm": 0.4418546259403229, |
| "learning_rate": 9.284217211813274e-05, |
| "loss": 0.2228, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.155016642891108, |
| "grad_norm": 0.31201982498168945, |
| "learning_rate": 9.283582089552239e-05, |
| "loss": 0.1326, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.156918687589158, |
| "grad_norm": 0.30012449622154236, |
| "learning_rate": 9.282946967291204e-05, |
| "loss": 0.1376, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.1588207322872086, |
| "grad_norm": 0.3705848455429077, |
| "learning_rate": 9.282311845030168e-05, |
| "loss": 0.1719, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.160722776985259, |
| "grad_norm": 0.4028238356113434, |
| "learning_rate": 9.281676722769133e-05, |
| "loss": 0.178, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.1626248216833095, |
| "grad_norm": 0.38973838090896606, |
| "learning_rate": 9.281041600508098e-05, |
| "loss": 0.1875, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.16452686638136, |
| "grad_norm": 0.3756285309791565, |
| "learning_rate": 9.280406478247064e-05, |
| "loss": 0.1883, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.1664289110794104, |
| "grad_norm": 0.2721819579601288, |
| "learning_rate": 9.279771355986027e-05, |
| "loss": 0.1468, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.168330955777461, |
| "grad_norm": 0.34547916054725647, |
| "learning_rate": 9.279136233724993e-05, |
| "loss": 0.2043, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.1702330004755113, |
| "grad_norm": 0.44819575548171997, |
| "learning_rate": 9.278501111463958e-05, |
| "loss": 0.2029, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.1721350451735617, |
| "grad_norm": 0.36632853746414185, |
| "learning_rate": 9.277865989202922e-05, |
| "loss": 0.1884, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.174037089871612, |
| "grad_norm": 0.37020185589790344, |
| "learning_rate": 9.277230866941887e-05, |
| "loss": 0.1819, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.1759391345696626, |
| "grad_norm": 0.4174460470676422, |
| "learning_rate": 9.276595744680852e-05, |
| "loss": 0.1918, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.1778411792677126, |
| "grad_norm": 0.4120714068412781, |
| "learning_rate": 9.275960622419816e-05, |
| "loss": 0.2496, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.179743223965763, |
| "grad_norm": 0.4350152909755707, |
| "learning_rate": 9.275325500158781e-05, |
| "loss": 0.1981, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.1816452686638135, |
| "grad_norm": 0.35637348890304565, |
| "learning_rate": 9.274690377897746e-05, |
| "loss": 0.1639, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.183547313361864, |
| "grad_norm": 0.34323298931121826, |
| "learning_rate": 9.27405525563671e-05, |
| "loss": 0.1761, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.1854493580599144, |
| "grad_norm": 0.30730780959129333, |
| "learning_rate": 9.273420133375675e-05, |
| "loss": 0.1623, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.187351402757965, |
| "grad_norm": 0.32239773869514465, |
| "learning_rate": 9.27278501111464e-05, |
| "loss": 0.1238, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.1892534474560152, |
| "grad_norm": 0.35441848635673523, |
| "learning_rate": 9.272149888853606e-05, |
| "loss": 0.1578, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.1911554921540657, |
| "grad_norm": 0.33287835121154785, |
| "learning_rate": 9.27151476659257e-05, |
| "loss": 0.1726, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.193057536852116, |
| "grad_norm": 0.3281983435153961, |
| "learning_rate": 9.270879644331533e-05, |
| "loss": 0.1435, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.1949595815501666, |
| "grad_norm": 0.31831398606300354, |
| "learning_rate": 9.2702445220705e-05, |
| "loss": 0.1585, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.196861626248217, |
| "grad_norm": 0.43460169434547424, |
| "learning_rate": 9.269609399809464e-05, |
| "loss": 0.2121, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.198763670946267, |
| "grad_norm": 0.3470516502857208, |
| "learning_rate": 9.268974277548429e-05, |
| "loss": 0.157, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.2006657156443175, |
| "grad_norm": 0.3971126079559326, |
| "learning_rate": 9.268339155287394e-05, |
| "loss": 0.1738, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.202567760342368, |
| "grad_norm": 0.39526277780532837, |
| "learning_rate": 9.267704033026358e-05, |
| "loss": 0.2117, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.2044698050404183, |
| "grad_norm": 0.31649425625801086, |
| "learning_rate": 9.267068910765323e-05, |
| "loss": 0.1966, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.206371849738469, |
| "grad_norm": 0.4104944169521332, |
| "learning_rate": 9.266433788504287e-05, |
| "loss": 0.2178, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.2082738944365192, |
| "grad_norm": 0.3751467168331146, |
| "learning_rate": 9.265798666243253e-05, |
| "loss": 0.1921, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.2101759391345697, |
| "grad_norm": 0.3348170816898346, |
| "learning_rate": 9.265163543982217e-05, |
| "loss": 0.1533, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.21207798383262, |
| "grad_norm": 0.39907872676849365, |
| "learning_rate": 9.264528421721181e-05, |
| "loss": 0.1733, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.2139800285306706, |
| "grad_norm": 0.45442381501197815, |
| "learning_rate": 9.263893299460147e-05, |
| "loss": 0.2065, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.215882073228721, |
| "grad_norm": 0.37475696206092834, |
| "learning_rate": 9.263258177199111e-05, |
| "loss": 0.1914, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.2177841179267714, |
| "grad_norm": 0.3757840394973755, |
| "learning_rate": 9.262623054938075e-05, |
| "loss": 0.1781, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.219686162624822, |
| "grad_norm": 0.3655502200126648, |
| "learning_rate": 9.26198793267704e-05, |
| "loss": 0.1814, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.2215882073228723, |
| "grad_norm": 0.4219561219215393, |
| "learning_rate": 9.261352810416006e-05, |
| "loss": 0.213, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.2234902520209223, |
| "grad_norm": 0.3741750419139862, |
| "learning_rate": 9.260717688154971e-05, |
| "loss": 0.1782, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.2253922967189728, |
| "grad_norm": 0.37189987301826477, |
| "learning_rate": 9.260082565893935e-05, |
| "loss": 0.1783, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.227294341417023, |
| "grad_norm": 0.2988317608833313, |
| "learning_rate": 9.2594474436329e-05, |
| "loss": 0.1481, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.2291963861150736, |
| "grad_norm": 0.38000479340553284, |
| "learning_rate": 9.258812321371865e-05, |
| "loss": 0.1843, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.231098430813124, |
| "grad_norm": 0.30989545583724976, |
| "learning_rate": 9.258177199110829e-05, |
| "loss": 0.1487, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.2330004755111745, |
| "grad_norm": 0.27984580397605896, |
| "learning_rate": 9.257542076849794e-05, |
| "loss": 0.1445, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.234902520209225, |
| "grad_norm": 0.3828918933868408, |
| "learning_rate": 9.256906954588759e-05, |
| "loss": 0.1709, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.2368045649072754, |
| "grad_norm": 0.33677807450294495, |
| "learning_rate": 9.256271832327723e-05, |
| "loss": 0.1656, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.238706609605326, |
| "grad_norm": 0.37769967317581177, |
| "learning_rate": 9.255636710066688e-05, |
| "loss": 0.2101, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.2406086543033763, |
| "grad_norm": 0.3978733420372009, |
| "learning_rate": 9.255001587805653e-05, |
| "loss": 0.215, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.2425106990014267, |
| "grad_norm": 0.3774537146091461, |
| "learning_rate": 9.254366465544618e-05, |
| "loss": 0.1778, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.2444127436994767, |
| "grad_norm": 0.4117525815963745, |
| "learning_rate": 9.253731343283582e-05, |
| "loss": 0.1801, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.246314788397527, |
| "grad_norm": 0.41460955142974854, |
| "learning_rate": 9.253096221022547e-05, |
| "loss": 0.1939, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.2482168330955776, |
| "grad_norm": 0.41124284267425537, |
| "learning_rate": 9.252461098761513e-05, |
| "loss": 0.1944, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.250118877793628, |
| "grad_norm": 0.39252787828445435, |
| "learning_rate": 9.251825976500476e-05, |
| "loss": 0.2037, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.2520209224916785, |
| "grad_norm": 0.4118300676345825, |
| "learning_rate": 9.25119085423944e-05, |
| "loss": 0.2067, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.253922967189729, |
| "grad_norm": 0.43823009729385376, |
| "learning_rate": 9.250555731978407e-05, |
| "loss": 0.2093, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.2558250118877794, |
| "grad_norm": 0.41397175192832947, |
| "learning_rate": 9.249920609717371e-05, |
| "loss": 0.195, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.25772705658583, |
| "grad_norm": 0.4286901652812958, |
| "learning_rate": 9.249285487456336e-05, |
| "loss": 0.1777, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.2596291012838803, |
| "grad_norm": 0.373329758644104, |
| "learning_rate": 9.248650365195301e-05, |
| "loss": 0.1759, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.2615311459819307, |
| "grad_norm": 0.4786781072616577, |
| "learning_rate": 9.248015242934265e-05, |
| "loss": 0.2509, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.263433190679981, |
| "grad_norm": 0.41533464193344116, |
| "learning_rate": 9.24738012067323e-05, |
| "loss": 0.1595, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.265335235378031, |
| "grad_norm": 0.37687090039253235, |
| "learning_rate": 9.246744998412194e-05, |
| "loss": 0.19, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.2672372800760816, |
| "grad_norm": 0.3623497188091278, |
| "learning_rate": 9.24610987615116e-05, |
| "loss": 0.1723, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.269139324774132, |
| "grad_norm": 0.378251850605011, |
| "learning_rate": 9.245474753890124e-05, |
| "loss": 0.1773, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.2710413694721825, |
| "grad_norm": 0.3755147457122803, |
| "learning_rate": 9.244839631629088e-05, |
| "loss": 0.1685, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.272943414170233, |
| "grad_norm": 0.5196719765663147, |
| "learning_rate": 9.244204509368055e-05, |
| "loss": 0.2665, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.2748454588682834, |
| "grad_norm": 0.4404764473438263, |
| "learning_rate": 9.243569387107018e-05, |
| "loss": 0.1956, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.276747503566334, |
| "grad_norm": 0.47750818729400635, |
| "learning_rate": 9.242934264845984e-05, |
| "loss": 0.2164, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.2786495482643843, |
| "grad_norm": 0.3968189060688019, |
| "learning_rate": 9.242299142584947e-05, |
| "loss": 0.2299, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.2805515929624347, |
| "grad_norm": 0.4168682396411896, |
| "learning_rate": 9.241664020323913e-05, |
| "loss": 0.1924, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.282453637660485, |
| "grad_norm": 0.3767165541648865, |
| "learning_rate": 9.241028898062878e-05, |
| "loss": 0.1868, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.2843556823585356, |
| "grad_norm": 0.37699073553085327, |
| "learning_rate": 9.240393775801842e-05, |
| "loss": 0.1968, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.2862577270565856, |
| "grad_norm": 0.4355759620666504, |
| "learning_rate": 9.239758653540807e-05, |
| "loss": 0.1988, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.2881597717546365, |
| "grad_norm": 0.42668578028678894, |
| "learning_rate": 9.239123531279772e-05, |
| "loss": 0.1988, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.2900618164526865, |
| "grad_norm": 0.44233736395835876, |
| "learning_rate": 9.238488409018736e-05, |
| "loss": 0.2128, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.291963861150737, |
| "grad_norm": 0.31429731845855713, |
| "learning_rate": 9.237853286757701e-05, |
| "loss": 0.1527, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.2938659058487874, |
| "grad_norm": 0.38366618752479553, |
| "learning_rate": 9.237218164496666e-05, |
| "loss": 0.1747, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.295767950546838, |
| "grad_norm": 0.3685773015022278, |
| "learning_rate": 9.23658304223563e-05, |
| "loss": 0.183, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.2976699952448882, |
| "grad_norm": 0.349924772977829, |
| "learning_rate": 9.235947919974595e-05, |
| "loss": 0.1641, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.2995720399429387, |
| "grad_norm": 0.3128054738044739, |
| "learning_rate": 9.23531279771356e-05, |
| "loss": 0.1682, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.301474084640989, |
| "grad_norm": 0.4457269608974457, |
| "learning_rate": 9.234677675452526e-05, |
| "loss": 0.1888, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.3033761293390396, |
| "grad_norm": 0.37438902258872986, |
| "learning_rate": 9.23404255319149e-05, |
| "loss": 0.1612, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.30527817403709, |
| "grad_norm": 0.3830793499946594, |
| "learning_rate": 9.233407430930455e-05, |
| "loss": 0.1825, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.3071802187351405, |
| "grad_norm": 0.4047216773033142, |
| "learning_rate": 9.23277230866942e-05, |
| "loss": 0.1874, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.309082263433191, |
| "grad_norm": 0.400716096162796, |
| "learning_rate": 9.232137186408384e-05, |
| "loss": 0.165, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.310984308131241, |
| "grad_norm": 0.35491228103637695, |
| "learning_rate": 9.231502064147349e-05, |
| "loss": 0.1428, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.3128863528292913, |
| "grad_norm": 0.3040875494480133, |
| "learning_rate": 9.230866941886314e-05, |
| "loss": 0.1315, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.314788397527342, |
| "grad_norm": 0.40058350563049316, |
| "learning_rate": 9.230231819625278e-05, |
| "loss": 0.2016, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.316690442225392, |
| "grad_norm": 0.33165568113327026, |
| "learning_rate": 9.229596697364243e-05, |
| "loss": 0.1668, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.3185924869234427, |
| "grad_norm": 0.29281625151634216, |
| "learning_rate": 9.228961575103208e-05, |
| "loss": 0.1577, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.320494531621493, |
| "grad_norm": 0.4083446264266968, |
| "learning_rate": 9.228326452842172e-05, |
| "loss": 0.174, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.3223965763195435, |
| "grad_norm": 0.3308553695678711, |
| "learning_rate": 9.227691330581137e-05, |
| "loss": 0.21, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.324298621017594, |
| "grad_norm": 0.4102175831794739, |
| "learning_rate": 9.227056208320102e-05, |
| "loss": 0.205, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.3262006657156444, |
| "grad_norm": 0.48705750703811646, |
| "learning_rate": 9.226421086059068e-05, |
| "loss": 0.2544, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.328102710413695, |
| "grad_norm": 0.3305780291557312, |
| "learning_rate": 9.225785963798031e-05, |
| "loss": 0.1786, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.3300047551117453, |
| "grad_norm": 0.3046979308128357, |
| "learning_rate": 9.225150841536995e-05, |
| "loss": 0.1325, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.3319067998097953, |
| "grad_norm": 0.4403087794780731, |
| "learning_rate": 9.224515719275962e-05, |
| "loss": 0.2288, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.3338088445078458, |
| "grad_norm": 0.3797864317893982, |
| "learning_rate": 9.223880597014926e-05, |
| "loss": 0.2068, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.335710889205896, |
| "grad_norm": 0.34793582558631897, |
| "learning_rate": 9.223245474753891e-05, |
| "loss": 0.182, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.3376129339039466, |
| "grad_norm": 0.30754920840263367, |
| "learning_rate": 9.222610352492856e-05, |
| "loss": 0.144, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.339514978601997, |
| "grad_norm": 0.4364961087703705, |
| "learning_rate": 9.22197523023182e-05, |
| "loss": 0.1824, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.3414170233000475, |
| "grad_norm": 0.3395443260669708, |
| "learning_rate": 9.221340107970785e-05, |
| "loss": 0.1691, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.343319067998098, |
| "grad_norm": 0.34626251459121704, |
| "learning_rate": 9.220704985709749e-05, |
| "loss": 0.2285, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.3452211126961484, |
| "grad_norm": 0.316518098115921, |
| "learning_rate": 9.220069863448715e-05, |
| "loss": 0.1469, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.347123157394199, |
| "grad_norm": 0.38813212513923645, |
| "learning_rate": 9.219434741187679e-05, |
| "loss": 0.1907, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.3490252020922493, |
| "grad_norm": 0.3442121744155884, |
| "learning_rate": 9.218799618926643e-05, |
| "loss": 0.1398, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.3509272467902997, |
| "grad_norm": 0.3373865783214569, |
| "learning_rate": 9.218164496665608e-05, |
| "loss": 0.1477, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.3528292914883497, |
| "grad_norm": 0.39781641960144043, |
| "learning_rate": 9.217529374404573e-05, |
| "loss": 0.1766, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.3547313361864006, |
| "grad_norm": 0.25478801131248474, |
| "learning_rate": 9.216894252143537e-05, |
| "loss": 0.1301, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.3566333808844506, |
| "grad_norm": 0.350087970495224, |
| "learning_rate": 9.216259129882502e-05, |
| "loss": 0.161, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.358535425582501, |
| "grad_norm": 0.4105963408946991, |
| "learning_rate": 9.215624007621468e-05, |
| "loss": 0.1887, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.3604374702805515, |
| "grad_norm": 0.4141649007797241, |
| "learning_rate": 9.214988885360433e-05, |
| "loss": 0.333, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.362339514978602, |
| "grad_norm": 0.4416482448577881, |
| "learning_rate": 9.214353763099397e-05, |
| "loss": 0.2329, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.3642415596766524, |
| "grad_norm": 0.4285755753517151, |
| "learning_rate": 9.213718640838362e-05, |
| "loss": 0.2194, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.366143604374703, |
| "grad_norm": 0.33636924624443054, |
| "learning_rate": 9.213083518577327e-05, |
| "loss": 0.1853, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.3680456490727533, |
| "grad_norm": 0.40267783403396606, |
| "learning_rate": 9.212448396316291e-05, |
| "loss": 0.1837, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.3699476937708037, |
| "grad_norm": 0.3251781463623047, |
| "learning_rate": 9.211813274055256e-05, |
| "loss": 0.1853, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.371849738468854, |
| "grad_norm": 0.3559510111808777, |
| "learning_rate": 9.211178151794221e-05, |
| "loss": 0.1735, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.3737517831669046, |
| "grad_norm": 0.3483911454677582, |
| "learning_rate": 9.210543029533185e-05, |
| "loss": 0.156, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.375653827864955, |
| "grad_norm": 0.4093637764453888, |
| "learning_rate": 9.20990790727215e-05, |
| "loss": 0.2013, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.377555872563005, |
| "grad_norm": 0.38886240124702454, |
| "learning_rate": 9.209272785011115e-05, |
| "loss": 0.1723, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.3794579172610555, |
| "grad_norm": 0.3627004325389862, |
| "learning_rate": 9.20863766275008e-05, |
| "loss": 0.1639, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.381359961959106, |
| "grad_norm": 0.33721840381622314, |
| "learning_rate": 9.208002540489044e-05, |
| "loss": 0.1613, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.3832620066571564, |
| "grad_norm": 0.4337291121482849, |
| "learning_rate": 9.20736741822801e-05, |
| "loss": 0.2036, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.385164051355207, |
| "grad_norm": 0.43212467432022095, |
| "learning_rate": 9.206732295966975e-05, |
| "loss": 0.1925, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.3870660960532573, |
| "grad_norm": 0.3450334966182709, |
| "learning_rate": 9.206097173705939e-05, |
| "loss": 0.1489, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.3889681407513077, |
| "grad_norm": 0.36295151710510254, |
| "learning_rate": 9.205462051444902e-05, |
| "loss": 0.1801, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.390870185449358, |
| "grad_norm": 0.469532310962677, |
| "learning_rate": 9.204826929183869e-05, |
| "loss": 0.2163, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.3927722301474086, |
| "grad_norm": 0.4618028402328491, |
| "learning_rate": 9.204191806922833e-05, |
| "loss": 0.2175, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.394674274845459, |
| "grad_norm": 0.3891139030456543, |
| "learning_rate": 9.203556684661798e-05, |
| "loss": 0.1585, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.3965763195435095, |
| "grad_norm": 0.4574741721153259, |
| "learning_rate": 9.202921562400763e-05, |
| "loss": 0.2545, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.3984783642415595, |
| "grad_norm": 0.49759337306022644, |
| "learning_rate": 9.202286440139727e-05, |
| "loss": 0.2208, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.40038040893961, |
| "grad_norm": 0.3180585503578186, |
| "learning_rate": 9.201651317878692e-05, |
| "loss": 0.157, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.4022824536376604, |
| "grad_norm": 0.3678848147392273, |
| "learning_rate": 9.201016195617656e-05, |
| "loss": 0.1891, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.404184498335711, |
| "grad_norm": 0.3016449809074402, |
| "learning_rate": 9.200381073356623e-05, |
| "loss": 0.1295, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.4060865430337612, |
| "grad_norm": 0.522779643535614, |
| "learning_rate": 9.199745951095586e-05, |
| "loss": 0.2814, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.4079885877318117, |
| "grad_norm": 0.45210519433021545, |
| "learning_rate": 9.19911082883455e-05, |
| "loss": 0.234, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.409890632429862, |
| "grad_norm": 0.3812367022037506, |
| "learning_rate": 9.198475706573517e-05, |
| "loss": 0.2104, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.4117926771279126, |
| "grad_norm": 0.3120013177394867, |
| "learning_rate": 9.19784058431248e-05, |
| "loss": 0.1511, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.413694721825963, |
| "grad_norm": 0.34164851903915405, |
| "learning_rate": 9.197205462051446e-05, |
| "loss": 0.1607, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.4155967665240135, |
| "grad_norm": 0.3127415180206299, |
| "learning_rate": 9.19657033979041e-05, |
| "loss": 0.143, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.417498811222064, |
| "grad_norm": 0.4628545641899109, |
| "learning_rate": 9.195935217529375e-05, |
| "loss": 0.2187, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.419400855920114, |
| "grad_norm": 0.3645714223384857, |
| "learning_rate": 9.19530009526834e-05, |
| "loss": 0.1648, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.4213029006181643, |
| "grad_norm": 0.41127142310142517, |
| "learning_rate": 9.194664973007304e-05, |
| "loss": 0.1712, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.4232049453162148, |
| "grad_norm": 0.48663556575775146, |
| "learning_rate": 9.194029850746269e-05, |
| "loss": 0.2713, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.425106990014265, |
| "grad_norm": 0.3965604305267334, |
| "learning_rate": 9.193394728485234e-05, |
| "loss": 0.1766, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.4270090347123157, |
| "grad_norm": 0.4565601646900177, |
| "learning_rate": 9.192759606224198e-05, |
| "loss": 0.1827, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.428911079410366, |
| "grad_norm": 0.4272227883338928, |
| "learning_rate": 9.192124483963163e-05, |
| "loss": 0.1874, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.4308131241084165, |
| "grad_norm": 0.42560452222824097, |
| "learning_rate": 9.191489361702128e-05, |
| "loss": 0.1829, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.432715168806467, |
| "grad_norm": 0.30827009677886963, |
| "learning_rate": 9.190854239441092e-05, |
| "loss": 0.1747, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.4346172135045174, |
| "grad_norm": 0.3780437707901001, |
| "learning_rate": 9.190219117180057e-05, |
| "loss": 0.1955, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.436519258202568, |
| "grad_norm": 0.32639580965042114, |
| "learning_rate": 9.189583994919023e-05, |
| "loss": 0.1568, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.4384213029006183, |
| "grad_norm": 0.37228289246559143, |
| "learning_rate": 9.188948872657988e-05, |
| "loss": 0.1871, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.4403233475986688, |
| "grad_norm": 0.4045466482639313, |
| "learning_rate": 9.188313750396952e-05, |
| "loss": 0.2237, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.442225392296719, |
| "grad_norm": 0.40609246492385864, |
| "learning_rate": 9.187678628135917e-05, |
| "loss": 0.2313, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.444127436994769, |
| "grad_norm": 0.36473485827445984, |
| "learning_rate": 9.187043505874882e-05, |
| "loss": 0.2528, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.4460294816928196, |
| "grad_norm": 0.4154009222984314, |
| "learning_rate": 9.186408383613846e-05, |
| "loss": 0.215, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.44793152639087, |
| "grad_norm": 0.33488062024116516, |
| "learning_rate": 9.185773261352811e-05, |
| "loss": 0.1666, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.4498335710889205, |
| "grad_norm": 0.392004132270813, |
| "learning_rate": 9.185138139091776e-05, |
| "loss": 0.2127, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.451735615786971, |
| "grad_norm": 0.32925739884376526, |
| "learning_rate": 9.18450301683074e-05, |
| "loss": 0.1459, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.4536376604850214, |
| "grad_norm": 0.3380909264087677, |
| "learning_rate": 9.183867894569705e-05, |
| "loss": 0.1482, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.455539705183072, |
| "grad_norm": 0.47436705231666565, |
| "learning_rate": 9.18323277230867e-05, |
| "loss": 0.2652, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.4574417498811223, |
| "grad_norm": 0.39543116092681885, |
| "learning_rate": 9.182597650047634e-05, |
| "loss": 0.1762, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.4593437945791727, |
| "grad_norm": 0.4776802659034729, |
| "learning_rate": 9.181962527786599e-05, |
| "loss": 0.1967, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.461245839277223, |
| "grad_norm": 0.37519994378089905, |
| "learning_rate": 9.181327405525563e-05, |
| "loss": 0.1909, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.4631478839752736, |
| "grad_norm": 0.37666913866996765, |
| "learning_rate": 9.18069228326453e-05, |
| "loss": 0.1477, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.4650499286733236, |
| "grad_norm": 0.3830261528491974, |
| "learning_rate": 9.180057161003494e-05, |
| "loss": 0.1825, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.466951973371374, |
| "grad_norm": 0.4064732789993286, |
| "learning_rate": 9.179422038742457e-05, |
| "loss": 0.2, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.4688540180694245, |
| "grad_norm": 0.318314790725708, |
| "learning_rate": 9.178786916481424e-05, |
| "loss": 0.1543, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.470756062767475, |
| "grad_norm": 0.3804973065853119, |
| "learning_rate": 9.178151794220388e-05, |
| "loss": 0.2248, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.4726581074655254, |
| "grad_norm": 0.4222256541252136, |
| "learning_rate": 9.177516671959353e-05, |
| "loss": 0.2037, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.474560152163576, |
| "grad_norm": 0.4317629337310791, |
| "learning_rate": 9.176881549698317e-05, |
| "loss": 0.1914, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.4764621968616263, |
| "grad_norm": 0.4674796760082245, |
| "learning_rate": 9.176246427437282e-05, |
| "loss": 0.212, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.4783642415596767, |
| "grad_norm": 0.40157684683799744, |
| "learning_rate": 9.175611305176247e-05, |
| "loss": 0.1948, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.480266286257727, |
| "grad_norm": 0.37824416160583496, |
| "learning_rate": 9.174976182915211e-05, |
| "loss": 0.1849, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.4821683309557776, |
| "grad_norm": 0.5870863199234009, |
| "learning_rate": 9.174341060654177e-05, |
| "loss": 0.1586, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.484070375653828, |
| "grad_norm": 0.3794877529144287, |
| "learning_rate": 9.173705938393141e-05, |
| "loss": 0.2162, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.485972420351878, |
| "grad_norm": 0.40509578585624695, |
| "learning_rate": 9.173070816132105e-05, |
| "loss": 0.1895, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.4878744650499285, |
| "grad_norm": 0.37314295768737793, |
| "learning_rate": 9.17243569387107e-05, |
| "loss": 0.1926, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.489776509747979, |
| "grad_norm": 0.32264095544815063, |
| "learning_rate": 9.171800571610035e-05, |
| "loss": 0.1385, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.4916785544460294, |
| "grad_norm": 0.43269702792167664, |
| "learning_rate": 9.171165449348999e-05, |
| "loss": 0.2189, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.49358059914408, |
| "grad_norm": 0.330098956823349, |
| "learning_rate": 9.170530327087964e-05, |
| "loss": 0.168, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.4954826438421303, |
| "grad_norm": 0.2726501524448395, |
| "learning_rate": 9.16989520482693e-05, |
| "loss": 0.1306, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.4973846885401807, |
| "grad_norm": 0.27615344524383545, |
| "learning_rate": 9.169260082565895e-05, |
| "loss": 0.1361, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.499286733238231, |
| "grad_norm": 0.3685866594314575, |
| "learning_rate": 9.168624960304859e-05, |
| "loss": 0.1901, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.5011887779362816, |
| "grad_norm": 0.323897123336792, |
| "learning_rate": 9.167989838043824e-05, |
| "loss": 0.2608, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.503090822634332, |
| "grad_norm": 0.6715079545974731, |
| "learning_rate": 9.167354715782789e-05, |
| "loss": 0.199, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.5049928673323825, |
| "grad_norm": 0.32039186358451843, |
| "learning_rate": 9.166719593521753e-05, |
| "loss": 0.1723, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.5068949120304325, |
| "grad_norm": 0.3974270224571228, |
| "learning_rate": 9.166084471260718e-05, |
| "loss": 0.1659, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.5087969567284834, |
| "grad_norm": 0.3953278362751007, |
| "learning_rate": 9.165449348999683e-05, |
| "loss": 0.1879, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.5106990014265333, |
| "grad_norm": 0.4061002731323242, |
| "learning_rate": 9.164814226738647e-05, |
| "loss": 0.1858, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.512601046124584, |
| "grad_norm": 0.3816406726837158, |
| "learning_rate": 9.164179104477612e-05, |
| "loss": 0.1899, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.5145030908226342, |
| "grad_norm": 0.3856441378593445, |
| "learning_rate": 9.163543982216577e-05, |
| "loss": 0.1727, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.5164051355206847, |
| "grad_norm": 0.47267359495162964, |
| "learning_rate": 9.162908859955543e-05, |
| "loss": 0.2137, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.518307180218735, |
| "grad_norm": 0.41764524579048157, |
| "learning_rate": 9.162273737694506e-05, |
| "loss": 0.2138, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.5202092249167856, |
| "grad_norm": 0.42864158749580383, |
| "learning_rate": 9.161638615433472e-05, |
| "loss": 0.1919, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.522111269614836, |
| "grad_norm": 0.5067504048347473, |
| "learning_rate": 9.161003493172437e-05, |
| "loss": 0.2068, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.5240133143128864, |
| "grad_norm": 0.430951863527298, |
| "learning_rate": 9.1603683709114e-05, |
| "loss": 0.2195, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.525915359010937, |
| "grad_norm": 0.37973999977111816, |
| "learning_rate": 9.159733248650364e-05, |
| "loss": 0.1799, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.527817403708987, |
| "grad_norm": 0.362768292427063, |
| "learning_rate": 9.159098126389331e-05, |
| "loss": 0.1555, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.5297194484070378, |
| "grad_norm": 0.41433513164520264, |
| "learning_rate": 9.158463004128295e-05, |
| "loss": 0.1958, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.5316214931050878, |
| "grad_norm": 0.3091717064380646, |
| "learning_rate": 9.15782788186726e-05, |
| "loss": 0.1622, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.533523537803138, |
| "grad_norm": 0.35242778062820435, |
| "learning_rate": 9.157192759606225e-05, |
| "loss": 0.1627, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.5354255825011887, |
| "grad_norm": 0.38102760910987854, |
| "learning_rate": 9.156557637345189e-05, |
| "loss": 0.1663, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.537327627199239, |
| "grad_norm": 0.4313855469226837, |
| "learning_rate": 9.155922515084154e-05, |
| "loss": 0.208, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.5392296718972895, |
| "grad_norm": 0.33921730518341064, |
| "learning_rate": 9.155287392823118e-05, |
| "loss": 0.1572, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.54113171659534, |
| "grad_norm": 0.3824930489063263, |
| "learning_rate": 9.154652270562085e-05, |
| "loss": 0.1986, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.5430337612933904, |
| "grad_norm": 0.33059945702552795, |
| "learning_rate": 9.154017148301048e-05, |
| "loss": 0.156, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.544935805991441, |
| "grad_norm": 0.4880346357822418, |
| "learning_rate": 9.153382026040012e-05, |
| "loss": 0.2319, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.5468378506894913, |
| "grad_norm": 0.27151229977607727, |
| "learning_rate": 9.152746903778979e-05, |
| "loss": 0.128, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.5487398953875418, |
| "grad_norm": 0.35515275597572327, |
| "learning_rate": 9.152111781517943e-05, |
| "loss": 0.1685, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.550641940085592, |
| "grad_norm": 0.41455206274986267, |
| "learning_rate": 9.151476659256908e-05, |
| "loss": 0.2354, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.552543984783642, |
| "grad_norm": 0.3215075731277466, |
| "learning_rate": 9.150841536995872e-05, |
| "loss": 0.1653, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.554446029481693, |
| "grad_norm": 0.34158623218536377, |
| "learning_rate": 9.150206414734837e-05, |
| "loss": 0.1598, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.556348074179743, |
| "grad_norm": 0.4195705056190491, |
| "learning_rate": 9.149571292473802e-05, |
| "loss": 0.228, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.5582501188777935, |
| "grad_norm": 0.34753212332725525, |
| "learning_rate": 9.148936170212766e-05, |
| "loss": 0.1948, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.560152163575844, |
| "grad_norm": 0.43792131543159485, |
| "learning_rate": 9.148301047951731e-05, |
| "loss": 0.2191, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.5620542082738944, |
| "grad_norm": 0.35464513301849365, |
| "learning_rate": 9.147665925690696e-05, |
| "loss": 0.1555, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.563956252971945, |
| "grad_norm": 0.50618976354599, |
| "learning_rate": 9.14703080342966e-05, |
| "loss": 0.2262, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.5658582976699953, |
| "grad_norm": 0.3603616952896118, |
| "learning_rate": 9.146395681168625e-05, |
| "loss": 0.1647, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.5677603423680457, |
| "grad_norm": 0.486316978931427, |
| "learning_rate": 9.14576055890759e-05, |
| "loss": 0.2052, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.569662387066096, |
| "grad_norm": 0.45915400981903076, |
| "learning_rate": 9.145125436646554e-05, |
| "loss": 0.218, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.5715644317641466, |
| "grad_norm": 0.3178432583808899, |
| "learning_rate": 9.14449031438552e-05, |
| "loss": 0.1453, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.5734664764621966, |
| "grad_norm": 0.3939111828804016, |
| "learning_rate": 9.143855192124485e-05, |
| "loss": 0.1784, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.5753685211602475, |
| "grad_norm": 0.3399297595024109, |
| "learning_rate": 9.14322006986345e-05, |
| "loss": 0.1644, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.5772705658582975, |
| "grad_norm": 0.39880868792533875, |
| "learning_rate": 9.142584947602414e-05, |
| "loss": 0.2139, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.579172610556348, |
| "grad_norm": 0.40534335374832153, |
| "learning_rate": 9.141949825341379e-05, |
| "loss": 0.1872, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.5810746552543984, |
| "grad_norm": 0.3201380968093872, |
| "learning_rate": 9.141314703080344e-05, |
| "loss": 0.1557, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.582976699952449, |
| "grad_norm": 0.31011682748794556, |
| "learning_rate": 9.140679580819308e-05, |
| "loss": 0.1301, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.5848787446504993, |
| "grad_norm": 0.3697820007801056, |
| "learning_rate": 9.140044458558273e-05, |
| "loss": 0.1856, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.5867807893485497, |
| "grad_norm": 0.291369765996933, |
| "learning_rate": 9.139409336297238e-05, |
| "loss": 0.1323, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.5886828340466, |
| "grad_norm": 0.4111400842666626, |
| "learning_rate": 9.138774214036202e-05, |
| "loss": 0.2271, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.5905848787446506, |
| "grad_norm": 0.4169454872608185, |
| "learning_rate": 9.138139091775167e-05, |
| "loss": 0.199, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.592486923442701, |
| "grad_norm": 0.4209660589694977, |
| "learning_rate": 9.137503969514132e-05, |
| "loss": 0.2296, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.594388968140751, |
| "grad_norm": 0.3968026041984558, |
| "learning_rate": 9.136868847253096e-05, |
| "loss": 0.2174, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.596291012838802, |
| "grad_norm": 0.3477707803249359, |
| "learning_rate": 9.136233724992061e-05, |
| "loss": 0.1818, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.598193057536852, |
| "grad_norm": 0.3979746699333191, |
| "learning_rate": 9.135598602731025e-05, |
| "loss": 0.2373, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.6000951022349024, |
| "grad_norm": 0.32050615549087524, |
| "learning_rate": 9.134963480469992e-05, |
| "loss": 0.1562, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.601997146932953, |
| "grad_norm": 0.4675930142402649, |
| "learning_rate": 9.134328358208956e-05, |
| "loss": 0.2942, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.6038991916310033, |
| "grad_norm": 0.32259052991867065, |
| "learning_rate": 9.13369323594792e-05, |
| "loss": 0.1411, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.6058012363290537, |
| "grad_norm": 0.3838285803794861, |
| "learning_rate": 9.133058113686886e-05, |
| "loss": 0.2098, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.607703281027104, |
| "grad_norm": 0.4749825596809387, |
| "learning_rate": 9.13242299142585e-05, |
| "loss": 0.2621, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.6096053257251546, |
| "grad_norm": 0.3093271255493164, |
| "learning_rate": 9.131787869164815e-05, |
| "loss": 0.1389, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.611507370423205, |
| "grad_norm": 0.4896688461303711, |
| "learning_rate": 9.131152746903779e-05, |
| "loss": 0.2347, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.6134094151212555, |
| "grad_norm": 0.39409998059272766, |
| "learning_rate": 9.130517624642744e-05, |
| "loss": 0.2224, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.615311459819306, |
| "grad_norm": 0.39578184485435486, |
| "learning_rate": 9.129882502381709e-05, |
| "loss": 0.1963, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.6172135045173563, |
| "grad_norm": 0.34999507665634155, |
| "learning_rate": 9.129247380120673e-05, |
| "loss": 0.1612, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.6191155492154063, |
| "grad_norm": 0.33919695019721985, |
| "learning_rate": 9.12861225785964e-05, |
| "loss": 0.1813, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.6210175939134572, |
| "grad_norm": 0.3273175060749054, |
| "learning_rate": 9.127977135598603e-05, |
| "loss": 0.1436, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.6229196386115072, |
| "grad_norm": 0.4175270199775696, |
| "learning_rate": 9.127342013337567e-05, |
| "loss": 0.1832, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.6248216833095577, |
| "grad_norm": 0.3580436408519745, |
| "learning_rate": 9.126706891076532e-05, |
| "loss": 0.1569, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.626723728007608, |
| "grad_norm": 0.3683449625968933, |
| "learning_rate": 9.126071768815498e-05, |
| "loss": 0.1955, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.6286257727056586, |
| "grad_norm": 0.3830251395702362, |
| "learning_rate": 9.125436646554461e-05, |
| "loss": 0.1626, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.630527817403709, |
| "grad_norm": 0.3428569734096527, |
| "learning_rate": 9.124801524293427e-05, |
| "loss": 0.1477, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.6324298621017594, |
| "grad_norm": 0.4621574878692627, |
| "learning_rate": 9.124166402032392e-05, |
| "loss": 0.1675, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.63433190679981, |
| "grad_norm": 0.40000998973846436, |
| "learning_rate": 9.123531279771357e-05, |
| "loss": 0.1751, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.6362339514978603, |
| "grad_norm": 0.4612349271774292, |
| "learning_rate": 9.122896157510321e-05, |
| "loss": 0.2165, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.6381359961959108, |
| "grad_norm": 0.47919005155563354, |
| "learning_rate": 9.122261035249286e-05, |
| "loss": 0.2, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.6400380408939608, |
| "grad_norm": 0.5020009875297546, |
| "learning_rate": 9.121625912988251e-05, |
| "loss": 0.1997, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.6419400855920117, |
| "grad_norm": 0.4959258437156677, |
| "learning_rate": 9.120990790727215e-05, |
| "loss": 0.1903, |
| "step": 1389 |
| }, |
| { |
| "epoch": 2.6438421302900617, |
| "grad_norm": 0.4882603585720062, |
| "learning_rate": 9.12035566846618e-05, |
| "loss": 0.2082, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.645744174988112, |
| "grad_norm": 0.37479934096336365, |
| "learning_rate": 9.119720546205145e-05, |
| "loss": 0.179, |
| "step": 1391 |
| }, |
| { |
| "epoch": 2.6476462196861625, |
| "grad_norm": 0.5104106068611145, |
| "learning_rate": 9.119085423944109e-05, |
| "loss": 0.2281, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.649548264384213, |
| "grad_norm": 0.3893817663192749, |
| "learning_rate": 9.118450301683074e-05, |
| "loss": 0.2324, |
| "step": 1393 |
| }, |
| { |
| "epoch": 2.6514503090822634, |
| "grad_norm": 0.35762450098991394, |
| "learning_rate": 9.11781517942204e-05, |
| "loss": 0.1933, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.653352353780314, |
| "grad_norm": 0.37635737657546997, |
| "learning_rate": 9.117180057161005e-05, |
| "loss": 0.1869, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.6552543984783643, |
| "grad_norm": 0.3230188488960266, |
| "learning_rate": 9.116544934899969e-05, |
| "loss": 0.1576, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.6571564431764148, |
| "grad_norm": 0.3708724081516266, |
| "learning_rate": 9.115909812638934e-05, |
| "loss": 0.168, |
| "step": 1397 |
| }, |
| { |
| "epoch": 2.659058487874465, |
| "grad_norm": 0.34403741359710693, |
| "learning_rate": 9.115274690377899e-05, |
| "loss": 0.2721, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.660960532572515, |
| "grad_norm": 0.2812383770942688, |
| "learning_rate": 9.114639568116863e-05, |
| "loss": 0.1605, |
| "step": 1399 |
| }, |
| { |
| "epoch": 2.662862577270566, |
| "grad_norm": 0.39116060733795166, |
| "learning_rate": 9.114004445855827e-05, |
| "loss": 0.1843, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.664764621968616, |
| "grad_norm": 0.3641309440135956, |
| "learning_rate": 9.113369323594793e-05, |
| "loss": 0.1818, |
| "step": 1401 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.4198780953884125, |
| "learning_rate": 9.112734201333757e-05, |
| "loss": 0.2044, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.668568711364717, |
| "grad_norm": 0.3912922739982605, |
| "learning_rate": 9.112099079072722e-05, |
| "loss": 0.1881, |
| "step": 1403 |
| }, |
| { |
| "epoch": 2.6704707560627674, |
| "grad_norm": 0.4235263764858246, |
| "learning_rate": 9.111463956811686e-05, |
| "loss": 0.2034, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.672372800760818, |
| "grad_norm": 0.3731124699115753, |
| "learning_rate": 9.110828834550651e-05, |
| "loss": 0.1803, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.6742748454588683, |
| "grad_norm": 0.3907954692840576, |
| "learning_rate": 9.110193712289616e-05, |
| "loss": 0.2074, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.6761768901569187, |
| "grad_norm": 0.3954913914203644, |
| "learning_rate": 9.10955859002858e-05, |
| "loss": 0.1797, |
| "step": 1407 |
| }, |
| { |
| "epoch": 2.678078934854969, |
| "grad_norm": 0.5066515207290649, |
| "learning_rate": 9.108923467767547e-05, |
| "loss": 0.2096, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.6799809795530196, |
| "grad_norm": 0.4380313456058502, |
| "learning_rate": 9.10828834550651e-05, |
| "loss": 0.2064, |
| "step": 1409 |
| }, |
| { |
| "epoch": 2.68188302425107, |
| "grad_norm": 0.3758976459503174, |
| "learning_rate": 9.107653223245474e-05, |
| "loss": 0.2076, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.6837850689491205, |
| "grad_norm": 0.38098809123039246, |
| "learning_rate": 9.10701810098444e-05, |
| "loss": 0.1727, |
| "step": 1411 |
| }, |
| { |
| "epoch": 2.6856871136471705, |
| "grad_norm": 0.4027041792869568, |
| "learning_rate": 9.106382978723405e-05, |
| "loss": 0.154, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.6875891583452214, |
| "grad_norm": 0.307954877614975, |
| "learning_rate": 9.10574785646237e-05, |
| "loss": 0.1766, |
| "step": 1413 |
| }, |
| { |
| "epoch": 2.6894912030432714, |
| "grad_norm": 0.4232465326786041, |
| "learning_rate": 9.105112734201334e-05, |
| "loss": 0.1866, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.691393247741322, |
| "grad_norm": 0.4296838641166687, |
| "learning_rate": 9.104477611940299e-05, |
| "loss": 0.1813, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.6932952924393723, |
| "grad_norm": 0.3334490954875946, |
| "learning_rate": 9.103842489679264e-05, |
| "loss": 0.1576, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.6951973371374227, |
| "grad_norm": 0.42984020709991455, |
| "learning_rate": 9.103207367418228e-05, |
| "loss": 0.1945, |
| "step": 1417 |
| }, |
| { |
| "epoch": 2.697099381835473, |
| "grad_norm": 0.4306494891643524, |
| "learning_rate": 9.102572245157193e-05, |
| "loss": 0.179, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.6990014265335236, |
| "grad_norm": 0.38049131631851196, |
| "learning_rate": 9.101937122896158e-05, |
| "loss": 0.1951, |
| "step": 1419 |
| }, |
| { |
| "epoch": 2.700903471231574, |
| "grad_norm": 0.3691817820072174, |
| "learning_rate": 9.101302000635122e-05, |
| "loss": 0.1725, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.7028055159296245, |
| "grad_norm": 0.32240816950798035, |
| "learning_rate": 9.100666878374087e-05, |
| "loss": 0.1852, |
| "step": 1421 |
| }, |
| { |
| "epoch": 2.704707560627675, |
| "grad_norm": 0.3735920488834381, |
| "learning_rate": 9.100031756113053e-05, |
| "loss": 0.1857, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.706609605325725, |
| "grad_norm": 0.3693629801273346, |
| "learning_rate": 9.099396633852016e-05, |
| "loss": 0.1698, |
| "step": 1423 |
| }, |
| { |
| "epoch": 2.708511650023776, |
| "grad_norm": 0.40846189856529236, |
| "learning_rate": 9.098761511590982e-05, |
| "loss": 0.2531, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.710413694721826, |
| "grad_norm": 0.3387136161327362, |
| "learning_rate": 9.098126389329947e-05, |
| "loss": 0.152, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.7123157394198762, |
| "grad_norm": 0.43113890290260315, |
| "learning_rate": 9.097491267068912e-05, |
| "loss": 0.1939, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.7142177841179267, |
| "grad_norm": 0.45811060070991516, |
| "learning_rate": 9.096856144807876e-05, |
| "loss": 0.2217, |
| "step": 1427 |
| }, |
| { |
| "epoch": 2.716119828815977, |
| "grad_norm": 0.3742765486240387, |
| "learning_rate": 9.096221022546841e-05, |
| "loss": 0.183, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.7180218735140276, |
| "grad_norm": 0.39835286140441895, |
| "learning_rate": 9.095585900285806e-05, |
| "loss": 0.248, |
| "step": 1429 |
| }, |
| { |
| "epoch": 2.719923918212078, |
| "grad_norm": 0.38528379797935486, |
| "learning_rate": 9.09495077802477e-05, |
| "loss": 0.1642, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.7218259629101285, |
| "grad_norm": 0.4142857789993286, |
| "learning_rate": 9.094315655763735e-05, |
| "loss": 0.1817, |
| "step": 1431 |
| }, |
| { |
| "epoch": 2.723728007608179, |
| "grad_norm": 0.4072723388671875, |
| "learning_rate": 9.0936805335027e-05, |
| "loss": 0.2017, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.7256300523062293, |
| "grad_norm": 0.37081822752952576, |
| "learning_rate": 9.093045411241664e-05, |
| "loss": 0.2262, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.7275320970042793, |
| "grad_norm": 0.3628768026828766, |
| "learning_rate": 9.092410288980629e-05, |
| "loss": 0.1714, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.7294341417023302, |
| "grad_norm": 0.46637046337127686, |
| "learning_rate": 9.091775166719594e-05, |
| "loss": 0.3189, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.7313361864003802, |
| "grad_norm": 0.2643025517463684, |
| "learning_rate": 9.091140044458558e-05, |
| "loss": 0.234, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.7332382310984307, |
| "grad_norm": 0.36125344038009644, |
| "learning_rate": 9.090504922197523e-05, |
| "loss": 0.1981, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.735140275796481, |
| "grad_norm": 0.3064311742782593, |
| "learning_rate": 9.089869799936487e-05, |
| "loss": 0.1644, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.7370423204945316, |
| "grad_norm": 0.372164249420166, |
| "learning_rate": 9.089234677675454e-05, |
| "loss": 0.2023, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.738944365192582, |
| "grad_norm": 0.346431165933609, |
| "learning_rate": 9.088599555414418e-05, |
| "loss": 0.1913, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.7408464098906324, |
| "grad_norm": 0.3421470522880554, |
| "learning_rate": 9.087964433153382e-05, |
| "loss": 0.1599, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.742748454588683, |
| "grad_norm": 0.33351296186447144, |
| "learning_rate": 9.087329310892348e-05, |
| "loss": 0.1775, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.7446504992867333, |
| "grad_norm": 0.3450356721878052, |
| "learning_rate": 9.086694188631312e-05, |
| "loss": 0.199, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.7465525439847838, |
| "grad_norm": 0.34339770674705505, |
| "learning_rate": 9.086059066370277e-05, |
| "loss": 0.1608, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.748454588682834, |
| "grad_norm": 0.35941675305366516, |
| "learning_rate": 9.085423944109241e-05, |
| "loss": 0.1566, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.7503566333808847, |
| "grad_norm": 0.396847665309906, |
| "learning_rate": 9.084788821848206e-05, |
| "loss": 0.1829, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.7522586780789347, |
| "grad_norm": 0.3818894028663635, |
| "learning_rate": 9.084153699587171e-05, |
| "loss": 0.2017, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.754160722776985, |
| "grad_norm": 0.46124422550201416, |
| "learning_rate": 9.083518577326135e-05, |
| "loss": 0.185, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.7560627674750355, |
| "grad_norm": 0.4047834575176239, |
| "learning_rate": 9.082883455065102e-05, |
| "loss": 0.1848, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.757964812173086, |
| "grad_norm": 0.5650888085365295, |
| "learning_rate": 9.082248332804065e-05, |
| "loss": 0.2274, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.7598668568711364, |
| "grad_norm": 0.35878250002861023, |
| "learning_rate": 9.081613210543029e-05, |
| "loss": 0.1414, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.761768901569187, |
| "grad_norm": 0.37223199009895325, |
| "learning_rate": 9.080978088281994e-05, |
| "loss": 0.1718, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.7636709462672373, |
| "grad_norm": 0.34717050194740295, |
| "learning_rate": 9.08034296602096e-05, |
| "loss": 0.1719, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.7655729909652877, |
| "grad_norm": 0.4706629812717438, |
| "learning_rate": 9.079707843759923e-05, |
| "loss": 0.1953, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.767475035663338, |
| "grad_norm": 0.40658390522003174, |
| "learning_rate": 9.079072721498889e-05, |
| "loss": 0.1723, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.7693770803613886, |
| "grad_norm": 0.5025349855422974, |
| "learning_rate": 9.078437599237854e-05, |
| "loss": 0.2122, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.771279125059439, |
| "grad_norm": 0.4134734272956848, |
| "learning_rate": 9.077802476976819e-05, |
| "loss": 0.1872, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.773181169757489, |
| "grad_norm": 0.4301147162914276, |
| "learning_rate": 9.077167354715783e-05, |
| "loss": 0.2102, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.77508321445554, |
| "grad_norm": 0.4295254051685333, |
| "learning_rate": 9.076532232454748e-05, |
| "loss": 0.2132, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.77698525915359, |
| "grad_norm": 0.40130358934402466, |
| "learning_rate": 9.075897110193713e-05, |
| "loss": 0.1891, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.7788873038516404, |
| "grad_norm": 0.4124513268470764, |
| "learning_rate": 9.075261987932677e-05, |
| "loss": 0.204, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.780789348549691, |
| "grad_norm": 0.3976169526576996, |
| "learning_rate": 9.074626865671642e-05, |
| "loss": 0.2016, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.7826913932477413, |
| "grad_norm": 0.3949052095413208, |
| "learning_rate": 9.073991743410607e-05, |
| "loss": 0.1924, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.7845934379457917, |
| "grad_norm": 0.4033919870853424, |
| "learning_rate": 9.073356621149571e-05, |
| "loss": 0.1967, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.786495482643842, |
| "grad_norm": 0.32922443747520447, |
| "learning_rate": 9.072721498888536e-05, |
| "loss": 0.1639, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.7883975273418926, |
| "grad_norm": 0.372179239988327, |
| "learning_rate": 9.072086376627502e-05, |
| "loss": 0.1783, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.790299572039943, |
| "grad_norm": 0.45123547315597534, |
| "learning_rate": 9.071451254366467e-05, |
| "loss": 0.2573, |
| "step": 1467 |
| }, |
| { |
| "epoch": 2.7922016167379935, |
| "grad_norm": 0.33130937814712524, |
| "learning_rate": 9.07081613210543e-05, |
| "loss": 0.1427, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.7941036614360435, |
| "grad_norm": 0.4377565085887909, |
| "learning_rate": 9.070181009844394e-05, |
| "loss": 0.1915, |
| "step": 1469 |
| }, |
| { |
| "epoch": 2.7960057061340944, |
| "grad_norm": 0.555698037147522, |
| "learning_rate": 9.069545887583361e-05, |
| "loss": 0.2495, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.7979077508321444, |
| "grad_norm": 0.4749322831630707, |
| "learning_rate": 9.068910765322325e-05, |
| "loss": 0.1944, |
| "step": 1471 |
| }, |
| { |
| "epoch": 2.799809795530195, |
| "grad_norm": 0.3543435335159302, |
| "learning_rate": 9.068275643061289e-05, |
| "loss": 0.1669, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.8017118402282453, |
| "grad_norm": 0.33086055517196655, |
| "learning_rate": 9.067640520800255e-05, |
| "loss": 0.1792, |
| "step": 1473 |
| }, |
| { |
| "epoch": 2.8036138849262957, |
| "grad_norm": 0.3898443281650543, |
| "learning_rate": 9.067005398539219e-05, |
| "loss": 0.171, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.805515929624346, |
| "grad_norm": 0.4169894754886627, |
| "learning_rate": 9.066370276278184e-05, |
| "loss": 0.2057, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.8074179743223966, |
| "grad_norm": 0.37259283661842346, |
| "learning_rate": 9.065735154017148e-05, |
| "loss": 0.1799, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.809320019020447, |
| "grad_norm": 0.3892917037010193, |
| "learning_rate": 9.065100031756113e-05, |
| "loss": 0.1847, |
| "step": 1477 |
| }, |
| { |
| "epoch": 2.8112220637184975, |
| "grad_norm": 0.5309971570968628, |
| "learning_rate": 9.064464909495078e-05, |
| "loss": 0.2462, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.813124108416548, |
| "grad_norm": 0.3646765351295471, |
| "learning_rate": 9.063829787234042e-05, |
| "loss": 0.168, |
| "step": 1479 |
| }, |
| { |
| "epoch": 2.815026153114598, |
| "grad_norm": 0.3424735963344574, |
| "learning_rate": 9.063194664973009e-05, |
| "loss": 0.1547, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.816928197812649, |
| "grad_norm": 0.38415202498435974, |
| "learning_rate": 9.062559542711973e-05, |
| "loss": 0.2186, |
| "step": 1481 |
| }, |
| { |
| "epoch": 2.818830242510699, |
| "grad_norm": 0.4032725393772125, |
| "learning_rate": 9.061924420450936e-05, |
| "loss": 0.1802, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.8207322872087492, |
| "grad_norm": 0.35286685824394226, |
| "learning_rate": 9.061289298189902e-05, |
| "loss": 0.139, |
| "step": 1483 |
| }, |
| { |
| "epoch": 2.8226343319067997, |
| "grad_norm": 0.35866954922676086, |
| "learning_rate": 9.060654175928867e-05, |
| "loss": 0.2022, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.82453637660485, |
| "grad_norm": 0.36488500237464905, |
| "learning_rate": 9.060019053667832e-05, |
| "loss": 0.1816, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.8264384213029006, |
| "grad_norm": 0.4557202160358429, |
| "learning_rate": 9.059383931406796e-05, |
| "loss": 0.1975, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.828340466000951, |
| "grad_norm": 0.32717350125312805, |
| "learning_rate": 9.058748809145761e-05, |
| "loss": 0.1639, |
| "step": 1487 |
| }, |
| { |
| "epoch": 2.8302425106990015, |
| "grad_norm": 0.41179734468460083, |
| "learning_rate": 9.058113686884726e-05, |
| "loss": 0.1841, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.832144555397052, |
| "grad_norm": 0.3747973144054413, |
| "learning_rate": 9.05747856462369e-05, |
| "loss": 0.1678, |
| "step": 1489 |
| }, |
| { |
| "epoch": 2.8340466000951023, |
| "grad_norm": 0.41899365186691284, |
| "learning_rate": 9.056843442362655e-05, |
| "loss": 0.2753, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.835948644793153, |
| "grad_norm": 0.397416889667511, |
| "learning_rate": 9.05620832010162e-05, |
| "loss": 0.1658, |
| "step": 1491 |
| }, |
| { |
| "epoch": 2.8378506894912032, |
| "grad_norm": 0.3874271810054779, |
| "learning_rate": 9.055573197840584e-05, |
| "loss": 0.1808, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.8397527341892532, |
| "grad_norm": 0.3698302209377289, |
| "learning_rate": 9.05493807557955e-05, |
| "loss": 0.1869, |
| "step": 1493 |
| }, |
| { |
| "epoch": 2.841654778887304, |
| "grad_norm": 0.3908369541168213, |
| "learning_rate": 9.054302953318515e-05, |
| "loss": 0.1866, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.843556823585354, |
| "grad_norm": 0.5696883201599121, |
| "learning_rate": 9.053667831057478e-05, |
| "loss": 0.2083, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.8454588682834046, |
| "grad_norm": 0.3560580611228943, |
| "learning_rate": 9.053032708796444e-05, |
| "loss": 0.1829, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.847360912981455, |
| "grad_norm": 0.4369358718395233, |
| "learning_rate": 9.052397586535409e-05, |
| "loss": 0.2302, |
| "step": 1497 |
| }, |
| { |
| "epoch": 2.8492629576795054, |
| "grad_norm": 0.4240768551826477, |
| "learning_rate": 9.051762464274374e-05, |
| "loss": 0.2204, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.851165002377556, |
| "grad_norm": 0.4078483581542969, |
| "learning_rate": 9.051127342013338e-05, |
| "loss": 0.181, |
| "step": 1499 |
| }, |
| { |
| "epoch": 2.8530670470756063, |
| "grad_norm": 0.4196905195713043, |
| "learning_rate": 9.050492219752303e-05, |
| "loss": 0.2147, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.8549690917736568, |
| "grad_norm": 0.3858025372028351, |
| "learning_rate": 9.049857097491268e-05, |
| "loss": 0.1719, |
| "step": 1501 |
| }, |
| { |
| "epoch": 2.856871136471707, |
| "grad_norm": 0.3923434019088745, |
| "learning_rate": 9.049221975230232e-05, |
| "loss": 0.1966, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.8587731811697576, |
| "grad_norm": 0.38231122493743896, |
| "learning_rate": 9.048586852969197e-05, |
| "loss": 0.186, |
| "step": 1503 |
| }, |
| { |
| "epoch": 2.8606752258678076, |
| "grad_norm": 0.3579331040382385, |
| "learning_rate": 9.047951730708162e-05, |
| "loss": 0.1777, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.8625772705658585, |
| "grad_norm": 0.2968972623348236, |
| "learning_rate": 9.047316608447126e-05, |
| "loss": 0.1456, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.8644793152639085, |
| "grad_norm": 0.3534374535083771, |
| "learning_rate": 9.046681486186091e-05, |
| "loss": 0.178, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.866381359961959, |
| "grad_norm": 0.4368778169155121, |
| "learning_rate": 9.046046363925057e-05, |
| "loss": 0.2349, |
| "step": 1507 |
| }, |
| { |
| "epoch": 2.8682834046600094, |
| "grad_norm": 0.43825942277908325, |
| "learning_rate": 9.04541124166402e-05, |
| "loss": 0.1857, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.87018544935806, |
| "grad_norm": 0.35765841603279114, |
| "learning_rate": 9.044776119402986e-05, |
| "loss": 0.1787, |
| "step": 1509 |
| }, |
| { |
| "epoch": 2.8720874940561103, |
| "grad_norm": 0.35496601462364197, |
| "learning_rate": 9.04414099714195e-05, |
| "loss": 0.1776, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.8739895387541607, |
| "grad_norm": 0.39673030376434326, |
| "learning_rate": 9.043505874880916e-05, |
| "loss": 0.1916, |
| "step": 1511 |
| }, |
| { |
| "epoch": 2.875891583452211, |
| "grad_norm": 0.3670983612537384, |
| "learning_rate": 9.04287075261988e-05, |
| "loss": 0.1726, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.8777936281502616, |
| "grad_norm": 0.4254002273082733, |
| "learning_rate": 9.042235630358844e-05, |
| "loss": 0.224, |
| "step": 1513 |
| }, |
| { |
| "epoch": 2.879695672848312, |
| "grad_norm": 0.37891489267349243, |
| "learning_rate": 9.041600508097809e-05, |
| "loss": 0.1805, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.881597717546362, |
| "grad_norm": 0.33309099078178406, |
| "learning_rate": 9.040965385836774e-05, |
| "loss": 0.1442, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.883499762244413, |
| "grad_norm": 0.4709990918636322, |
| "learning_rate": 9.040330263575739e-05, |
| "loss": 0.2342, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.885401806942463, |
| "grad_norm": 0.41639766097068787, |
| "learning_rate": 9.039695141314703e-05, |
| "loss": 0.1772, |
| "step": 1517 |
| }, |
| { |
| "epoch": 2.8873038516405134, |
| "grad_norm": 0.37914562225341797, |
| "learning_rate": 9.039060019053668e-05, |
| "loss": 0.1632, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.889205896338564, |
| "grad_norm": 0.4000544250011444, |
| "learning_rate": 9.038424896792633e-05, |
| "loss": 0.1927, |
| "step": 1519 |
| }, |
| { |
| "epoch": 2.8911079410366143, |
| "grad_norm": 0.42467859387397766, |
| "learning_rate": 9.037789774531597e-05, |
| "loss": 0.1789, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.8930099857346647, |
| "grad_norm": 0.46945691108703613, |
| "learning_rate": 9.037154652270562e-05, |
| "loss": 0.197, |
| "step": 1521 |
| }, |
| { |
| "epoch": 2.894912030432715, |
| "grad_norm": 0.43455827236175537, |
| "learning_rate": 9.036519530009528e-05, |
| "loss": 0.1877, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.8968140751307656, |
| "grad_norm": 0.5169146656990051, |
| "learning_rate": 9.035884407748491e-05, |
| "loss": 0.2039, |
| "step": 1523 |
| }, |
| { |
| "epoch": 2.898716119828816, |
| "grad_norm": 0.42767763137817383, |
| "learning_rate": 9.035249285487457e-05, |
| "loss": 0.2123, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.9006181645268665, |
| "grad_norm": 0.40808382630348206, |
| "learning_rate": 9.034614163226422e-05, |
| "loss": 0.2617, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.902520209224917, |
| "grad_norm": 0.3179365396499634, |
| "learning_rate": 9.033979040965386e-05, |
| "loss": 0.1548, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.9044222539229674, |
| "grad_norm": 0.259781152009964, |
| "learning_rate": 9.033343918704351e-05, |
| "loss": 0.1299, |
| "step": 1527 |
| }, |
| { |
| "epoch": 2.9063242986210174, |
| "grad_norm": 0.40235599875450134, |
| "learning_rate": 9.032708796443316e-05, |
| "loss": 0.1957, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.9082263433190683, |
| "grad_norm": 0.3170933127403259, |
| "learning_rate": 9.032073674182281e-05, |
| "loss": 0.1594, |
| "step": 1529 |
| }, |
| { |
| "epoch": 2.9101283880171183, |
| "grad_norm": 0.31572115421295166, |
| "learning_rate": 9.031438551921245e-05, |
| "loss": 0.1922, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.9120304327151687, |
| "grad_norm": 0.4456964433193207, |
| "learning_rate": 9.03080342966021e-05, |
| "loss": 0.2459, |
| "step": 1531 |
| }, |
| { |
| "epoch": 2.913932477413219, |
| "grad_norm": 0.3345606327056885, |
| "learning_rate": 9.030168307399175e-05, |
| "loss": 0.1708, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.9158345221112696, |
| "grad_norm": 0.4247712790966034, |
| "learning_rate": 9.029533185138139e-05, |
| "loss": 0.227, |
| "step": 1533 |
| }, |
| { |
| "epoch": 2.91773656680932, |
| "grad_norm": 0.3642347455024719, |
| "learning_rate": 9.028898062877104e-05, |
| "loss": 0.1971, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.9196386115073705, |
| "grad_norm": 0.40530455112457275, |
| "learning_rate": 9.02826294061607e-05, |
| "loss": 0.1574, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.921540656205421, |
| "grad_norm": 0.5143640637397766, |
| "learning_rate": 9.027627818355033e-05, |
| "loss": 0.207, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.9234427009034714, |
| "grad_norm": 0.4270274043083191, |
| "learning_rate": 9.026992696093999e-05, |
| "loss": 0.1971, |
| "step": 1537 |
| }, |
| { |
| "epoch": 2.925344745601522, |
| "grad_norm": 0.5170589685440063, |
| "learning_rate": 9.026357573832964e-05, |
| "loss": 0.2768, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.927246790299572, |
| "grad_norm": 0.41313278675079346, |
| "learning_rate": 9.025722451571929e-05, |
| "loss": 0.1765, |
| "step": 1539 |
| }, |
| { |
| "epoch": 2.9291488349976227, |
| "grad_norm": 0.4040130078792572, |
| "learning_rate": 9.025087329310893e-05, |
| "loss": 0.2002, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.9310508796956727, |
| "grad_norm": 0.37281498312950134, |
| "learning_rate": 9.024452207049857e-05, |
| "loss": 0.1542, |
| "step": 1541 |
| }, |
| { |
| "epoch": 2.932952924393723, |
| "grad_norm": 0.5352873802185059, |
| "learning_rate": 9.023817084788823e-05, |
| "loss": 0.2437, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.9348549690917736, |
| "grad_norm": 0.4044128358364105, |
| "learning_rate": 9.023181962527787e-05, |
| "loss": 0.1721, |
| "step": 1543 |
| }, |
| { |
| "epoch": 2.936757013789824, |
| "grad_norm": 0.35553574562072754, |
| "learning_rate": 9.022546840266751e-05, |
| "loss": 0.1838, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.9386590584878745, |
| "grad_norm": 0.42568060755729675, |
| "learning_rate": 9.021911718005717e-05, |
| "loss": 0.2022, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.940561103185925, |
| "grad_norm": 0.453700453042984, |
| "learning_rate": 9.021276595744681e-05, |
| "loss": 0.1866, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.9424631478839753, |
| "grad_norm": 0.3909238576889038, |
| "learning_rate": 9.020641473483646e-05, |
| "loss": 0.1628, |
| "step": 1547 |
| }, |
| { |
| "epoch": 2.944365192582026, |
| "grad_norm": 0.39725926518440247, |
| "learning_rate": 9.02000635122261e-05, |
| "loss": 0.217, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.9462672372800762, |
| "grad_norm": 0.34860628843307495, |
| "learning_rate": 9.019371228961575e-05, |
| "loss": 0.1724, |
| "step": 1549 |
| }, |
| { |
| "epoch": 2.948169281978126, |
| "grad_norm": 0.38813674449920654, |
| "learning_rate": 9.01873610670054e-05, |
| "loss": 0.2047, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.950071326676177, |
| "grad_norm": 0.37160560488700867, |
| "learning_rate": 9.018100984439504e-05, |
| "loss": 0.2119, |
| "step": 1551 |
| }, |
| { |
| "epoch": 2.951973371374227, |
| "grad_norm": 0.4166210889816284, |
| "learning_rate": 9.017465862178471e-05, |
| "loss": 0.2215, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.9538754160722775, |
| "grad_norm": 0.3657042980194092, |
| "learning_rate": 9.016830739917435e-05, |
| "loss": 0.1924, |
| "step": 1553 |
| }, |
| { |
| "epoch": 2.955777460770328, |
| "grad_norm": 0.37292999029159546, |
| "learning_rate": 9.016195617656399e-05, |
| "loss": 0.2329, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.9576795054683784, |
| "grad_norm": 0.3373647928237915, |
| "learning_rate": 9.015560495395364e-05, |
| "loss": 0.2034, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.959581550166429, |
| "grad_norm": 0.31643402576446533, |
| "learning_rate": 9.014925373134329e-05, |
| "loss": 0.1713, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.9614835948644793, |
| "grad_norm": 0.3107222318649292, |
| "learning_rate": 9.014290250873294e-05, |
| "loss": 0.1511, |
| "step": 1557 |
| }, |
| { |
| "epoch": 2.9633856395625298, |
| "grad_norm": 0.32063353061676025, |
| "learning_rate": 9.013655128612258e-05, |
| "loss": 0.1581, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.96528768426058, |
| "grad_norm": 0.4035079777240753, |
| "learning_rate": 9.013020006351223e-05, |
| "loss": 0.2036, |
| "step": 1559 |
| }, |
| { |
| "epoch": 2.9671897289586306, |
| "grad_norm": 0.28573077917099, |
| "learning_rate": 9.012384884090188e-05, |
| "loss": 0.1388, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.969091773656681, |
| "grad_norm": 0.38853904604911804, |
| "learning_rate": 9.011749761829152e-05, |
| "loss": 0.1981, |
| "step": 1561 |
| }, |
| { |
| "epoch": 2.9709938183547315, |
| "grad_norm": 0.39904823899269104, |
| "learning_rate": 9.011114639568117e-05, |
| "loss": 0.2249, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.9728958630527815, |
| "grad_norm": 0.3704228103160858, |
| "learning_rate": 9.010479517307082e-05, |
| "loss": 0.2176, |
| "step": 1563 |
| }, |
| { |
| "epoch": 2.9747979077508324, |
| "grad_norm": 0.3712176978588104, |
| "learning_rate": 9.009844395046046e-05, |
| "loss": 0.1685, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.9766999524488824, |
| "grad_norm": 0.47927892208099365, |
| "learning_rate": 9.009209272785011e-05, |
| "loss": 0.2027, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.978601997146933, |
| "grad_norm": 0.4230005443096161, |
| "learning_rate": 9.008574150523977e-05, |
| "loss": 0.212, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.9805040418449833, |
| "grad_norm": 0.32152169942855835, |
| "learning_rate": 9.00793902826294e-05, |
| "loss": 0.1639, |
| "step": 1567 |
| }, |
| { |
| "epoch": 2.9824060865430337, |
| "grad_norm": 0.42794153094291687, |
| "learning_rate": 9.007303906001906e-05, |
| "loss": 0.2143, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.984308131241084, |
| "grad_norm": 0.37590306997299194, |
| "learning_rate": 9.006668783740871e-05, |
| "loss": 0.189, |
| "step": 1569 |
| }, |
| { |
| "epoch": 2.9862101759391346, |
| "grad_norm": 0.3247901201248169, |
| "learning_rate": 9.006033661479836e-05, |
| "loss": 0.1616, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.988112220637185, |
| "grad_norm": 0.36269792914390564, |
| "learning_rate": 9.0053985392188e-05, |
| "loss": 0.2037, |
| "step": 1571 |
| }, |
| { |
| "epoch": 2.9900142653352355, |
| "grad_norm": 0.4436742067337036, |
| "learning_rate": 9.004763416957764e-05, |
| "loss": 0.202, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.991916310033286, |
| "grad_norm": 0.45660001039505005, |
| "learning_rate": 9.00412829469673e-05, |
| "loss": 0.2298, |
| "step": 1573 |
| }, |
| { |
| "epoch": 2.993818354731336, |
| "grad_norm": 0.3276821970939636, |
| "learning_rate": 9.003493172435694e-05, |
| "loss": 0.158, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.995720399429387, |
| "grad_norm": 0.3427131175994873, |
| "learning_rate": 9.002858050174659e-05, |
| "loss": 0.1781, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.997622444127437, |
| "grad_norm": 0.38842669129371643, |
| "learning_rate": 9.002222927913624e-05, |
| "loss": 0.1905, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.9995244888254873, |
| "grad_norm": 0.4034234285354614, |
| "learning_rate": 9.001587805652588e-05, |
| "loss": 0.1989, |
| "step": 1577 |
| }, |
| { |
| "epoch": 3.0014265335235377, |
| "grad_norm": 0.23682546615600586, |
| "learning_rate": 9.000952683391553e-05, |
| "loss": 0.0968, |
| "step": 1578 |
| }, |
| { |
| "epoch": 3.003328578221588, |
| "grad_norm": 0.23321636021137238, |
| "learning_rate": 9.000317561130517e-05, |
| "loss": 0.1278, |
| "step": 1579 |
| }, |
| { |
| "epoch": 3.0052306229196386, |
| "grad_norm": 0.2891576290130615, |
| "learning_rate": 8.999682438869482e-05, |
| "loss": 0.1297, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.007132667617689, |
| "grad_norm": 0.30067315697669983, |
| "learning_rate": 8.999047316608448e-05, |
| "loss": 0.1216, |
| "step": 1581 |
| }, |
| { |
| "epoch": 3.0090347123157395, |
| "grad_norm": 0.25676554441452026, |
| "learning_rate": 8.998412194347411e-05, |
| "loss": 0.1167, |
| "step": 1582 |
| }, |
| { |
| "epoch": 3.01093675701379, |
| "grad_norm": 0.30124133825302124, |
| "learning_rate": 8.997777072086378e-05, |
| "loss": 0.1243, |
| "step": 1583 |
| }, |
| { |
| "epoch": 3.0128388017118404, |
| "grad_norm": 0.30313733220100403, |
| "learning_rate": 8.997141949825342e-05, |
| "loss": 0.127, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.014740846409891, |
| "grad_norm": 0.36067837476730347, |
| "learning_rate": 8.996506827564306e-05, |
| "loss": 0.1331, |
| "step": 1585 |
| }, |
| { |
| "epoch": 3.0166428911079413, |
| "grad_norm": 0.3327738642692566, |
| "learning_rate": 8.995871705303271e-05, |
| "loss": 0.1304, |
| "step": 1586 |
| }, |
| { |
| "epoch": 3.0185449358059913, |
| "grad_norm": 0.2918979525566101, |
| "learning_rate": 8.995236583042236e-05, |
| "loss": 0.1127, |
| "step": 1587 |
| }, |
| { |
| "epoch": 3.0204469805040417, |
| "grad_norm": 0.40982192754745483, |
| "learning_rate": 8.994601460781201e-05, |
| "loss": 0.1283, |
| "step": 1588 |
| }, |
| { |
| "epoch": 3.022349025202092, |
| "grad_norm": 0.37201565504074097, |
| "learning_rate": 8.993966338520165e-05, |
| "loss": 0.1198, |
| "step": 1589 |
| }, |
| { |
| "epoch": 3.0242510699001426, |
| "grad_norm": 0.4271756708621979, |
| "learning_rate": 8.99333121625913e-05, |
| "loss": 0.1218, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.026153114598193, |
| "grad_norm": 0.3430047035217285, |
| "learning_rate": 8.992696093998095e-05, |
| "loss": 0.1213, |
| "step": 1591 |
| }, |
| { |
| "epoch": 3.0280551592962435, |
| "grad_norm": 0.3253467381000519, |
| "learning_rate": 8.992060971737059e-05, |
| "loss": 0.1124, |
| "step": 1592 |
| }, |
| { |
| "epoch": 3.029957203994294, |
| "grad_norm": 0.38685157895088196, |
| "learning_rate": 8.991425849476024e-05, |
| "loss": 0.112, |
| "step": 1593 |
| }, |
| { |
| "epoch": 3.0318592486923444, |
| "grad_norm": 0.36162498593330383, |
| "learning_rate": 8.99079072721499e-05, |
| "loss": 0.1061, |
| "step": 1594 |
| }, |
| { |
| "epoch": 3.033761293390395, |
| "grad_norm": 0.32084980607032776, |
| "learning_rate": 8.990155604953953e-05, |
| "loss": 0.0965, |
| "step": 1595 |
| }, |
| { |
| "epoch": 3.0356633380884452, |
| "grad_norm": 0.4037097096443176, |
| "learning_rate": 8.989520482692919e-05, |
| "loss": 0.1237, |
| "step": 1596 |
| }, |
| { |
| "epoch": 3.0375653827864957, |
| "grad_norm": 0.23668204247951508, |
| "learning_rate": 8.988885360431884e-05, |
| "loss": 0.1778, |
| "step": 1597 |
| }, |
| { |
| "epoch": 3.0394674274845457, |
| "grad_norm": 0.3448043167591095, |
| "learning_rate": 8.988250238170848e-05, |
| "loss": 0.1349, |
| "step": 1598 |
| }, |
| { |
| "epoch": 3.041369472182596, |
| "grad_norm": 0.39455583691596985, |
| "learning_rate": 8.987615115909813e-05, |
| "loss": 0.1175, |
| "step": 1599 |
| }, |
| { |
| "epoch": 3.0432715168806466, |
| "grad_norm": 0.39552587270736694, |
| "learning_rate": 8.986979993648778e-05, |
| "loss": 0.1296, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.045173561578697, |
| "grad_norm": 0.36603817343711853, |
| "learning_rate": 8.986344871387743e-05, |
| "loss": 0.1392, |
| "step": 1601 |
| }, |
| { |
| "epoch": 3.0470756062767475, |
| "grad_norm": 0.34084847569465637, |
| "learning_rate": 8.985709749126707e-05, |
| "loss": 0.1155, |
| "step": 1602 |
| }, |
| { |
| "epoch": 3.048977650974798, |
| "grad_norm": 0.36548131704330444, |
| "learning_rate": 8.985074626865672e-05, |
| "loss": 0.1381, |
| "step": 1603 |
| }, |
| { |
| "epoch": 3.0508796956728483, |
| "grad_norm": 0.30957910418510437, |
| "learning_rate": 8.984439504604637e-05, |
| "loss": 0.1123, |
| "step": 1604 |
| }, |
| { |
| "epoch": 3.0527817403708988, |
| "grad_norm": 0.38922393321990967, |
| "learning_rate": 8.983804382343601e-05, |
| "loss": 0.1588, |
| "step": 1605 |
| }, |
| { |
| "epoch": 3.054683785068949, |
| "grad_norm": 0.3416849672794342, |
| "learning_rate": 8.983169260082566e-05, |
| "loss": 0.1236, |
| "step": 1606 |
| }, |
| { |
| "epoch": 3.0565858297669997, |
| "grad_norm": 0.31353771686553955, |
| "learning_rate": 8.982534137821532e-05, |
| "loss": 0.1025, |
| "step": 1607 |
| }, |
| { |
| "epoch": 3.05848787446505, |
| "grad_norm": 0.36878702044487, |
| "learning_rate": 8.981899015560495e-05, |
| "loss": 0.1421, |
| "step": 1608 |
| }, |
| { |
| "epoch": 3.0603899191631005, |
| "grad_norm": 0.38487425446510315, |
| "learning_rate": 8.98126389329946e-05, |
| "loss": 0.1223, |
| "step": 1609 |
| }, |
| { |
| "epoch": 3.0622919638611505, |
| "grad_norm": 0.3435547649860382, |
| "learning_rate": 8.980628771038426e-05, |
| "loss": 0.1105, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.064194008559201, |
| "grad_norm": 0.422198086977005, |
| "learning_rate": 8.979993648777391e-05, |
| "loss": 0.1368, |
| "step": 1611 |
| }, |
| { |
| "epoch": 3.0660960532572514, |
| "grad_norm": 0.43352290987968445, |
| "learning_rate": 8.979358526516355e-05, |
| "loss": 0.1743, |
| "step": 1612 |
| }, |
| { |
| "epoch": 3.067998097955302, |
| "grad_norm": 0.3885476887226105, |
| "learning_rate": 8.978723404255319e-05, |
| "loss": 0.1979, |
| "step": 1613 |
| }, |
| { |
| "epoch": 3.0699001426533523, |
| "grad_norm": 0.3135451376438141, |
| "learning_rate": 8.978088281994285e-05, |
| "loss": 0.1105, |
| "step": 1614 |
| }, |
| { |
| "epoch": 3.0718021873514028, |
| "grad_norm": 0.4184531271457672, |
| "learning_rate": 8.977453159733249e-05, |
| "loss": 0.1335, |
| "step": 1615 |
| }, |
| { |
| "epoch": 3.073704232049453, |
| "grad_norm": 0.35463500022888184, |
| "learning_rate": 8.976818037472213e-05, |
| "loss": 0.1384, |
| "step": 1616 |
| }, |
| { |
| "epoch": 3.0756062767475036, |
| "grad_norm": 0.33959662914276123, |
| "learning_rate": 8.97618291521118e-05, |
| "loss": 0.118, |
| "step": 1617 |
| }, |
| { |
| "epoch": 3.077508321445554, |
| "grad_norm": 0.3295678198337555, |
| "learning_rate": 8.975547792950143e-05, |
| "loss": 0.1073, |
| "step": 1618 |
| }, |
| { |
| "epoch": 3.0794103661436045, |
| "grad_norm": 0.32906121015548706, |
| "learning_rate": 8.974912670689108e-05, |
| "loss": 0.0992, |
| "step": 1619 |
| }, |
| { |
| "epoch": 3.081312410841655, |
| "grad_norm": 0.2967415750026703, |
| "learning_rate": 8.974277548428072e-05, |
| "loss": 0.0901, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.0832144555397054, |
| "grad_norm": 0.3415001928806305, |
| "learning_rate": 8.973642426167037e-05, |
| "loss": 0.1248, |
| "step": 1621 |
| }, |
| { |
| "epoch": 3.0851165002377554, |
| "grad_norm": 0.2587614357471466, |
| "learning_rate": 8.973007303906003e-05, |
| "loss": 0.0872, |
| "step": 1622 |
| }, |
| { |
| "epoch": 3.087018544935806, |
| "grad_norm": 0.3469274640083313, |
| "learning_rate": 8.972372181644966e-05, |
| "loss": 0.1147, |
| "step": 1623 |
| }, |
| { |
| "epoch": 3.0889205896338563, |
| "grad_norm": 0.28534063696861267, |
| "learning_rate": 8.971737059383932e-05, |
| "loss": 0.1377, |
| "step": 1624 |
| }, |
| { |
| "epoch": 3.0908226343319067, |
| "grad_norm": 0.3836195170879364, |
| "learning_rate": 8.971101937122897e-05, |
| "loss": 0.1242, |
| "step": 1625 |
| }, |
| { |
| "epoch": 3.092724679029957, |
| "grad_norm": 0.40428081154823303, |
| "learning_rate": 8.97046681486186e-05, |
| "loss": 0.1017, |
| "step": 1626 |
| }, |
| { |
| "epoch": 3.0946267237280076, |
| "grad_norm": 0.37237152457237244, |
| "learning_rate": 8.969831692600826e-05, |
| "loss": 0.1318, |
| "step": 1627 |
| }, |
| { |
| "epoch": 3.096528768426058, |
| "grad_norm": 0.3669044077396393, |
| "learning_rate": 8.969196570339791e-05, |
| "loss": 0.1191, |
| "step": 1628 |
| }, |
| { |
| "epoch": 3.0984308131241085, |
| "grad_norm": 0.36814671754837036, |
| "learning_rate": 8.968561448078756e-05, |
| "loss": 0.1227, |
| "step": 1629 |
| }, |
| { |
| "epoch": 3.100332857822159, |
| "grad_norm": 0.3883667290210724, |
| "learning_rate": 8.96792632581772e-05, |
| "loss": 0.1556, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.1022349025202094, |
| "grad_norm": 0.44517648220062256, |
| "learning_rate": 8.967291203556685e-05, |
| "loss": 0.1439, |
| "step": 1631 |
| }, |
| { |
| "epoch": 3.10413694721826, |
| "grad_norm": 0.3230499029159546, |
| "learning_rate": 8.96665608129565e-05, |
| "loss": 0.117, |
| "step": 1632 |
| }, |
| { |
| "epoch": 3.10603899191631, |
| "grad_norm": 0.2505279779434204, |
| "learning_rate": 8.966020959034614e-05, |
| "loss": 0.0945, |
| "step": 1633 |
| }, |
| { |
| "epoch": 3.1079410366143603, |
| "grad_norm": 0.31753817200660706, |
| "learning_rate": 8.96538583677358e-05, |
| "loss": 0.1119, |
| "step": 1634 |
| }, |
| { |
| "epoch": 3.1098430813124107, |
| "grad_norm": 0.34199607372283936, |
| "learning_rate": 8.964750714512545e-05, |
| "loss": 0.1508, |
| "step": 1635 |
| }, |
| { |
| "epoch": 3.111745126010461, |
| "grad_norm": 0.39167290925979614, |
| "learning_rate": 8.964115592251508e-05, |
| "loss": 0.1422, |
| "step": 1636 |
| }, |
| { |
| "epoch": 3.1136471707085116, |
| "grad_norm": 0.28108343482017517, |
| "learning_rate": 8.963480469990474e-05, |
| "loss": 0.0981, |
| "step": 1637 |
| }, |
| { |
| "epoch": 3.115549215406562, |
| "grad_norm": 0.2806454598903656, |
| "learning_rate": 8.962845347729439e-05, |
| "loss": 0.1227, |
| "step": 1638 |
| }, |
| { |
| "epoch": 3.1174512601046125, |
| "grad_norm": 0.3393970727920532, |
| "learning_rate": 8.962210225468403e-05, |
| "loss": 0.1419, |
| "step": 1639 |
| }, |
| { |
| "epoch": 3.119353304802663, |
| "grad_norm": 0.3800428509712219, |
| "learning_rate": 8.961575103207368e-05, |
| "loss": 0.1323, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.1212553495007134, |
| "grad_norm": 0.3849729299545288, |
| "learning_rate": 8.960939980946333e-05, |
| "loss": 0.1505, |
| "step": 1641 |
| }, |
| { |
| "epoch": 3.123157394198764, |
| "grad_norm": 0.38189247250556946, |
| "learning_rate": 8.960304858685298e-05, |
| "loss": 0.1303, |
| "step": 1642 |
| }, |
| { |
| "epoch": 3.1250594388968143, |
| "grad_norm": 0.3030915856361389, |
| "learning_rate": 8.959669736424262e-05, |
| "loss": 0.1141, |
| "step": 1643 |
| }, |
| { |
| "epoch": 3.1269614835948643, |
| "grad_norm": 0.3842359185218811, |
| "learning_rate": 8.959034614163226e-05, |
| "loss": 0.1124, |
| "step": 1644 |
| }, |
| { |
| "epoch": 3.1288635282929147, |
| "grad_norm": 0.3637976348400116, |
| "learning_rate": 8.958399491902192e-05, |
| "loss": 0.1275, |
| "step": 1645 |
| }, |
| { |
| "epoch": 3.130765572990965, |
| "grad_norm": 0.2884964346885681, |
| "learning_rate": 8.957764369641156e-05, |
| "loss": 0.1065, |
| "step": 1646 |
| }, |
| { |
| "epoch": 3.1326676176890156, |
| "grad_norm": 0.3866124749183655, |
| "learning_rate": 8.957129247380121e-05, |
| "loss": 0.1389, |
| "step": 1647 |
| }, |
| { |
| "epoch": 3.134569662387066, |
| "grad_norm": 0.418950617313385, |
| "learning_rate": 8.956494125119087e-05, |
| "loss": 0.1406, |
| "step": 1648 |
| }, |
| { |
| "epoch": 3.1364717070851165, |
| "grad_norm": 0.37514927983283997, |
| "learning_rate": 8.95585900285805e-05, |
| "loss": 0.1239, |
| "step": 1649 |
| }, |
| { |
| "epoch": 3.138373751783167, |
| "grad_norm": 0.29558438062667847, |
| "learning_rate": 8.955223880597016e-05, |
| "loss": 0.1077, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.1402757964812174, |
| "grad_norm": 0.3241124749183655, |
| "learning_rate": 8.95458875833598e-05, |
| "loss": 0.1254, |
| "step": 1651 |
| }, |
| { |
| "epoch": 3.142177841179268, |
| "grad_norm": 0.40942251682281494, |
| "learning_rate": 8.953953636074945e-05, |
| "loss": 0.1388, |
| "step": 1652 |
| }, |
| { |
| "epoch": 3.1440798858773182, |
| "grad_norm": 0.3899609446525574, |
| "learning_rate": 8.95331851381391e-05, |
| "loss": 0.1279, |
| "step": 1653 |
| }, |
| { |
| "epoch": 3.1459819305753687, |
| "grad_norm": 0.37820303440093994, |
| "learning_rate": 8.952683391552874e-05, |
| "loss": 0.1146, |
| "step": 1654 |
| }, |
| { |
| "epoch": 3.147883975273419, |
| "grad_norm": 0.3521963059902191, |
| "learning_rate": 8.95204826929184e-05, |
| "loss": 0.1337, |
| "step": 1655 |
| }, |
| { |
| "epoch": 3.1497860199714696, |
| "grad_norm": 0.3292877674102783, |
| "learning_rate": 8.951413147030804e-05, |
| "loss": 0.1225, |
| "step": 1656 |
| }, |
| { |
| "epoch": 3.1516880646695196, |
| "grad_norm": 0.28479406237602234, |
| "learning_rate": 8.950778024769768e-05, |
| "loss": 0.1006, |
| "step": 1657 |
| }, |
| { |
| "epoch": 3.15359010936757, |
| "grad_norm": 0.2883979380130768, |
| "learning_rate": 8.950142902508733e-05, |
| "loss": 0.1114, |
| "step": 1658 |
| }, |
| { |
| "epoch": 3.1554921540656204, |
| "grad_norm": 0.33744558691978455, |
| "learning_rate": 8.949507780247698e-05, |
| "loss": 0.1263, |
| "step": 1659 |
| }, |
| { |
| "epoch": 3.157394198763671, |
| "grad_norm": 0.2845192551612854, |
| "learning_rate": 8.948872657986663e-05, |
| "loss": 0.1047, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.1592962434617213, |
| "grad_norm": 0.3539939224720001, |
| "learning_rate": 8.948237535725627e-05, |
| "loss": 0.1183, |
| "step": 1661 |
| }, |
| { |
| "epoch": 3.1611982881597718, |
| "grad_norm": 0.24927809834480286, |
| "learning_rate": 8.947602413464592e-05, |
| "loss": 0.0825, |
| "step": 1662 |
| }, |
| { |
| "epoch": 3.163100332857822, |
| "grad_norm": 0.4059623181819916, |
| "learning_rate": 8.946967291203558e-05, |
| "loss": 0.1457, |
| "step": 1663 |
| }, |
| { |
| "epoch": 3.1650023775558727, |
| "grad_norm": 0.3298782706260681, |
| "learning_rate": 8.946332168942521e-05, |
| "loss": 0.1226, |
| "step": 1664 |
| }, |
| { |
| "epoch": 3.166904422253923, |
| "grad_norm": 0.3750251829624176, |
| "learning_rate": 8.945697046681487e-05, |
| "loss": 0.144, |
| "step": 1665 |
| }, |
| { |
| "epoch": 3.1688064669519735, |
| "grad_norm": 0.40858665108680725, |
| "learning_rate": 8.945061924420452e-05, |
| "loss": 0.1426, |
| "step": 1666 |
| }, |
| { |
| "epoch": 3.170708511650024, |
| "grad_norm": 0.38032254576683044, |
| "learning_rate": 8.944426802159416e-05, |
| "loss": 0.1479, |
| "step": 1667 |
| }, |
| { |
| "epoch": 3.172610556348074, |
| "grad_norm": 0.3702940046787262, |
| "learning_rate": 8.943791679898381e-05, |
| "loss": 0.1262, |
| "step": 1668 |
| }, |
| { |
| "epoch": 3.1745126010461244, |
| "grad_norm": 0.43061700463294983, |
| "learning_rate": 8.943156557637346e-05, |
| "loss": 0.1463, |
| "step": 1669 |
| }, |
| { |
| "epoch": 3.176414645744175, |
| "grad_norm": 0.2968880832195282, |
| "learning_rate": 8.94252143537631e-05, |
| "loss": 0.1135, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.1783166904422253, |
| "grad_norm": 0.28398388624191284, |
| "learning_rate": 8.941886313115275e-05, |
| "loss": 0.1137, |
| "step": 1671 |
| }, |
| { |
| "epoch": 3.1802187351402758, |
| "grad_norm": 0.2764633595943451, |
| "learning_rate": 8.94125119085424e-05, |
| "loss": 0.0974, |
| "step": 1672 |
| }, |
| { |
| "epoch": 3.182120779838326, |
| "grad_norm": 0.39509302377700806, |
| "learning_rate": 8.940616068593205e-05, |
| "loss": 0.1491, |
| "step": 1673 |
| }, |
| { |
| "epoch": 3.1840228245363766, |
| "grad_norm": 0.2926827669143677, |
| "learning_rate": 8.939980946332169e-05, |
| "loss": 0.1207, |
| "step": 1674 |
| }, |
| { |
| "epoch": 3.185924869234427, |
| "grad_norm": 0.35445713996887207, |
| "learning_rate": 8.939345824071133e-05, |
| "loss": 0.1252, |
| "step": 1675 |
| }, |
| { |
| "epoch": 3.1878269139324775, |
| "grad_norm": 0.3183155059814453, |
| "learning_rate": 8.9387107018101e-05, |
| "loss": 0.1178, |
| "step": 1676 |
| }, |
| { |
| "epoch": 3.189728958630528, |
| "grad_norm": 0.40158188343048096, |
| "learning_rate": 8.938075579549063e-05, |
| "loss": 0.1266, |
| "step": 1677 |
| }, |
| { |
| "epoch": 3.1916310033285784, |
| "grad_norm": 0.33932897448539734, |
| "learning_rate": 8.937440457288029e-05, |
| "loss": 0.1321, |
| "step": 1678 |
| }, |
| { |
| "epoch": 3.1935330480266284, |
| "grad_norm": 0.3436925411224365, |
| "learning_rate": 8.936805335026994e-05, |
| "loss": 0.1204, |
| "step": 1679 |
| }, |
| { |
| "epoch": 3.195435092724679, |
| "grad_norm": 0.32970649003982544, |
| "learning_rate": 8.936170212765958e-05, |
| "loss": 0.1023, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.1973371374227293, |
| "grad_norm": 0.3206690549850464, |
| "learning_rate": 8.935535090504923e-05, |
| "loss": 0.1011, |
| "step": 1681 |
| }, |
| { |
| "epoch": 3.1992391821207797, |
| "grad_norm": 0.39323487877845764, |
| "learning_rate": 8.934899968243887e-05, |
| "loss": 0.1263, |
| "step": 1682 |
| }, |
| { |
| "epoch": 3.20114122681883, |
| "grad_norm": 0.3755662143230438, |
| "learning_rate": 8.934264845982853e-05, |
| "loss": 0.1345, |
| "step": 1683 |
| }, |
| { |
| "epoch": 3.2030432715168806, |
| "grad_norm": 0.3337384760379791, |
| "learning_rate": 8.933629723721817e-05, |
| "loss": 0.1094, |
| "step": 1684 |
| }, |
| { |
| "epoch": 3.204945316214931, |
| "grad_norm": 0.35307517647743225, |
| "learning_rate": 8.932994601460781e-05, |
| "loss": 0.1244, |
| "step": 1685 |
| }, |
| { |
| "epoch": 3.2068473609129815, |
| "grad_norm": 0.2809374928474426, |
| "learning_rate": 8.932359479199747e-05, |
| "loss": 0.0961, |
| "step": 1686 |
| }, |
| { |
| "epoch": 3.208749405611032, |
| "grad_norm": 0.35939821600914, |
| "learning_rate": 8.931724356938711e-05, |
| "loss": 0.1294, |
| "step": 1687 |
| }, |
| { |
| "epoch": 3.2106514503090824, |
| "grad_norm": 0.36626148223876953, |
| "learning_rate": 8.931089234677675e-05, |
| "loss": 0.141, |
| "step": 1688 |
| }, |
| { |
| "epoch": 3.212553495007133, |
| "grad_norm": 0.31976842880249023, |
| "learning_rate": 8.93045411241664e-05, |
| "loss": 0.1058, |
| "step": 1689 |
| }, |
| { |
| "epoch": 3.2144555397051833, |
| "grad_norm": 0.40340307354927063, |
| "learning_rate": 8.929818990155605e-05, |
| "loss": 0.142, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.2163575844032333, |
| "grad_norm": 0.3481243848800659, |
| "learning_rate": 8.92918386789457e-05, |
| "loss": 0.1301, |
| "step": 1691 |
| }, |
| { |
| "epoch": 3.2182596291012837, |
| "grad_norm": 0.41779786348342896, |
| "learning_rate": 8.928548745633534e-05, |
| "loss": 0.1531, |
| "step": 1692 |
| }, |
| { |
| "epoch": 3.220161673799334, |
| "grad_norm": 0.33376792073249817, |
| "learning_rate": 8.9279136233725e-05, |
| "loss": 0.1397, |
| "step": 1693 |
| }, |
| { |
| "epoch": 3.2220637184973846, |
| "grad_norm": 0.42083820700645447, |
| "learning_rate": 8.927278501111465e-05, |
| "loss": 0.1456, |
| "step": 1694 |
| }, |
| { |
| "epoch": 3.223965763195435, |
| "grad_norm": 0.23268885910511017, |
| "learning_rate": 8.926643378850429e-05, |
| "loss": 0.1261, |
| "step": 1695 |
| }, |
| { |
| "epoch": 3.2258678078934855, |
| "grad_norm": 0.3965808153152466, |
| "learning_rate": 8.926008256589394e-05, |
| "loss": 0.1454, |
| "step": 1696 |
| }, |
| { |
| "epoch": 3.227769852591536, |
| "grad_norm": 0.40782594680786133, |
| "learning_rate": 8.925373134328359e-05, |
| "loss": 0.137, |
| "step": 1697 |
| }, |
| { |
| "epoch": 3.2296718972895864, |
| "grad_norm": 0.37247705459594727, |
| "learning_rate": 8.924738012067323e-05, |
| "loss": 0.1227, |
| "step": 1698 |
| }, |
| { |
| "epoch": 3.231573941987637, |
| "grad_norm": 0.5225626230239868, |
| "learning_rate": 8.924102889806288e-05, |
| "loss": 0.1596, |
| "step": 1699 |
| }, |
| { |
| "epoch": 3.2334759866856873, |
| "grad_norm": 0.35236862301826477, |
| "learning_rate": 8.923467767545253e-05, |
| "loss": 0.1576, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.2353780313837377, |
| "grad_norm": 0.3305290639400482, |
| "learning_rate": 8.922832645284218e-05, |
| "loss": 0.1114, |
| "step": 1701 |
| }, |
| { |
| "epoch": 3.237280076081788, |
| "grad_norm": 0.37631455063819885, |
| "learning_rate": 8.922197523023182e-05, |
| "loss": 0.1278, |
| "step": 1702 |
| }, |
| { |
| "epoch": 3.239182120779838, |
| "grad_norm": 0.3439154624938965, |
| "learning_rate": 8.921562400762147e-05, |
| "loss": 0.1658, |
| "step": 1703 |
| }, |
| { |
| "epoch": 3.2410841654778886, |
| "grad_norm": 0.4184103310108185, |
| "learning_rate": 8.920927278501112e-05, |
| "loss": 0.1754, |
| "step": 1704 |
| }, |
| { |
| "epoch": 3.242986210175939, |
| "grad_norm": 0.3708958029747009, |
| "learning_rate": 8.920292156240076e-05, |
| "loss": 0.148, |
| "step": 1705 |
| }, |
| { |
| "epoch": 3.2448882548739895, |
| "grad_norm": 0.36626115441322327, |
| "learning_rate": 8.919657033979041e-05, |
| "loss": 0.152, |
| "step": 1706 |
| }, |
| { |
| "epoch": 3.24679029957204, |
| "grad_norm": 0.3738412857055664, |
| "learning_rate": 8.919021911718007e-05, |
| "loss": 0.1432, |
| "step": 1707 |
| }, |
| { |
| "epoch": 3.2486923442700903, |
| "grad_norm": 0.4470990002155304, |
| "learning_rate": 8.91838678945697e-05, |
| "loss": 0.1639, |
| "step": 1708 |
| }, |
| { |
| "epoch": 3.250594388968141, |
| "grad_norm": 0.3332229554653168, |
| "learning_rate": 8.917751667195936e-05, |
| "loss": 0.1257, |
| "step": 1709 |
| }, |
| { |
| "epoch": 3.2524964336661912, |
| "grad_norm": 0.3853921890258789, |
| "learning_rate": 8.917116544934901e-05, |
| "loss": 0.1262, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.2543984783642417, |
| "grad_norm": 0.32993221282958984, |
| "learning_rate": 8.916481422673865e-05, |
| "loss": 0.1231, |
| "step": 1711 |
| }, |
| { |
| "epoch": 3.256300523062292, |
| "grad_norm": 0.3631759285926819, |
| "learning_rate": 8.91584630041283e-05, |
| "loss": 0.148, |
| "step": 1712 |
| }, |
| { |
| "epoch": 3.2582025677603426, |
| "grad_norm": 0.40394118428230286, |
| "learning_rate": 8.915211178151795e-05, |
| "loss": 0.1542, |
| "step": 1713 |
| }, |
| { |
| "epoch": 3.2601046124583926, |
| "grad_norm": 0.3267883360385895, |
| "learning_rate": 8.91457605589076e-05, |
| "loss": 0.1411, |
| "step": 1714 |
| }, |
| { |
| "epoch": 3.262006657156443, |
| "grad_norm": 0.3076201379299164, |
| "learning_rate": 8.913940933629724e-05, |
| "loss": 0.1189, |
| "step": 1715 |
| }, |
| { |
| "epoch": 3.2639087018544934, |
| "grad_norm": 0.43854421377182007, |
| "learning_rate": 8.913305811368688e-05, |
| "loss": 0.1806, |
| "step": 1716 |
| }, |
| { |
| "epoch": 3.265810746552544, |
| "grad_norm": 0.2679373621940613, |
| "learning_rate": 8.912670689107654e-05, |
| "loss": 0.1251, |
| "step": 1717 |
| }, |
| { |
| "epoch": 3.2677127912505943, |
| "grad_norm": 0.35840150713920593, |
| "learning_rate": 8.912035566846618e-05, |
| "loss": 0.1276, |
| "step": 1718 |
| }, |
| { |
| "epoch": 3.2696148359486448, |
| "grad_norm": 0.368457168340683, |
| "learning_rate": 8.911400444585583e-05, |
| "loss": 0.1312, |
| "step": 1719 |
| }, |
| { |
| "epoch": 3.271516880646695, |
| "grad_norm": 0.3617841303348541, |
| "learning_rate": 8.910765322324549e-05, |
| "loss": 0.1165, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.2734189253447457, |
| "grad_norm": 0.34482330083847046, |
| "learning_rate": 8.910130200063512e-05, |
| "loss": 0.1246, |
| "step": 1721 |
| }, |
| { |
| "epoch": 3.275320970042796, |
| "grad_norm": 0.27358710765838623, |
| "learning_rate": 8.909495077802478e-05, |
| "loss": 0.1093, |
| "step": 1722 |
| }, |
| { |
| "epoch": 3.2772230147408465, |
| "grad_norm": 0.40264174342155457, |
| "learning_rate": 8.908859955541441e-05, |
| "loss": 0.146, |
| "step": 1723 |
| }, |
| { |
| "epoch": 3.279125059438897, |
| "grad_norm": 0.45845937728881836, |
| "learning_rate": 8.908224833280407e-05, |
| "loss": 0.1457, |
| "step": 1724 |
| }, |
| { |
| "epoch": 3.281027104136947, |
| "grad_norm": 0.34490594267845154, |
| "learning_rate": 8.907589711019372e-05, |
| "loss": 0.1247, |
| "step": 1725 |
| }, |
| { |
| "epoch": 3.282929148834998, |
| "grad_norm": 0.4256596267223358, |
| "learning_rate": 8.906954588758336e-05, |
| "loss": 0.1563, |
| "step": 1726 |
| }, |
| { |
| "epoch": 3.284831193533048, |
| "grad_norm": 0.3607080280780792, |
| "learning_rate": 8.906319466497302e-05, |
| "loss": 0.1279, |
| "step": 1727 |
| }, |
| { |
| "epoch": 3.2867332382310983, |
| "grad_norm": 0.30969080328941345, |
| "learning_rate": 8.905684344236266e-05, |
| "loss": 0.1238, |
| "step": 1728 |
| }, |
| { |
| "epoch": 3.2886352829291488, |
| "grad_norm": 0.34044647216796875, |
| "learning_rate": 8.90504922197523e-05, |
| "loss": 0.1237, |
| "step": 1729 |
| }, |
| { |
| "epoch": 3.290537327627199, |
| "grad_norm": 0.40037238597869873, |
| "learning_rate": 8.904414099714195e-05, |
| "loss": 0.1509, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.2924393723252496, |
| "grad_norm": 0.3565572500228882, |
| "learning_rate": 8.90377897745316e-05, |
| "loss": 0.1251, |
| "step": 1731 |
| }, |
| { |
| "epoch": 3.2943414170233, |
| "grad_norm": 0.33730757236480713, |
| "learning_rate": 8.903143855192125e-05, |
| "loss": 0.1527, |
| "step": 1732 |
| }, |
| { |
| "epoch": 3.2962434617213505, |
| "grad_norm": 0.4168394207954407, |
| "learning_rate": 8.902508732931089e-05, |
| "loss": 0.1429, |
| "step": 1733 |
| }, |
| { |
| "epoch": 3.298145506419401, |
| "grad_norm": 0.40814298391342163, |
| "learning_rate": 8.901873610670054e-05, |
| "loss": 0.1588, |
| "step": 1734 |
| }, |
| { |
| "epoch": 3.3000475511174514, |
| "grad_norm": 0.42030104994773865, |
| "learning_rate": 8.90123848840902e-05, |
| "loss": 0.1495, |
| "step": 1735 |
| }, |
| { |
| "epoch": 3.301949595815502, |
| "grad_norm": 0.3305467367172241, |
| "learning_rate": 8.900603366147983e-05, |
| "loss": 0.1239, |
| "step": 1736 |
| }, |
| { |
| "epoch": 3.3038516405135523, |
| "grad_norm": 0.31360068917274475, |
| "learning_rate": 8.899968243886949e-05, |
| "loss": 0.108, |
| "step": 1737 |
| }, |
| { |
| "epoch": 3.3057536852116023, |
| "grad_norm": 0.42463186383247375, |
| "learning_rate": 8.899333121625914e-05, |
| "loss": 0.1451, |
| "step": 1738 |
| }, |
| { |
| "epoch": 3.3076557299096527, |
| "grad_norm": 0.3854060471057892, |
| "learning_rate": 8.898697999364878e-05, |
| "loss": 0.1638, |
| "step": 1739 |
| }, |
| { |
| "epoch": 3.309557774607703, |
| "grad_norm": 0.46821728348731995, |
| "learning_rate": 8.898062877103843e-05, |
| "loss": 0.1718, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.3114598193057536, |
| "grad_norm": 0.33078089356422424, |
| "learning_rate": 8.897427754842808e-05, |
| "loss": 0.1153, |
| "step": 1741 |
| }, |
| { |
| "epoch": 3.313361864003804, |
| "grad_norm": 0.3746374249458313, |
| "learning_rate": 8.896792632581772e-05, |
| "loss": 0.1387, |
| "step": 1742 |
| }, |
| { |
| "epoch": 3.3152639087018545, |
| "grad_norm": 0.33252257108688354, |
| "learning_rate": 8.896157510320737e-05, |
| "loss": 0.1218, |
| "step": 1743 |
| }, |
| { |
| "epoch": 3.317165953399905, |
| "grad_norm": 0.3421841561794281, |
| "learning_rate": 8.895522388059702e-05, |
| "loss": 0.1376, |
| "step": 1744 |
| }, |
| { |
| "epoch": 3.3190679980979554, |
| "grad_norm": 0.3410481810569763, |
| "learning_rate": 8.894887265798667e-05, |
| "loss": 0.1174, |
| "step": 1745 |
| }, |
| { |
| "epoch": 3.320970042796006, |
| "grad_norm": 0.3556031882762909, |
| "learning_rate": 8.894252143537631e-05, |
| "loss": 0.1612, |
| "step": 1746 |
| }, |
| { |
| "epoch": 3.3228720874940563, |
| "grad_norm": 0.35139304399490356, |
| "learning_rate": 8.893617021276595e-05, |
| "loss": 0.1371, |
| "step": 1747 |
| }, |
| { |
| "epoch": 3.3247741321921067, |
| "grad_norm": 0.38646724820137024, |
| "learning_rate": 8.892981899015562e-05, |
| "loss": 0.1472, |
| "step": 1748 |
| }, |
| { |
| "epoch": 3.3266761768901567, |
| "grad_norm": 0.40337100625038147, |
| "learning_rate": 8.892346776754525e-05, |
| "loss": 0.1938, |
| "step": 1749 |
| }, |
| { |
| "epoch": 3.328578221588207, |
| "grad_norm": 0.2508182227611542, |
| "learning_rate": 8.89171165449349e-05, |
| "loss": 0.0987, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.3304802662862576, |
| "grad_norm": 0.392284631729126, |
| "learning_rate": 8.891076532232456e-05, |
| "loss": 0.1448, |
| "step": 1751 |
| }, |
| { |
| "epoch": 3.332382310984308, |
| "grad_norm": 0.25311291217803955, |
| "learning_rate": 8.89044140997142e-05, |
| "loss": 0.1227, |
| "step": 1752 |
| }, |
| { |
| "epoch": 3.3342843556823585, |
| "grad_norm": 0.38591787219047546, |
| "learning_rate": 8.889806287710385e-05, |
| "loss": 0.1251, |
| "step": 1753 |
| }, |
| { |
| "epoch": 3.336186400380409, |
| "grad_norm": 0.3149789869785309, |
| "learning_rate": 8.889171165449349e-05, |
| "loss": 0.1282, |
| "step": 1754 |
| }, |
| { |
| "epoch": 3.3380884450784594, |
| "grad_norm": 0.4134093225002289, |
| "learning_rate": 8.888536043188315e-05, |
| "loss": 0.1509, |
| "step": 1755 |
| }, |
| { |
| "epoch": 3.33999048977651, |
| "grad_norm": 0.3769814074039459, |
| "learning_rate": 8.887900920927279e-05, |
| "loss": 0.1283, |
| "step": 1756 |
| }, |
| { |
| "epoch": 3.3418925344745603, |
| "grad_norm": 0.42259126901626587, |
| "learning_rate": 8.887265798666243e-05, |
| "loss": 0.1319, |
| "step": 1757 |
| }, |
| { |
| "epoch": 3.3437945791726107, |
| "grad_norm": 0.4603644609451294, |
| "learning_rate": 8.88663067640521e-05, |
| "loss": 0.1427, |
| "step": 1758 |
| }, |
| { |
| "epoch": 3.345696623870661, |
| "grad_norm": 0.3804812431335449, |
| "learning_rate": 8.885995554144173e-05, |
| "loss": 0.1479, |
| "step": 1759 |
| }, |
| { |
| "epoch": 3.347598668568711, |
| "grad_norm": 0.42290598154067993, |
| "learning_rate": 8.885360431883137e-05, |
| "loss": 0.17, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.3495007132667616, |
| "grad_norm": 0.3739291727542877, |
| "learning_rate": 8.884725309622102e-05, |
| "loss": 0.1297, |
| "step": 1761 |
| }, |
| { |
| "epoch": 3.351402757964812, |
| "grad_norm": 0.36516469717025757, |
| "learning_rate": 8.884090187361067e-05, |
| "loss": 0.1294, |
| "step": 1762 |
| }, |
| { |
| "epoch": 3.3533048026628625, |
| "grad_norm": 0.32364609837532043, |
| "learning_rate": 8.883455065100033e-05, |
| "loss": 0.1211, |
| "step": 1763 |
| }, |
| { |
| "epoch": 3.355206847360913, |
| "grad_norm": 0.3903793394565582, |
| "learning_rate": 8.882819942838996e-05, |
| "loss": 0.1339, |
| "step": 1764 |
| }, |
| { |
| "epoch": 3.3571088920589633, |
| "grad_norm": 0.3321349322795868, |
| "learning_rate": 8.882184820577962e-05, |
| "loss": 0.1229, |
| "step": 1765 |
| }, |
| { |
| "epoch": 3.359010936757014, |
| "grad_norm": 0.3843282163143158, |
| "learning_rate": 8.881549698316927e-05, |
| "loss": 0.1425, |
| "step": 1766 |
| }, |
| { |
| "epoch": 3.3609129814550642, |
| "grad_norm": 0.34259116649627686, |
| "learning_rate": 8.88091457605589e-05, |
| "loss": 0.1275, |
| "step": 1767 |
| }, |
| { |
| "epoch": 3.3628150261531147, |
| "grad_norm": 0.335219144821167, |
| "learning_rate": 8.880279453794856e-05, |
| "loss": 0.1273, |
| "step": 1768 |
| }, |
| { |
| "epoch": 3.364717070851165, |
| "grad_norm": 0.3495425879955292, |
| "learning_rate": 8.879644331533821e-05, |
| "loss": 0.1112, |
| "step": 1769 |
| }, |
| { |
| "epoch": 3.3666191155492156, |
| "grad_norm": 0.430451899766922, |
| "learning_rate": 8.879009209272785e-05, |
| "loss": 0.1404, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.368521160247266, |
| "grad_norm": 0.24980789422988892, |
| "learning_rate": 8.87837408701175e-05, |
| "loss": 0.1034, |
| "step": 1771 |
| }, |
| { |
| "epoch": 3.3704232049453164, |
| "grad_norm": 0.4349839687347412, |
| "learning_rate": 8.877738964750715e-05, |
| "loss": 0.1371, |
| "step": 1772 |
| }, |
| { |
| "epoch": 3.3723252496433664, |
| "grad_norm": 0.3427116572856903, |
| "learning_rate": 8.87710384248968e-05, |
| "loss": 0.1224, |
| "step": 1773 |
| }, |
| { |
| "epoch": 3.374227294341417, |
| "grad_norm": 0.3835298418998718, |
| "learning_rate": 8.876468720228644e-05, |
| "loss": 0.1576, |
| "step": 1774 |
| }, |
| { |
| "epoch": 3.3761293390394673, |
| "grad_norm": 0.3284079432487488, |
| "learning_rate": 8.87583359796761e-05, |
| "loss": 0.1039, |
| "step": 1775 |
| }, |
| { |
| "epoch": 3.3780313837375178, |
| "grad_norm": 0.32109662890434265, |
| "learning_rate": 8.875198475706575e-05, |
| "loss": 0.1079, |
| "step": 1776 |
| }, |
| { |
| "epoch": 3.379933428435568, |
| "grad_norm": 0.27259504795074463, |
| "learning_rate": 8.874563353445538e-05, |
| "loss": 0.0983, |
| "step": 1777 |
| }, |
| { |
| "epoch": 3.3818354731336187, |
| "grad_norm": 0.3639247417449951, |
| "learning_rate": 8.873928231184504e-05, |
| "loss": 0.1297, |
| "step": 1778 |
| }, |
| { |
| "epoch": 3.383737517831669, |
| "grad_norm": 0.3729754388332367, |
| "learning_rate": 8.873293108923469e-05, |
| "loss": 0.1419, |
| "step": 1779 |
| }, |
| { |
| "epoch": 3.3856395625297195, |
| "grad_norm": 0.44657668471336365, |
| "learning_rate": 8.872657986662433e-05, |
| "loss": 0.1299, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.38754160722777, |
| "grad_norm": 0.2924906611442566, |
| "learning_rate": 8.872022864401398e-05, |
| "loss": 0.109, |
| "step": 1781 |
| }, |
| { |
| "epoch": 3.3894436519258204, |
| "grad_norm": 0.3643059730529785, |
| "learning_rate": 8.871387742140363e-05, |
| "loss": 0.1217, |
| "step": 1782 |
| }, |
| { |
| "epoch": 3.391345696623871, |
| "grad_norm": 0.31588301062583923, |
| "learning_rate": 8.870752619879327e-05, |
| "loss": 0.1309, |
| "step": 1783 |
| }, |
| { |
| "epoch": 3.393247741321921, |
| "grad_norm": 0.5099390149116516, |
| "learning_rate": 8.870117497618292e-05, |
| "loss": 0.3371, |
| "step": 1784 |
| }, |
| { |
| "epoch": 3.3951497860199713, |
| "grad_norm": 0.3374120891094208, |
| "learning_rate": 8.869482375357256e-05, |
| "loss": 0.1341, |
| "step": 1785 |
| }, |
| { |
| "epoch": 3.3970518307180217, |
| "grad_norm": 0.36739760637283325, |
| "learning_rate": 8.868847253096222e-05, |
| "loss": 0.135, |
| "step": 1786 |
| }, |
| { |
| "epoch": 3.398953875416072, |
| "grad_norm": 0.36785241961479187, |
| "learning_rate": 8.868212130835186e-05, |
| "loss": 0.1402, |
| "step": 1787 |
| }, |
| { |
| "epoch": 3.4008559201141226, |
| "grad_norm": 0.3834420442581177, |
| "learning_rate": 8.86757700857415e-05, |
| "loss": 0.132, |
| "step": 1788 |
| }, |
| { |
| "epoch": 3.402757964812173, |
| "grad_norm": 0.40532076358795166, |
| "learning_rate": 8.866941886313117e-05, |
| "loss": 0.1491, |
| "step": 1789 |
| }, |
| { |
| "epoch": 3.4046600095102235, |
| "grad_norm": 0.3840698003768921, |
| "learning_rate": 8.86630676405208e-05, |
| "loss": 0.1238, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.406562054208274, |
| "grad_norm": 0.3948921859264374, |
| "learning_rate": 8.865671641791046e-05, |
| "loss": 0.1452, |
| "step": 1791 |
| }, |
| { |
| "epoch": 3.4084640989063244, |
| "grad_norm": 0.30841973423957825, |
| "learning_rate": 8.86503651953001e-05, |
| "loss": 0.1152, |
| "step": 1792 |
| }, |
| { |
| "epoch": 3.410366143604375, |
| "grad_norm": 0.3028883635997772, |
| "learning_rate": 8.864401397268975e-05, |
| "loss": 0.103, |
| "step": 1793 |
| }, |
| { |
| "epoch": 3.4122681883024253, |
| "grad_norm": 0.3348149359226227, |
| "learning_rate": 8.86376627500794e-05, |
| "loss": 0.124, |
| "step": 1794 |
| }, |
| { |
| "epoch": 3.4141702330004753, |
| "grad_norm": 0.397709459066391, |
| "learning_rate": 8.863131152746904e-05, |
| "loss": 0.1489, |
| "step": 1795 |
| }, |
| { |
| "epoch": 3.4160722776985257, |
| "grad_norm": 0.33986514806747437, |
| "learning_rate": 8.862496030485869e-05, |
| "loss": 0.1243, |
| "step": 1796 |
| }, |
| { |
| "epoch": 3.417974322396576, |
| "grad_norm": 0.3443019688129425, |
| "learning_rate": 8.861860908224834e-05, |
| "loss": 0.1206, |
| "step": 1797 |
| }, |
| { |
| "epoch": 3.4198763670946266, |
| "grad_norm": 0.2696784734725952, |
| "learning_rate": 8.861225785963798e-05, |
| "loss": 0.0978, |
| "step": 1798 |
| }, |
| { |
| "epoch": 3.421778411792677, |
| "grad_norm": 0.3711314797401428, |
| "learning_rate": 8.860590663702763e-05, |
| "loss": 0.1416, |
| "step": 1799 |
| }, |
| { |
| "epoch": 3.4236804564907275, |
| "grad_norm": 0.4727902114391327, |
| "learning_rate": 8.859955541441728e-05, |
| "loss": 0.1749, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.425582501188778, |
| "grad_norm": 0.39370161294937134, |
| "learning_rate": 8.859320419180692e-05, |
| "loss": 0.1516, |
| "step": 1801 |
| }, |
| { |
| "epoch": 3.4274845458868284, |
| "grad_norm": 0.36975982785224915, |
| "learning_rate": 8.858685296919657e-05, |
| "loss": 0.1185, |
| "step": 1802 |
| }, |
| { |
| "epoch": 3.429386590584879, |
| "grad_norm": 0.30827558040618896, |
| "learning_rate": 8.858050174658622e-05, |
| "loss": 0.1292, |
| "step": 1803 |
| }, |
| { |
| "epoch": 3.4312886352829293, |
| "grad_norm": 0.3955543339252472, |
| "learning_rate": 8.857415052397588e-05, |
| "loss": 0.1484, |
| "step": 1804 |
| }, |
| { |
| "epoch": 3.4331906799809797, |
| "grad_norm": 0.35280320048332214, |
| "learning_rate": 8.856779930136551e-05, |
| "loss": 0.1241, |
| "step": 1805 |
| }, |
| { |
| "epoch": 3.4350927246790297, |
| "grad_norm": 0.4241807460784912, |
| "learning_rate": 8.856144807875517e-05, |
| "loss": 0.1663, |
| "step": 1806 |
| }, |
| { |
| "epoch": 3.4369947693770806, |
| "grad_norm": 0.41491755843162537, |
| "learning_rate": 8.855509685614482e-05, |
| "loss": 0.1465, |
| "step": 1807 |
| }, |
| { |
| "epoch": 3.4388968140751306, |
| "grad_norm": 0.3022492229938507, |
| "learning_rate": 8.854874563353446e-05, |
| "loss": 0.1132, |
| "step": 1808 |
| }, |
| { |
| "epoch": 3.440798858773181, |
| "grad_norm": 0.3701956570148468, |
| "learning_rate": 8.854239441092411e-05, |
| "loss": 0.1525, |
| "step": 1809 |
| }, |
| { |
| "epoch": 3.4427009034712315, |
| "grad_norm": 0.3692464232444763, |
| "learning_rate": 8.853604318831376e-05, |
| "loss": 0.1364, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.444602948169282, |
| "grad_norm": 0.2783905267715454, |
| "learning_rate": 8.85296919657034e-05, |
| "loss": 0.1112, |
| "step": 1811 |
| }, |
| { |
| "epoch": 3.4465049928673324, |
| "grad_norm": 0.26422539353370667, |
| "learning_rate": 8.852334074309305e-05, |
| "loss": 0.0871, |
| "step": 1812 |
| }, |
| { |
| "epoch": 3.448407037565383, |
| "grad_norm": 0.3428441882133484, |
| "learning_rate": 8.85169895204827e-05, |
| "loss": 0.1397, |
| "step": 1813 |
| }, |
| { |
| "epoch": 3.4503090822634332, |
| "grad_norm": 0.43042463064193726, |
| "learning_rate": 8.851063829787234e-05, |
| "loss": 0.1524, |
| "step": 1814 |
| }, |
| { |
| "epoch": 3.4522111269614837, |
| "grad_norm": 0.4124317765235901, |
| "learning_rate": 8.850428707526199e-05, |
| "loss": 0.165, |
| "step": 1815 |
| }, |
| { |
| "epoch": 3.454113171659534, |
| "grad_norm": 0.38967373967170715, |
| "learning_rate": 8.849793585265164e-05, |
| "loss": 0.129, |
| "step": 1816 |
| }, |
| { |
| "epoch": 3.4560152163575846, |
| "grad_norm": 0.3426058292388916, |
| "learning_rate": 8.84915846300413e-05, |
| "loss": 0.1229, |
| "step": 1817 |
| }, |
| { |
| "epoch": 3.457917261055635, |
| "grad_norm": 0.4571113884449005, |
| "learning_rate": 8.848523340743093e-05, |
| "loss": 0.1428, |
| "step": 1818 |
| }, |
| { |
| "epoch": 3.459819305753685, |
| "grad_norm": 0.43344834446907043, |
| "learning_rate": 8.847888218482057e-05, |
| "loss": 0.1561, |
| "step": 1819 |
| }, |
| { |
| "epoch": 3.4617213504517355, |
| "grad_norm": 0.36749354004859924, |
| "learning_rate": 8.847253096221024e-05, |
| "loss": 0.1313, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.463623395149786, |
| "grad_norm": 0.36647292971611023, |
| "learning_rate": 8.846617973959988e-05, |
| "loss": 0.1278, |
| "step": 1821 |
| }, |
| { |
| "epoch": 3.4655254398478363, |
| "grad_norm": 0.3204960525035858, |
| "learning_rate": 8.845982851698953e-05, |
| "loss": 0.11, |
| "step": 1822 |
| }, |
| { |
| "epoch": 3.467427484545887, |
| "grad_norm": 0.366187185049057, |
| "learning_rate": 8.845347729437918e-05, |
| "loss": 0.1443, |
| "step": 1823 |
| }, |
| { |
| "epoch": 3.4693295292439372, |
| "grad_norm": 0.4711836874485016, |
| "learning_rate": 8.844712607176882e-05, |
| "loss": 0.151, |
| "step": 1824 |
| }, |
| { |
| "epoch": 3.4712315739419877, |
| "grad_norm": 0.35596373677253723, |
| "learning_rate": 8.844077484915847e-05, |
| "loss": 0.1246, |
| "step": 1825 |
| }, |
| { |
| "epoch": 3.473133618640038, |
| "grad_norm": 0.41798681020736694, |
| "learning_rate": 8.843442362654811e-05, |
| "loss": 0.1575, |
| "step": 1826 |
| }, |
| { |
| "epoch": 3.4750356633380886, |
| "grad_norm": 0.3631289303302765, |
| "learning_rate": 8.842807240393777e-05, |
| "loss": 0.1105, |
| "step": 1827 |
| }, |
| { |
| "epoch": 3.476937708036139, |
| "grad_norm": 0.36891433596611023, |
| "learning_rate": 8.842172118132741e-05, |
| "loss": 0.146, |
| "step": 1828 |
| }, |
| { |
| "epoch": 3.4788397527341894, |
| "grad_norm": 0.33271533250808716, |
| "learning_rate": 8.841536995871705e-05, |
| "loss": 0.1246, |
| "step": 1829 |
| }, |
| { |
| "epoch": 3.4807417974322394, |
| "grad_norm": 0.2956920266151428, |
| "learning_rate": 8.840901873610671e-05, |
| "loss": 0.1181, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.48264384213029, |
| "grad_norm": 0.3685608506202698, |
| "learning_rate": 8.840266751349635e-05, |
| "loss": 0.1338, |
| "step": 1831 |
| }, |
| { |
| "epoch": 3.4845458868283403, |
| "grad_norm": 0.35031598806381226, |
| "learning_rate": 8.839631629088599e-05, |
| "loss": 0.1166, |
| "step": 1832 |
| }, |
| { |
| "epoch": 3.4864479315263908, |
| "grad_norm": 0.5173628330230713, |
| "learning_rate": 8.838996506827564e-05, |
| "loss": 0.157, |
| "step": 1833 |
| }, |
| { |
| "epoch": 3.488349976224441, |
| "grad_norm": 0.4643428921699524, |
| "learning_rate": 8.83836138456653e-05, |
| "loss": 0.1842, |
| "step": 1834 |
| }, |
| { |
| "epoch": 3.4902520209224916, |
| "grad_norm": 0.3688521981239319, |
| "learning_rate": 8.837726262305495e-05, |
| "loss": 0.1375, |
| "step": 1835 |
| }, |
| { |
| "epoch": 3.492154065620542, |
| "grad_norm": 0.3947365880012512, |
| "learning_rate": 8.837091140044458e-05, |
| "loss": 0.149, |
| "step": 1836 |
| }, |
| { |
| "epoch": 3.4940561103185925, |
| "grad_norm": 0.35394486784935, |
| "learning_rate": 8.836456017783424e-05, |
| "loss": 0.1252, |
| "step": 1837 |
| }, |
| { |
| "epoch": 3.495958155016643, |
| "grad_norm": 0.37168943881988525, |
| "learning_rate": 8.835820895522389e-05, |
| "loss": 0.1318, |
| "step": 1838 |
| }, |
| { |
| "epoch": 3.4978601997146934, |
| "grad_norm": 0.37239521741867065, |
| "learning_rate": 8.835185773261353e-05, |
| "loss": 0.1214, |
| "step": 1839 |
| }, |
| { |
| "epoch": 3.499762244412744, |
| "grad_norm": 0.36515411734580994, |
| "learning_rate": 8.834550651000318e-05, |
| "loss": 0.1412, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.501664289110794, |
| "grad_norm": 0.38534054160118103, |
| "learning_rate": 8.833915528739283e-05, |
| "loss": 0.1334, |
| "step": 1841 |
| }, |
| { |
| "epoch": 3.5035663338088447, |
| "grad_norm": 0.36949092149734497, |
| "learning_rate": 8.833280406478247e-05, |
| "loss": 0.1283, |
| "step": 1842 |
| }, |
| { |
| "epoch": 3.5054683785068947, |
| "grad_norm": 0.39546898007392883, |
| "learning_rate": 8.832645284217212e-05, |
| "loss": 0.1471, |
| "step": 1843 |
| }, |
| { |
| "epoch": 3.507370423204945, |
| "grad_norm": 0.34906435012817383, |
| "learning_rate": 8.832010161956177e-05, |
| "loss": 0.1386, |
| "step": 1844 |
| }, |
| { |
| "epoch": 3.5092724679029956, |
| "grad_norm": 0.44590094685554504, |
| "learning_rate": 8.831375039695142e-05, |
| "loss": 0.157, |
| "step": 1845 |
| }, |
| { |
| "epoch": 3.511174512601046, |
| "grad_norm": 0.3336107134819031, |
| "learning_rate": 8.830739917434106e-05, |
| "loss": 0.1435, |
| "step": 1846 |
| }, |
| { |
| "epoch": 3.5130765572990965, |
| "grad_norm": 0.4013485610485077, |
| "learning_rate": 8.830104795173071e-05, |
| "loss": 0.1209, |
| "step": 1847 |
| }, |
| { |
| "epoch": 3.514978601997147, |
| "grad_norm": 0.30285441875457764, |
| "learning_rate": 8.829469672912037e-05, |
| "loss": 0.108, |
| "step": 1848 |
| }, |
| { |
| "epoch": 3.5168806466951974, |
| "grad_norm": 0.440489798784256, |
| "learning_rate": 8.828834550651e-05, |
| "loss": 0.1514, |
| "step": 1849 |
| }, |
| { |
| "epoch": 3.518782691393248, |
| "grad_norm": 0.26309430599212646, |
| "learning_rate": 8.828199428389964e-05, |
| "loss": 0.0953, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.5206847360912983, |
| "grad_norm": 0.548433244228363, |
| "learning_rate": 8.827564306128931e-05, |
| "loss": 0.1977, |
| "step": 1851 |
| }, |
| { |
| "epoch": 3.5225867807893483, |
| "grad_norm": 0.4941021203994751, |
| "learning_rate": 8.826929183867895e-05, |
| "loss": 0.1268, |
| "step": 1852 |
| }, |
| { |
| "epoch": 3.524488825487399, |
| "grad_norm": 0.3945002555847168, |
| "learning_rate": 8.82629406160686e-05, |
| "loss": 0.1304, |
| "step": 1853 |
| }, |
| { |
| "epoch": 3.526390870185449, |
| "grad_norm": 0.3647942841053009, |
| "learning_rate": 8.825658939345825e-05, |
| "loss": 0.1454, |
| "step": 1854 |
| }, |
| { |
| "epoch": 3.5282929148834996, |
| "grad_norm": 0.3890063762664795, |
| "learning_rate": 8.825023817084789e-05, |
| "loss": 0.1384, |
| "step": 1855 |
| }, |
| { |
| "epoch": 3.53019495958155, |
| "grad_norm": 0.4001372456550598, |
| "learning_rate": 8.824388694823754e-05, |
| "loss": 0.1429, |
| "step": 1856 |
| }, |
| { |
| "epoch": 3.5320970042796005, |
| "grad_norm": 0.407721608877182, |
| "learning_rate": 8.823753572562718e-05, |
| "loss": 0.1374, |
| "step": 1857 |
| }, |
| { |
| "epoch": 3.533999048977651, |
| "grad_norm": 0.37832140922546387, |
| "learning_rate": 8.823118450301684e-05, |
| "loss": 0.1236, |
| "step": 1858 |
| }, |
| { |
| "epoch": 3.5359010936757014, |
| "grad_norm": 0.35406047105789185, |
| "learning_rate": 8.822483328040648e-05, |
| "loss": 0.1306, |
| "step": 1859 |
| }, |
| { |
| "epoch": 3.537803138373752, |
| "grad_norm": 0.2923578917980194, |
| "learning_rate": 8.821848205779612e-05, |
| "loss": 0.0986, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.5397051830718023, |
| "grad_norm": 0.3824620544910431, |
| "learning_rate": 8.821213083518579e-05, |
| "loss": 0.1492, |
| "step": 1861 |
| }, |
| { |
| "epoch": 3.5416072277698527, |
| "grad_norm": 0.38851413130760193, |
| "learning_rate": 8.820577961257542e-05, |
| "loss": 0.1612, |
| "step": 1862 |
| }, |
| { |
| "epoch": 3.543509272467903, |
| "grad_norm": 0.3961692154407501, |
| "learning_rate": 8.819942838996508e-05, |
| "loss": 0.1525, |
| "step": 1863 |
| }, |
| { |
| "epoch": 3.5454113171659536, |
| "grad_norm": 0.423235684633255, |
| "learning_rate": 8.819307716735471e-05, |
| "loss": 0.1514, |
| "step": 1864 |
| }, |
| { |
| "epoch": 3.5473133618640036, |
| "grad_norm": 0.3355453610420227, |
| "learning_rate": 8.818672594474437e-05, |
| "loss": 0.1183, |
| "step": 1865 |
| }, |
| { |
| "epoch": 3.5492154065620545, |
| "grad_norm": 0.44291865825653076, |
| "learning_rate": 8.818037472213402e-05, |
| "loss": 0.1457, |
| "step": 1866 |
| }, |
| { |
| "epoch": 3.5511174512601045, |
| "grad_norm": 0.39356529712677, |
| "learning_rate": 8.817402349952366e-05, |
| "loss": 0.146, |
| "step": 1867 |
| }, |
| { |
| "epoch": 3.553019495958155, |
| "grad_norm": 0.28863412141799927, |
| "learning_rate": 8.816767227691331e-05, |
| "loss": 0.1113, |
| "step": 1868 |
| }, |
| { |
| "epoch": 3.5549215406562054, |
| "grad_norm": 0.3859669268131256, |
| "learning_rate": 8.816132105430296e-05, |
| "loss": 0.1234, |
| "step": 1869 |
| }, |
| { |
| "epoch": 3.556823585354256, |
| "grad_norm": 0.3483799993991852, |
| "learning_rate": 8.81549698316926e-05, |
| "loss": 0.1324, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.5587256300523062, |
| "grad_norm": 0.3053433299064636, |
| "learning_rate": 8.814861860908225e-05, |
| "loss": 0.1252, |
| "step": 1871 |
| }, |
| { |
| "epoch": 3.5606276747503567, |
| "grad_norm": 0.44125038385391235, |
| "learning_rate": 8.81422673864719e-05, |
| "loss": 0.1627, |
| "step": 1872 |
| }, |
| { |
| "epoch": 3.562529719448407, |
| "grad_norm": 0.35409316420555115, |
| "learning_rate": 8.813591616386154e-05, |
| "loss": 0.1312, |
| "step": 1873 |
| }, |
| { |
| "epoch": 3.5644317641464576, |
| "grad_norm": 0.4219510853290558, |
| "learning_rate": 8.812956494125119e-05, |
| "loss": 0.1522, |
| "step": 1874 |
| }, |
| { |
| "epoch": 3.566333808844508, |
| "grad_norm": 0.4153057932853699, |
| "learning_rate": 8.812321371864084e-05, |
| "loss": 0.1272, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.568235853542558, |
| "grad_norm": 0.3225264549255371, |
| "learning_rate": 8.81168624960305e-05, |
| "loss": 0.1461, |
| "step": 1876 |
| }, |
| { |
| "epoch": 3.570137898240609, |
| "grad_norm": 0.41065141558647156, |
| "learning_rate": 8.811051127342013e-05, |
| "loss": 0.1466, |
| "step": 1877 |
| }, |
| { |
| "epoch": 3.572039942938659, |
| "grad_norm": 0.33854374289512634, |
| "learning_rate": 8.810416005080979e-05, |
| "loss": 0.2636, |
| "step": 1878 |
| }, |
| { |
| "epoch": 3.5739419876367093, |
| "grad_norm": 0.4266054034233093, |
| "learning_rate": 8.809780882819944e-05, |
| "loss": 0.1546, |
| "step": 1879 |
| }, |
| { |
| "epoch": 3.57584403233476, |
| "grad_norm": 0.32462188601493835, |
| "learning_rate": 8.809145760558908e-05, |
| "loss": 0.0992, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.5777460770328102, |
| "grad_norm": 0.3243044912815094, |
| "learning_rate": 8.808510638297873e-05, |
| "loss": 0.127, |
| "step": 1881 |
| }, |
| { |
| "epoch": 3.5796481217308607, |
| "grad_norm": 0.36742255091667175, |
| "learning_rate": 8.807875516036838e-05, |
| "loss": 0.1648, |
| "step": 1882 |
| }, |
| { |
| "epoch": 3.581550166428911, |
| "grad_norm": 0.47478726506233215, |
| "learning_rate": 8.807240393775802e-05, |
| "loss": 0.1402, |
| "step": 1883 |
| }, |
| { |
| "epoch": 3.5834522111269616, |
| "grad_norm": 0.29675087332725525, |
| "learning_rate": 8.806605271514767e-05, |
| "loss": 0.1102, |
| "step": 1884 |
| }, |
| { |
| "epoch": 3.585354255825012, |
| "grad_norm": 0.26269370317459106, |
| "learning_rate": 8.805970149253732e-05, |
| "loss": 0.0926, |
| "step": 1885 |
| }, |
| { |
| "epoch": 3.5872563005230624, |
| "grad_norm": 0.42690059542655945, |
| "learning_rate": 8.805335026992696e-05, |
| "loss": 0.1663, |
| "step": 1886 |
| }, |
| { |
| "epoch": 3.5891583452211124, |
| "grad_norm": 0.4843170940876007, |
| "learning_rate": 8.804699904731661e-05, |
| "loss": 0.156, |
| "step": 1887 |
| }, |
| { |
| "epoch": 3.5910603899191633, |
| "grad_norm": 0.4166446030139923, |
| "learning_rate": 8.804064782470626e-05, |
| "loss": 0.1556, |
| "step": 1888 |
| }, |
| { |
| "epoch": 3.5929624346172133, |
| "grad_norm": 0.3265363872051239, |
| "learning_rate": 8.803429660209592e-05, |
| "loss": 0.122, |
| "step": 1889 |
| }, |
| { |
| "epoch": 3.5948644793152638, |
| "grad_norm": 0.4674152433872223, |
| "learning_rate": 8.802794537948555e-05, |
| "loss": 0.1706, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.596766524013314, |
| "grad_norm": 0.4072030782699585, |
| "learning_rate": 8.802159415687519e-05, |
| "loss": 0.1465, |
| "step": 1891 |
| }, |
| { |
| "epoch": 3.5986685687113646, |
| "grad_norm": 0.4924727976322174, |
| "learning_rate": 8.801524293426486e-05, |
| "loss": 0.153, |
| "step": 1892 |
| }, |
| { |
| "epoch": 3.600570613409415, |
| "grad_norm": 0.34262821078300476, |
| "learning_rate": 8.80088917116545e-05, |
| "loss": 0.1221, |
| "step": 1893 |
| }, |
| { |
| "epoch": 3.6024726581074655, |
| "grad_norm": 0.3641190528869629, |
| "learning_rate": 8.800254048904415e-05, |
| "loss": 0.1146, |
| "step": 1894 |
| }, |
| { |
| "epoch": 3.604374702805516, |
| "grad_norm": 0.3594358265399933, |
| "learning_rate": 8.799618926643379e-05, |
| "loss": 0.1198, |
| "step": 1895 |
| }, |
| { |
| "epoch": 3.6062767475035664, |
| "grad_norm": 0.40045297145843506, |
| "learning_rate": 8.798983804382344e-05, |
| "loss": 0.2122, |
| "step": 1896 |
| }, |
| { |
| "epoch": 3.608178792201617, |
| "grad_norm": 0.40417537093162537, |
| "learning_rate": 8.798348682121309e-05, |
| "loss": 0.1523, |
| "step": 1897 |
| }, |
| { |
| "epoch": 3.6100808368996673, |
| "grad_norm": 0.3493559658527374, |
| "learning_rate": 8.797713559860273e-05, |
| "loss": 0.1105, |
| "step": 1898 |
| }, |
| { |
| "epoch": 3.6119828815977177, |
| "grad_norm": 0.3540056645870209, |
| "learning_rate": 8.79707843759924e-05, |
| "loss": 0.1205, |
| "step": 1899 |
| }, |
| { |
| "epoch": 3.6138849262957677, |
| "grad_norm": 0.4836410582065582, |
| "learning_rate": 8.796443315338203e-05, |
| "loss": 0.184, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.6157869709938186, |
| "grad_norm": 0.34036317467689514, |
| "learning_rate": 8.795808193077167e-05, |
| "loss": 0.1313, |
| "step": 1901 |
| }, |
| { |
| "epoch": 3.6176890156918686, |
| "grad_norm": 0.34924453496932983, |
| "learning_rate": 8.795173070816132e-05, |
| "loss": 0.1018, |
| "step": 1902 |
| }, |
| { |
| "epoch": 3.619591060389919, |
| "grad_norm": 0.4308503270149231, |
| "learning_rate": 8.794537948555097e-05, |
| "loss": 0.1396, |
| "step": 1903 |
| }, |
| { |
| "epoch": 3.6214931050879695, |
| "grad_norm": 0.44268596172332764, |
| "learning_rate": 8.793902826294061e-05, |
| "loss": 0.1377, |
| "step": 1904 |
| }, |
| { |
| "epoch": 3.62339514978602, |
| "grad_norm": 0.36984702944755554, |
| "learning_rate": 8.793267704033026e-05, |
| "loss": 0.1343, |
| "step": 1905 |
| }, |
| { |
| "epoch": 3.6252971944840704, |
| "grad_norm": 0.3913877606391907, |
| "learning_rate": 8.792632581771992e-05, |
| "loss": 0.1443, |
| "step": 1906 |
| }, |
| { |
| "epoch": 3.627199239182121, |
| "grad_norm": 0.4213595986366272, |
| "learning_rate": 8.791997459510957e-05, |
| "loss": 0.1537, |
| "step": 1907 |
| }, |
| { |
| "epoch": 3.6291012838801713, |
| "grad_norm": 0.4095703959465027, |
| "learning_rate": 8.79136233724992e-05, |
| "loss": 0.151, |
| "step": 1908 |
| }, |
| { |
| "epoch": 3.6310033285782217, |
| "grad_norm": 0.366328626871109, |
| "learning_rate": 8.790727214988886e-05, |
| "loss": 0.1198, |
| "step": 1909 |
| }, |
| { |
| "epoch": 3.632905373276272, |
| "grad_norm": 0.4124557375907898, |
| "learning_rate": 8.790092092727851e-05, |
| "loss": 0.1408, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.634807417974322, |
| "grad_norm": 0.36249884963035583, |
| "learning_rate": 8.789456970466815e-05, |
| "loss": 0.2058, |
| "step": 1911 |
| }, |
| { |
| "epoch": 3.636709462672373, |
| "grad_norm": 0.40580618381500244, |
| "learning_rate": 8.78882184820578e-05, |
| "loss": 0.1247, |
| "step": 1912 |
| }, |
| { |
| "epoch": 3.638611507370423, |
| "grad_norm": 0.30640462040901184, |
| "learning_rate": 8.788186725944745e-05, |
| "loss": 0.1078, |
| "step": 1913 |
| }, |
| { |
| "epoch": 3.6405135520684735, |
| "grad_norm": 0.4200808107852936, |
| "learning_rate": 8.787551603683709e-05, |
| "loss": 0.1572, |
| "step": 1914 |
| }, |
| { |
| "epoch": 3.642415596766524, |
| "grad_norm": 0.43338900804519653, |
| "learning_rate": 8.786916481422674e-05, |
| "loss": 0.1606, |
| "step": 1915 |
| }, |
| { |
| "epoch": 3.6443176414645744, |
| "grad_norm": 0.4340536296367645, |
| "learning_rate": 8.78628135916164e-05, |
| "loss": 0.1711, |
| "step": 1916 |
| }, |
| { |
| "epoch": 3.646219686162625, |
| "grad_norm": 0.3239591419696808, |
| "learning_rate": 8.785646236900605e-05, |
| "loss": 0.1166, |
| "step": 1917 |
| }, |
| { |
| "epoch": 3.6481217308606753, |
| "grad_norm": 0.3957262933254242, |
| "learning_rate": 8.785011114639568e-05, |
| "loss": 0.1605, |
| "step": 1918 |
| }, |
| { |
| "epoch": 3.6500237755587257, |
| "grad_norm": 0.4386723041534424, |
| "learning_rate": 8.784375992378534e-05, |
| "loss": 0.1595, |
| "step": 1919 |
| }, |
| { |
| "epoch": 3.651925820256776, |
| "grad_norm": 0.376113623380661, |
| "learning_rate": 8.783740870117499e-05, |
| "loss": 0.1708, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.6538278649548266, |
| "grad_norm": 0.2861535847187042, |
| "learning_rate": 8.783105747856463e-05, |
| "loss": 0.1134, |
| "step": 1921 |
| }, |
| { |
| "epoch": 3.6557299096528766, |
| "grad_norm": 0.3381497263908386, |
| "learning_rate": 8.782470625595426e-05, |
| "loss": 0.1522, |
| "step": 1922 |
| }, |
| { |
| "epoch": 3.6576319543509275, |
| "grad_norm": 0.2682400047779083, |
| "learning_rate": 8.781835503334393e-05, |
| "loss": 0.1007, |
| "step": 1923 |
| }, |
| { |
| "epoch": 3.6595339990489775, |
| "grad_norm": 0.4277699887752533, |
| "learning_rate": 8.781200381073357e-05, |
| "loss": 0.1757, |
| "step": 1924 |
| }, |
| { |
| "epoch": 3.661436043747028, |
| "grad_norm": 0.3176470696926117, |
| "learning_rate": 8.780565258812322e-05, |
| "loss": 0.1186, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.6633380884450784, |
| "grad_norm": 0.32315725088119507, |
| "learning_rate": 8.779930136551287e-05, |
| "loss": 0.1353, |
| "step": 1926 |
| }, |
| { |
| "epoch": 3.665240133143129, |
| "grad_norm": 0.44492077827453613, |
| "learning_rate": 8.779295014290251e-05, |
| "loss": 0.1689, |
| "step": 1927 |
| }, |
| { |
| "epoch": 3.6671421778411792, |
| "grad_norm": 0.33450883626937866, |
| "learning_rate": 8.778659892029216e-05, |
| "loss": 0.1171, |
| "step": 1928 |
| }, |
| { |
| "epoch": 3.6690442225392297, |
| "grad_norm": 0.45678386092185974, |
| "learning_rate": 8.77802476976818e-05, |
| "loss": 0.1547, |
| "step": 1929 |
| }, |
| { |
| "epoch": 3.67094626723728, |
| "grad_norm": 0.3756123185157776, |
| "learning_rate": 8.777389647507147e-05, |
| "loss": 0.1441, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.6728483119353306, |
| "grad_norm": 0.30440792441368103, |
| "learning_rate": 8.77675452524611e-05, |
| "loss": 0.1034, |
| "step": 1931 |
| }, |
| { |
| "epoch": 3.674750356633381, |
| "grad_norm": 0.38540956377983093, |
| "learning_rate": 8.776119402985074e-05, |
| "loss": 0.1456, |
| "step": 1932 |
| }, |
| { |
| "epoch": 3.6766524013314315, |
| "grad_norm": 0.42409566044807434, |
| "learning_rate": 8.775484280724041e-05, |
| "loss": 0.1445, |
| "step": 1933 |
| }, |
| { |
| "epoch": 3.678554446029482, |
| "grad_norm": 0.3903610408306122, |
| "learning_rate": 8.774849158463005e-05, |
| "loss": 0.1428, |
| "step": 1934 |
| }, |
| { |
| "epoch": 3.680456490727532, |
| "grad_norm": 0.4002249836921692, |
| "learning_rate": 8.77421403620197e-05, |
| "loss": 0.1328, |
| "step": 1935 |
| }, |
| { |
| "epoch": 3.6823585354255823, |
| "grad_norm": 0.37625521421432495, |
| "learning_rate": 8.773578913940934e-05, |
| "loss": 0.1271, |
| "step": 1936 |
| }, |
| { |
| "epoch": 3.6842605801236328, |
| "grad_norm": 0.333882600069046, |
| "learning_rate": 8.772943791679899e-05, |
| "loss": 0.1209, |
| "step": 1937 |
| }, |
| { |
| "epoch": 3.686162624821683, |
| "grad_norm": 0.3934018313884735, |
| "learning_rate": 8.772308669418864e-05, |
| "loss": 0.1383, |
| "step": 1938 |
| }, |
| { |
| "epoch": 3.6880646695197337, |
| "grad_norm": 0.3329316973686218, |
| "learning_rate": 8.771673547157828e-05, |
| "loss": 0.1334, |
| "step": 1939 |
| }, |
| { |
| "epoch": 3.689966714217784, |
| "grad_norm": 0.3686552047729492, |
| "learning_rate": 8.771038424896793e-05, |
| "loss": 0.1163, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.6918687589158345, |
| "grad_norm": 0.35531577467918396, |
| "learning_rate": 8.770403302635758e-05, |
| "loss": 0.114, |
| "step": 1941 |
| }, |
| { |
| "epoch": 3.693770803613885, |
| "grad_norm": 0.4164102375507355, |
| "learning_rate": 8.769768180374722e-05, |
| "loss": 0.1271, |
| "step": 1942 |
| }, |
| { |
| "epoch": 3.6956728483119354, |
| "grad_norm": 0.4182850420475006, |
| "learning_rate": 8.769133058113687e-05, |
| "loss": 0.1343, |
| "step": 1943 |
| }, |
| { |
| "epoch": 3.697574893009986, |
| "grad_norm": 0.3373199701309204, |
| "learning_rate": 8.768497935852652e-05, |
| "loss": 0.1424, |
| "step": 1944 |
| }, |
| { |
| "epoch": 3.6994769377080363, |
| "grad_norm": 0.44398215413093567, |
| "learning_rate": 8.767862813591616e-05, |
| "loss": 0.1626, |
| "step": 1945 |
| }, |
| { |
| "epoch": 3.7013789824060863, |
| "grad_norm": 0.2877051830291748, |
| "learning_rate": 8.767227691330581e-05, |
| "loss": 0.0941, |
| "step": 1946 |
| }, |
| { |
| "epoch": 3.703281027104137, |
| "grad_norm": 0.30384746193885803, |
| "learning_rate": 8.766592569069547e-05, |
| "loss": 0.1239, |
| "step": 1947 |
| }, |
| { |
| "epoch": 3.705183071802187, |
| "grad_norm": 0.41360363364219666, |
| "learning_rate": 8.765957446808512e-05, |
| "loss": 0.1567, |
| "step": 1948 |
| }, |
| { |
| "epoch": 3.7070851165002376, |
| "grad_norm": 0.28865674138069153, |
| "learning_rate": 8.765322324547476e-05, |
| "loss": 0.1165, |
| "step": 1949 |
| }, |
| { |
| "epoch": 3.708987161198288, |
| "grad_norm": 0.341654509305954, |
| "learning_rate": 8.764687202286441e-05, |
| "loss": 0.1199, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.7108892058963385, |
| "grad_norm": 0.33211663365364075, |
| "learning_rate": 8.764052080025406e-05, |
| "loss": 0.1386, |
| "step": 1951 |
| }, |
| { |
| "epoch": 3.712791250594389, |
| "grad_norm": 0.37999534606933594, |
| "learning_rate": 8.76341695776437e-05, |
| "loss": 0.1411, |
| "step": 1952 |
| }, |
| { |
| "epoch": 3.7146932952924394, |
| "grad_norm": 0.3158533573150635, |
| "learning_rate": 8.762781835503335e-05, |
| "loss": 0.1082, |
| "step": 1953 |
| }, |
| { |
| "epoch": 3.71659533999049, |
| "grad_norm": 0.42071765661239624, |
| "learning_rate": 8.7621467132423e-05, |
| "loss": 0.2395, |
| "step": 1954 |
| }, |
| { |
| "epoch": 3.7184973846885403, |
| "grad_norm": 0.3723015785217285, |
| "learning_rate": 8.761511590981264e-05, |
| "loss": 0.1427, |
| "step": 1955 |
| }, |
| { |
| "epoch": 3.7203994293865907, |
| "grad_norm": 0.31827929615974426, |
| "learning_rate": 8.760876468720229e-05, |
| "loss": 0.0983, |
| "step": 1956 |
| }, |
| { |
| "epoch": 3.7223014740846407, |
| "grad_norm": 0.45022010803222656, |
| "learning_rate": 8.760241346459194e-05, |
| "loss": 0.1658, |
| "step": 1957 |
| }, |
| { |
| "epoch": 3.7242035187826916, |
| "grad_norm": 0.4069976508617401, |
| "learning_rate": 8.759606224198158e-05, |
| "loss": 0.1277, |
| "step": 1958 |
| }, |
| { |
| "epoch": 3.7261055634807416, |
| "grad_norm": 0.3239624500274658, |
| "learning_rate": 8.758971101937123e-05, |
| "loss": 0.1204, |
| "step": 1959 |
| }, |
| { |
| "epoch": 3.728007608178792, |
| "grad_norm": 0.38038089871406555, |
| "learning_rate": 8.758335979676087e-05, |
| "loss": 0.1305, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.7299096528768425, |
| "grad_norm": 0.44531160593032837, |
| "learning_rate": 8.757700857415054e-05, |
| "loss": 0.1504, |
| "step": 1961 |
| }, |
| { |
| "epoch": 3.731811697574893, |
| "grad_norm": 0.380256712436676, |
| "learning_rate": 8.757065735154017e-05, |
| "loss": 0.1213, |
| "step": 1962 |
| }, |
| { |
| "epoch": 3.7337137422729434, |
| "grad_norm": 0.39982911944389343, |
| "learning_rate": 8.756430612892981e-05, |
| "loss": 0.1255, |
| "step": 1963 |
| }, |
| { |
| "epoch": 3.735615786970994, |
| "grad_norm": 0.39186495542526245, |
| "learning_rate": 8.755795490631948e-05, |
| "loss": 0.1459, |
| "step": 1964 |
| }, |
| { |
| "epoch": 3.7375178316690443, |
| "grad_norm": 0.4191820025444031, |
| "learning_rate": 8.755160368370912e-05, |
| "loss": 0.1269, |
| "step": 1965 |
| }, |
| { |
| "epoch": 3.7394198763670947, |
| "grad_norm": 0.3438499867916107, |
| "learning_rate": 8.754525246109877e-05, |
| "loss": 0.124, |
| "step": 1966 |
| }, |
| { |
| "epoch": 3.741321921065145, |
| "grad_norm": 0.3626823127269745, |
| "learning_rate": 8.753890123848841e-05, |
| "loss": 0.1326, |
| "step": 1967 |
| }, |
| { |
| "epoch": 3.743223965763195, |
| "grad_norm": 0.3823707103729248, |
| "learning_rate": 8.753255001587806e-05, |
| "loss": 0.1351, |
| "step": 1968 |
| }, |
| { |
| "epoch": 3.745126010461246, |
| "grad_norm": 0.3537774980068207, |
| "learning_rate": 8.752619879326771e-05, |
| "loss": 0.1079, |
| "step": 1969 |
| }, |
| { |
| "epoch": 3.747028055159296, |
| "grad_norm": 0.4008922576904297, |
| "learning_rate": 8.751984757065735e-05, |
| "loss": 0.1752, |
| "step": 1970 |
| }, |
| { |
| "epoch": 3.7489300998573465, |
| "grad_norm": 0.3501138687133789, |
| "learning_rate": 8.751349634804701e-05, |
| "loss": 0.1296, |
| "step": 1971 |
| }, |
| { |
| "epoch": 3.750832144555397, |
| "grad_norm": 0.3441070318222046, |
| "learning_rate": 8.750714512543665e-05, |
| "loss": 0.1161, |
| "step": 1972 |
| }, |
| { |
| "epoch": 3.7527341892534474, |
| "grad_norm": 0.42847099900245667, |
| "learning_rate": 8.750079390282629e-05, |
| "loss": 0.1483, |
| "step": 1973 |
| }, |
| { |
| "epoch": 3.754636233951498, |
| "grad_norm": 0.4879817068576813, |
| "learning_rate": 8.749444268021594e-05, |
| "loss": 0.1725, |
| "step": 1974 |
| }, |
| { |
| "epoch": 3.7565382786495483, |
| "grad_norm": 0.32576873898506165, |
| "learning_rate": 8.74880914576056e-05, |
| "loss": 0.1211, |
| "step": 1975 |
| }, |
| { |
| "epoch": 3.7584403233475987, |
| "grad_norm": 0.4470548927783966, |
| "learning_rate": 8.748174023499523e-05, |
| "loss": 0.155, |
| "step": 1976 |
| }, |
| { |
| "epoch": 3.760342368045649, |
| "grad_norm": 0.506020724773407, |
| "learning_rate": 8.747538901238488e-05, |
| "loss": 0.1924, |
| "step": 1977 |
| }, |
| { |
| "epoch": 3.7622444127436996, |
| "grad_norm": 0.3949258625507355, |
| "learning_rate": 8.746903778977454e-05, |
| "loss": 0.1365, |
| "step": 1978 |
| }, |
| { |
| "epoch": 3.76414645744175, |
| "grad_norm": 0.381511390209198, |
| "learning_rate": 8.746268656716419e-05, |
| "loss": 0.1706, |
| "step": 1979 |
| }, |
| { |
| "epoch": 3.7660485021398005, |
| "grad_norm": 0.32848381996154785, |
| "learning_rate": 8.745633534455383e-05, |
| "loss": 0.1302, |
| "step": 1980 |
| }, |
| { |
| "epoch": 3.7679505468378505, |
| "grad_norm": 0.39011678099632263, |
| "learning_rate": 8.744998412194348e-05, |
| "loss": 0.1501, |
| "step": 1981 |
| }, |
| { |
| "epoch": 3.7698525915359014, |
| "grad_norm": 0.35527095198631287, |
| "learning_rate": 8.744363289933313e-05, |
| "loss": 0.1218, |
| "step": 1982 |
| }, |
| { |
| "epoch": 3.7717546362339514, |
| "grad_norm": 0.4448065459728241, |
| "learning_rate": 8.743728167672277e-05, |
| "loss": 0.1527, |
| "step": 1983 |
| }, |
| { |
| "epoch": 3.773656680932002, |
| "grad_norm": 0.45173025131225586, |
| "learning_rate": 8.743093045411242e-05, |
| "loss": 0.1546, |
| "step": 1984 |
| }, |
| { |
| "epoch": 3.7755587256300522, |
| "grad_norm": 0.3051410913467407, |
| "learning_rate": 8.742457923150207e-05, |
| "loss": 0.1176, |
| "step": 1985 |
| }, |
| { |
| "epoch": 3.7774607703281027, |
| "grad_norm": 0.4559077322483063, |
| "learning_rate": 8.741822800889171e-05, |
| "loss": 0.1466, |
| "step": 1986 |
| }, |
| { |
| "epoch": 3.779362815026153, |
| "grad_norm": 0.33901482820510864, |
| "learning_rate": 8.741187678628136e-05, |
| "loss": 0.1263, |
| "step": 1987 |
| }, |
| { |
| "epoch": 3.7812648597242036, |
| "grad_norm": 0.3377963900566101, |
| "learning_rate": 8.740552556367101e-05, |
| "loss": 0.1029, |
| "step": 1988 |
| }, |
| { |
| "epoch": 3.783166904422254, |
| "grad_norm": 0.3285292088985443, |
| "learning_rate": 8.739917434106067e-05, |
| "loss": 0.1256, |
| "step": 1989 |
| }, |
| { |
| "epoch": 3.7850689491203044, |
| "grad_norm": 0.4042280614376068, |
| "learning_rate": 8.73928231184503e-05, |
| "loss": 0.1554, |
| "step": 1990 |
| }, |
| { |
| "epoch": 3.786970993818355, |
| "grad_norm": 0.374153733253479, |
| "learning_rate": 8.738647189583996e-05, |
| "loss": 0.1109, |
| "step": 1991 |
| }, |
| { |
| "epoch": 3.788873038516405, |
| "grad_norm": 0.3667593002319336, |
| "learning_rate": 8.738012067322961e-05, |
| "loss": 0.1014, |
| "step": 1992 |
| }, |
| { |
| "epoch": 3.7907750832144558, |
| "grad_norm": 0.40893805027008057, |
| "learning_rate": 8.737376945061925e-05, |
| "loss": 0.137, |
| "step": 1993 |
| }, |
| { |
| "epoch": 3.7926771279125058, |
| "grad_norm": 0.4428877830505371, |
| "learning_rate": 8.736741822800888e-05, |
| "loss": 0.1516, |
| "step": 1994 |
| }, |
| { |
| "epoch": 3.794579172610556, |
| "grad_norm": 0.4404061734676361, |
| "learning_rate": 8.736106700539855e-05, |
| "loss": 0.155, |
| "step": 1995 |
| }, |
| { |
| "epoch": 3.7964812173086067, |
| "grad_norm": 0.3298742473125458, |
| "learning_rate": 8.735471578278819e-05, |
| "loss": 0.1244, |
| "step": 1996 |
| }, |
| { |
| "epoch": 3.798383262006657, |
| "grad_norm": 0.36190545558929443, |
| "learning_rate": 8.734836456017784e-05, |
| "loss": 0.148, |
| "step": 1997 |
| }, |
| { |
| "epoch": 3.8002853067047075, |
| "grad_norm": 0.34386786818504333, |
| "learning_rate": 8.734201333756749e-05, |
| "loss": 0.1479, |
| "step": 1998 |
| }, |
| { |
| "epoch": 3.802187351402758, |
| "grad_norm": 0.434257835149765, |
| "learning_rate": 8.733566211495713e-05, |
| "loss": 0.1624, |
| "step": 1999 |
| }, |
| { |
| "epoch": 3.8040893961008084, |
| "grad_norm": 0.369232177734375, |
| "learning_rate": 8.732931089234678e-05, |
| "loss": 0.1297, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.805991440798859, |
| "grad_norm": 0.31438469886779785, |
| "learning_rate": 8.732295966973642e-05, |
| "loss": 0.1074, |
| "step": 2001 |
| }, |
| { |
| "epoch": 3.8078934854969093, |
| "grad_norm": 0.4128814935684204, |
| "learning_rate": 8.731660844712609e-05, |
| "loss": 0.1489, |
| "step": 2002 |
| }, |
| { |
| "epoch": 3.8097955301949593, |
| "grad_norm": 0.2960624694824219, |
| "learning_rate": 8.731025722451572e-05, |
| "loss": 0.1063, |
| "step": 2003 |
| }, |
| { |
| "epoch": 3.81169757489301, |
| "grad_norm": 0.35740041732788086, |
| "learning_rate": 8.730390600190536e-05, |
| "loss": 0.1438, |
| "step": 2004 |
| }, |
| { |
| "epoch": 3.81359961959106, |
| "grad_norm": 0.3402657210826874, |
| "learning_rate": 8.729755477929501e-05, |
| "loss": 0.151, |
| "step": 2005 |
| }, |
| { |
| "epoch": 3.8155016642891106, |
| "grad_norm": 0.3280869722366333, |
| "learning_rate": 8.729120355668467e-05, |
| "loss": 0.112, |
| "step": 2006 |
| }, |
| { |
| "epoch": 3.817403708987161, |
| "grad_norm": 0.3747129440307617, |
| "learning_rate": 8.728485233407432e-05, |
| "loss": 0.1191, |
| "step": 2007 |
| }, |
| { |
| "epoch": 3.8193057536852115, |
| "grad_norm": 0.3609796464443207, |
| "learning_rate": 8.727850111146396e-05, |
| "loss": 0.1373, |
| "step": 2008 |
| }, |
| { |
| "epoch": 3.821207798383262, |
| "grad_norm": 0.38992708921432495, |
| "learning_rate": 8.727214988885361e-05, |
| "loss": 0.1474, |
| "step": 2009 |
| }, |
| { |
| "epoch": 3.8231098430813124, |
| "grad_norm": 0.3531118929386139, |
| "learning_rate": 8.726579866624326e-05, |
| "loss": 0.1188, |
| "step": 2010 |
| }, |
| { |
| "epoch": 3.825011887779363, |
| "grad_norm": 0.30585137009620667, |
| "learning_rate": 8.72594474436329e-05, |
| "loss": 0.1072, |
| "step": 2011 |
| }, |
| { |
| "epoch": 3.8269139324774133, |
| "grad_norm": 0.40438538789749146, |
| "learning_rate": 8.725309622102255e-05, |
| "loss": 0.1527, |
| "step": 2012 |
| }, |
| { |
| "epoch": 3.8288159771754637, |
| "grad_norm": 0.31290772557258606, |
| "learning_rate": 8.72467449984122e-05, |
| "loss": 0.1251, |
| "step": 2013 |
| }, |
| { |
| "epoch": 3.830718021873514, |
| "grad_norm": 0.389160692691803, |
| "learning_rate": 8.724039377580184e-05, |
| "loss": 0.1387, |
| "step": 2014 |
| }, |
| { |
| "epoch": 3.8326200665715646, |
| "grad_norm": 0.34139397740364075, |
| "learning_rate": 8.723404255319149e-05, |
| "loss": 0.1205, |
| "step": 2015 |
| }, |
| { |
| "epoch": 3.8345221112696146, |
| "grad_norm": 0.4144088923931122, |
| "learning_rate": 8.722769133058114e-05, |
| "loss": 0.1493, |
| "step": 2016 |
| }, |
| { |
| "epoch": 3.8364241559676655, |
| "grad_norm": 0.3793914318084717, |
| "learning_rate": 8.722134010797078e-05, |
| "loss": 0.1379, |
| "step": 2017 |
| }, |
| { |
| "epoch": 3.8383262006657155, |
| "grad_norm": 0.3809344470500946, |
| "learning_rate": 8.721498888536043e-05, |
| "loss": 0.196, |
| "step": 2018 |
| }, |
| { |
| "epoch": 3.840228245363766, |
| "grad_norm": 0.3764810860157013, |
| "learning_rate": 8.720863766275009e-05, |
| "loss": 0.1096, |
| "step": 2019 |
| }, |
| { |
| "epoch": 3.8421302900618164, |
| "grad_norm": 0.47973567247390747, |
| "learning_rate": 8.720228644013974e-05, |
| "loss": 0.1195, |
| "step": 2020 |
| }, |
| { |
| "epoch": 3.844032334759867, |
| "grad_norm": 0.4527863562107086, |
| "learning_rate": 8.719593521752938e-05, |
| "loss": 0.2112, |
| "step": 2021 |
| }, |
| { |
| "epoch": 3.8459343794579173, |
| "grad_norm": 0.39066699147224426, |
| "learning_rate": 8.718958399491903e-05, |
| "loss": 0.1281, |
| "step": 2022 |
| }, |
| { |
| "epoch": 3.8478364241559677, |
| "grad_norm": 0.37056446075439453, |
| "learning_rate": 8.718323277230868e-05, |
| "loss": 0.1519, |
| "step": 2023 |
| }, |
| { |
| "epoch": 3.849738468854018, |
| "grad_norm": 0.516057550907135, |
| "learning_rate": 8.717688154969832e-05, |
| "loss": 0.1657, |
| "step": 2024 |
| }, |
| { |
| "epoch": 3.8516405135520686, |
| "grad_norm": 0.3468872010707855, |
| "learning_rate": 8.717053032708797e-05, |
| "loss": 0.1408, |
| "step": 2025 |
| }, |
| { |
| "epoch": 3.853542558250119, |
| "grad_norm": 0.5452744364738464, |
| "learning_rate": 8.716417910447762e-05, |
| "loss": 0.3173, |
| "step": 2026 |
| }, |
| { |
| "epoch": 3.855444602948169, |
| "grad_norm": 0.4378301501274109, |
| "learning_rate": 8.715782788186726e-05, |
| "loss": 0.136, |
| "step": 2027 |
| }, |
| { |
| "epoch": 3.85734664764622, |
| "grad_norm": 0.49818679690361023, |
| "learning_rate": 8.715147665925691e-05, |
| "loss": 0.233, |
| "step": 2028 |
| }, |
| { |
| "epoch": 3.85924869234427, |
| "grad_norm": 0.4228188693523407, |
| "learning_rate": 8.714512543664656e-05, |
| "loss": 0.1485, |
| "step": 2029 |
| }, |
| { |
| "epoch": 3.8611507370423204, |
| "grad_norm": 0.34110891819000244, |
| "learning_rate": 8.71387742140362e-05, |
| "loss": 0.1455, |
| "step": 2030 |
| }, |
| { |
| "epoch": 3.863052781740371, |
| "grad_norm": 0.38667479157447815, |
| "learning_rate": 8.713242299142585e-05, |
| "loss": 0.1302, |
| "step": 2031 |
| }, |
| { |
| "epoch": 3.8649548264384213, |
| "grad_norm": 0.3971845805644989, |
| "learning_rate": 8.712607176881549e-05, |
| "loss": 0.1562, |
| "step": 2032 |
| }, |
| { |
| "epoch": 3.8668568711364717, |
| "grad_norm": 0.32637760043144226, |
| "learning_rate": 8.711972054620516e-05, |
| "loss": 0.1213, |
| "step": 2033 |
| }, |
| { |
| "epoch": 3.868758915834522, |
| "grad_norm": 0.3475836217403412, |
| "learning_rate": 8.71133693235948e-05, |
| "loss": 0.1514, |
| "step": 2034 |
| }, |
| { |
| "epoch": 3.8706609605325726, |
| "grad_norm": 0.37775367498397827, |
| "learning_rate": 8.710701810098443e-05, |
| "loss": 0.1672, |
| "step": 2035 |
| }, |
| { |
| "epoch": 3.872563005230623, |
| "grad_norm": 0.4611580967903137, |
| "learning_rate": 8.71006668783741e-05, |
| "loss": 0.1977, |
| "step": 2036 |
| }, |
| { |
| "epoch": 3.8744650499286735, |
| "grad_norm": 0.34681427478790283, |
| "learning_rate": 8.709431565576374e-05, |
| "loss": 0.127, |
| "step": 2037 |
| }, |
| { |
| "epoch": 3.8763670946267235, |
| "grad_norm": 0.3547581732273102, |
| "learning_rate": 8.708796443315339e-05, |
| "loss": 0.1432, |
| "step": 2038 |
| }, |
| { |
| "epoch": 3.8782691393247744, |
| "grad_norm": 0.3560992479324341, |
| "learning_rate": 8.708161321054303e-05, |
| "loss": 0.1269, |
| "step": 2039 |
| }, |
| { |
| "epoch": 3.8801711840228243, |
| "grad_norm": 0.48965948820114136, |
| "learning_rate": 8.707526198793268e-05, |
| "loss": 0.1694, |
| "step": 2040 |
| }, |
| { |
| "epoch": 3.882073228720875, |
| "grad_norm": 0.4042951464653015, |
| "learning_rate": 8.706891076532233e-05, |
| "loss": 0.1432, |
| "step": 2041 |
| }, |
| { |
| "epoch": 3.8839752734189252, |
| "grad_norm": 0.40321534872055054, |
| "learning_rate": 8.706255954271197e-05, |
| "loss": 0.1206, |
| "step": 2042 |
| }, |
| { |
| "epoch": 3.8858773181169757, |
| "grad_norm": 0.5154759883880615, |
| "learning_rate": 8.705620832010164e-05, |
| "loss": 0.2034, |
| "step": 2043 |
| }, |
| { |
| "epoch": 3.887779362815026, |
| "grad_norm": 0.3707939684391022, |
| "learning_rate": 8.704985709749127e-05, |
| "loss": 0.1408, |
| "step": 2044 |
| }, |
| { |
| "epoch": 3.8896814075130766, |
| "grad_norm": 0.46117648482322693, |
| "learning_rate": 8.704350587488091e-05, |
| "loss": 0.1921, |
| "step": 2045 |
| }, |
| { |
| "epoch": 3.891583452211127, |
| "grad_norm": 0.4917357265949249, |
| "learning_rate": 8.703715465227056e-05, |
| "loss": 0.1684, |
| "step": 2046 |
| }, |
| { |
| "epoch": 3.8934854969091774, |
| "grad_norm": 0.36523228883743286, |
| "learning_rate": 8.703080342966022e-05, |
| "loss": 0.1977, |
| "step": 2047 |
| }, |
| { |
| "epoch": 3.895387541607228, |
| "grad_norm": 0.3557770550251007, |
| "learning_rate": 8.702445220704985e-05, |
| "loss": 0.1326, |
| "step": 2048 |
| }, |
| { |
| "epoch": 3.8972895863052783, |
| "grad_norm": 0.2716139853000641, |
| "learning_rate": 8.70181009844395e-05, |
| "loss": 0.1119, |
| "step": 2049 |
| }, |
| { |
| "epoch": 3.8991916310033288, |
| "grad_norm": 0.3266098201274872, |
| "learning_rate": 8.701174976182916e-05, |
| "loss": 0.1355, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.9010936757013788, |
| "grad_norm": 0.4549683928489685, |
| "learning_rate": 8.700539853921881e-05, |
| "loss": 0.174, |
| "step": 2051 |
| }, |
| { |
| "epoch": 3.9029957203994297, |
| "grad_norm": 0.3865867555141449, |
| "learning_rate": 8.699904731660845e-05, |
| "loss": 0.131, |
| "step": 2052 |
| }, |
| { |
| "epoch": 3.9048977650974797, |
| "grad_norm": 0.4354785084724426, |
| "learning_rate": 8.69926960939981e-05, |
| "loss": 0.1497, |
| "step": 2053 |
| }, |
| { |
| "epoch": 3.90679980979553, |
| "grad_norm": 0.38822686672210693, |
| "learning_rate": 8.698634487138775e-05, |
| "loss": 0.1272, |
| "step": 2054 |
| }, |
| { |
| "epoch": 3.9087018544935805, |
| "grad_norm": 0.4395056366920471, |
| "learning_rate": 8.697999364877739e-05, |
| "loss": 0.1801, |
| "step": 2055 |
| }, |
| { |
| "epoch": 3.910603899191631, |
| "grad_norm": 0.4310166835784912, |
| "learning_rate": 8.697364242616704e-05, |
| "loss": 0.1457, |
| "step": 2056 |
| }, |
| { |
| "epoch": 3.9125059438896814, |
| "grad_norm": 0.42527538537979126, |
| "learning_rate": 8.69672912035567e-05, |
| "loss": 0.1827, |
| "step": 2057 |
| }, |
| { |
| "epoch": 3.914407988587732, |
| "grad_norm": 0.41284388303756714, |
| "learning_rate": 8.696093998094633e-05, |
| "loss": 0.1588, |
| "step": 2058 |
| }, |
| { |
| "epoch": 3.9163100332857823, |
| "grad_norm": 0.3561374247074127, |
| "learning_rate": 8.695458875833598e-05, |
| "loss": 0.138, |
| "step": 2059 |
| }, |
| { |
| "epoch": 3.9182120779838328, |
| "grad_norm": 0.4057970941066742, |
| "learning_rate": 8.694823753572564e-05, |
| "loss": 0.1504, |
| "step": 2060 |
| }, |
| { |
| "epoch": 3.920114122681883, |
| "grad_norm": 0.47292712330818176, |
| "learning_rate": 8.694188631311529e-05, |
| "loss": 0.1417, |
| "step": 2061 |
| }, |
| { |
| "epoch": 3.922016167379933, |
| "grad_norm": 0.4207940995693207, |
| "learning_rate": 8.693553509050493e-05, |
| "loss": 0.1372, |
| "step": 2062 |
| }, |
| { |
| "epoch": 3.923918212077984, |
| "grad_norm": 0.5482998490333557, |
| "learning_rate": 8.692918386789456e-05, |
| "loss": 0.1917, |
| "step": 2063 |
| }, |
| { |
| "epoch": 3.925820256776034, |
| "grad_norm": 0.41113635897636414, |
| "learning_rate": 8.692283264528423e-05, |
| "loss": 0.1479, |
| "step": 2064 |
| }, |
| { |
| "epoch": 3.9277223014740845, |
| "grad_norm": 0.3470059037208557, |
| "learning_rate": 8.691648142267387e-05, |
| "loss": 0.1235, |
| "step": 2065 |
| }, |
| { |
| "epoch": 3.929624346172135, |
| "grad_norm": 0.4131185710430145, |
| "learning_rate": 8.69101302000635e-05, |
| "loss": 0.1476, |
| "step": 2066 |
| }, |
| { |
| "epoch": 3.9315263908701854, |
| "grad_norm": 0.3750738501548767, |
| "learning_rate": 8.690377897745317e-05, |
| "loss": 0.1517, |
| "step": 2067 |
| }, |
| { |
| "epoch": 3.933428435568236, |
| "grad_norm": 0.37411704659461975, |
| "learning_rate": 8.689742775484281e-05, |
| "loss": 0.1493, |
| "step": 2068 |
| }, |
| { |
| "epoch": 3.9353304802662863, |
| "grad_norm": 0.4208986759185791, |
| "learning_rate": 8.689107653223246e-05, |
| "loss": 0.1558, |
| "step": 2069 |
| }, |
| { |
| "epoch": 3.9372325249643367, |
| "grad_norm": 0.36959660053253174, |
| "learning_rate": 8.68847253096221e-05, |
| "loss": 0.1247, |
| "step": 2070 |
| }, |
| { |
| "epoch": 3.939134569662387, |
| "grad_norm": 0.3977148234844208, |
| "learning_rate": 8.687837408701175e-05, |
| "loss": 0.1428, |
| "step": 2071 |
| }, |
| { |
| "epoch": 3.9410366143604376, |
| "grad_norm": 0.40076392889022827, |
| "learning_rate": 8.68720228644014e-05, |
| "loss": 0.1652, |
| "step": 2072 |
| }, |
| { |
| "epoch": 3.9429386590584876, |
| "grad_norm": 0.3828325569629669, |
| "learning_rate": 8.686567164179104e-05, |
| "loss": 0.1518, |
| "step": 2073 |
| }, |
| { |
| "epoch": 3.9448407037565385, |
| "grad_norm": 0.35112518072128296, |
| "learning_rate": 8.685932041918071e-05, |
| "loss": 0.1303, |
| "step": 2074 |
| }, |
| { |
| "epoch": 3.9467427484545885, |
| "grad_norm": 0.31564921140670776, |
| "learning_rate": 8.685296919657035e-05, |
| "loss": 0.1325, |
| "step": 2075 |
| }, |
| { |
| "epoch": 3.948644793152639, |
| "grad_norm": 0.3110829293727875, |
| "learning_rate": 8.684661797395998e-05, |
| "loss": 0.0958, |
| "step": 2076 |
| }, |
| { |
| "epoch": 3.9505468378506894, |
| "grad_norm": 0.41574040055274963, |
| "learning_rate": 8.684026675134964e-05, |
| "loss": 0.142, |
| "step": 2077 |
| }, |
| { |
| "epoch": 3.95244888254874, |
| "grad_norm": 0.4371127188205719, |
| "learning_rate": 8.683391552873929e-05, |
| "loss": 0.1699, |
| "step": 2078 |
| }, |
| { |
| "epoch": 3.9543509272467903, |
| "grad_norm": 0.41888341307640076, |
| "learning_rate": 8.682756430612894e-05, |
| "loss": 0.1467, |
| "step": 2079 |
| }, |
| { |
| "epoch": 3.9562529719448407, |
| "grad_norm": 0.4013144373893738, |
| "learning_rate": 8.682121308351858e-05, |
| "loss": 0.1541, |
| "step": 2080 |
| }, |
| { |
| "epoch": 3.958155016642891, |
| "grad_norm": 0.3627847135066986, |
| "learning_rate": 8.681486186090823e-05, |
| "loss": 0.1412, |
| "step": 2081 |
| }, |
| { |
| "epoch": 3.9600570613409416, |
| "grad_norm": 0.34517934918403625, |
| "learning_rate": 8.680851063829788e-05, |
| "loss": 0.1302, |
| "step": 2082 |
| }, |
| { |
| "epoch": 3.961959106038992, |
| "grad_norm": 0.409612238407135, |
| "learning_rate": 8.680215941568752e-05, |
| "loss": 0.1806, |
| "step": 2083 |
| }, |
| { |
| "epoch": 3.9638611507370425, |
| "grad_norm": 0.37562572956085205, |
| "learning_rate": 8.679580819307717e-05, |
| "loss": 0.1305, |
| "step": 2084 |
| }, |
| { |
| "epoch": 3.965763195435093, |
| "grad_norm": 0.30839917063713074, |
| "learning_rate": 8.678945697046682e-05, |
| "loss": 0.1179, |
| "step": 2085 |
| }, |
| { |
| "epoch": 3.967665240133143, |
| "grad_norm": 0.4009683430194855, |
| "learning_rate": 8.678310574785646e-05, |
| "loss": 0.1392, |
| "step": 2086 |
| }, |
| { |
| "epoch": 3.969567284831194, |
| "grad_norm": 0.5373052358627319, |
| "learning_rate": 8.677675452524611e-05, |
| "loss": 0.2366, |
| "step": 2087 |
| }, |
| { |
| "epoch": 3.971469329529244, |
| "grad_norm": 0.44061073660850525, |
| "learning_rate": 8.677040330263576e-05, |
| "loss": 0.1541, |
| "step": 2088 |
| }, |
| { |
| "epoch": 3.9733713742272943, |
| "grad_norm": 0.6880194544792175, |
| "learning_rate": 8.67640520800254e-05, |
| "loss": 0.1822, |
| "step": 2089 |
| }, |
| { |
| "epoch": 3.9752734189253447, |
| "grad_norm": 0.4342186450958252, |
| "learning_rate": 8.675770085741505e-05, |
| "loss": 0.1398, |
| "step": 2090 |
| }, |
| { |
| "epoch": 3.977175463623395, |
| "grad_norm": 0.3437482714653015, |
| "learning_rate": 8.675134963480471e-05, |
| "loss": 0.1407, |
| "step": 2091 |
| }, |
| { |
| "epoch": 3.9790775083214456, |
| "grad_norm": 0.43729832768440247, |
| "learning_rate": 8.674499841219436e-05, |
| "loss": 0.1604, |
| "step": 2092 |
| }, |
| { |
| "epoch": 3.980979553019496, |
| "grad_norm": 0.36654895544052124, |
| "learning_rate": 8.6738647189584e-05, |
| "loss": 0.1261, |
| "step": 2093 |
| }, |
| { |
| "epoch": 3.9828815977175465, |
| "grad_norm": 0.40422323346138, |
| "learning_rate": 8.673229596697365e-05, |
| "loss": 0.1463, |
| "step": 2094 |
| }, |
| { |
| "epoch": 3.984783642415597, |
| "grad_norm": 0.37436428666114807, |
| "learning_rate": 8.67259447443633e-05, |
| "loss": 0.1283, |
| "step": 2095 |
| }, |
| { |
| "epoch": 3.9866856871136473, |
| "grad_norm": 0.4568138122558594, |
| "learning_rate": 8.671959352175294e-05, |
| "loss": 0.1735, |
| "step": 2096 |
| }, |
| { |
| "epoch": 3.9885877318116973, |
| "grad_norm": 0.3864310681819916, |
| "learning_rate": 8.671324229914259e-05, |
| "loss": 0.1458, |
| "step": 2097 |
| }, |
| { |
| "epoch": 3.9904897765097482, |
| "grad_norm": 0.3622378408908844, |
| "learning_rate": 8.670689107653224e-05, |
| "loss": 0.1333, |
| "step": 2098 |
| }, |
| { |
| "epoch": 3.9923918212077982, |
| "grad_norm": 0.5126944780349731, |
| "learning_rate": 8.670053985392188e-05, |
| "loss": 0.1897, |
| "step": 2099 |
| }, |
| { |
| "epoch": 3.9942938659058487, |
| "grad_norm": 0.3905584216117859, |
| "learning_rate": 8.669418863131153e-05, |
| "loss": 0.1743, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.996195910603899, |
| "grad_norm": 0.4149746298789978, |
| "learning_rate": 8.668783740870118e-05, |
| "loss": 0.1686, |
| "step": 2101 |
| }, |
| { |
| "epoch": 3.9980979553019496, |
| "grad_norm": 0.30447009205818176, |
| "learning_rate": 8.668148618609082e-05, |
| "loss": 0.1079, |
| "step": 2102 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.533173143863678, |
| "learning_rate": 8.667513496348047e-05, |
| "loss": 0.1652, |
| "step": 2103 |
| }, |
| { |
| "epoch": 4.00190204469805, |
| "grad_norm": 0.26669684052467346, |
| "learning_rate": 8.666878374087011e-05, |
| "loss": 0.1105, |
| "step": 2104 |
| }, |
| { |
| "epoch": 4.003804089396101, |
| "grad_norm": 0.2511195242404938, |
| "learning_rate": 8.666243251825978e-05, |
| "loss": 0.1018, |
| "step": 2105 |
| }, |
| { |
| "epoch": 4.005706134094151, |
| "grad_norm": 0.2838079035282135, |
| "learning_rate": 8.665608129564942e-05, |
| "loss": 0.0979, |
| "step": 2106 |
| }, |
| { |
| "epoch": 4.007608178792202, |
| "grad_norm": 0.3789231479167938, |
| "learning_rate": 8.664973007303905e-05, |
| "loss": 0.1216, |
| "step": 2107 |
| }, |
| { |
| "epoch": 4.009510223490252, |
| "grad_norm": 0.36412686109542847, |
| "learning_rate": 8.664337885042872e-05, |
| "loss": 0.0924, |
| "step": 2108 |
| }, |
| { |
| "epoch": 4.011412268188303, |
| "grad_norm": 0.3399736285209656, |
| "learning_rate": 8.663702762781836e-05, |
| "loss": 0.1007, |
| "step": 2109 |
| }, |
| { |
| "epoch": 4.013314312886353, |
| "grad_norm": 0.3104216456413269, |
| "learning_rate": 8.663067640520801e-05, |
| "loss": 0.1146, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.0152163575844035, |
| "grad_norm": 0.33002039790153503, |
| "learning_rate": 8.662432518259765e-05, |
| "loss": 0.1112, |
| "step": 2111 |
| }, |
| { |
| "epoch": 4.0171184022824535, |
| "grad_norm": 0.3158220946788788, |
| "learning_rate": 8.66179739599873e-05, |
| "loss": 0.0983, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.019020446980504, |
| "grad_norm": 0.3281852900981903, |
| "learning_rate": 8.661162273737695e-05, |
| "loss": 0.1002, |
| "step": 2113 |
| }, |
| { |
| "epoch": 4.020922491678554, |
| "grad_norm": 0.42810752987861633, |
| "learning_rate": 8.660527151476659e-05, |
| "loss": 0.145, |
| "step": 2114 |
| }, |
| { |
| "epoch": 4.022824536376604, |
| "grad_norm": 0.343757301568985, |
| "learning_rate": 8.659892029215624e-05, |
| "loss": 0.1046, |
| "step": 2115 |
| }, |
| { |
| "epoch": 4.024726581074655, |
| "grad_norm": 0.3978208601474762, |
| "learning_rate": 8.65925690695459e-05, |
| "loss": 0.1232, |
| "step": 2116 |
| }, |
| { |
| "epoch": 4.026628625772705, |
| "grad_norm": 0.3716939687728882, |
| "learning_rate": 8.658621784693553e-05, |
| "loss": 0.1073, |
| "step": 2117 |
| }, |
| { |
| "epoch": 4.028530670470756, |
| "grad_norm": 0.3938986659049988, |
| "learning_rate": 8.657986662432518e-05, |
| "loss": 0.1162, |
| "step": 2118 |
| }, |
| { |
| "epoch": 4.030432715168806, |
| "grad_norm": 0.26515620946884155, |
| "learning_rate": 8.657351540171484e-05, |
| "loss": 0.0927, |
| "step": 2119 |
| }, |
| { |
| "epoch": 4.032334759866857, |
| "grad_norm": 0.4481755197048187, |
| "learning_rate": 8.656716417910447e-05, |
| "loss": 0.1192, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.034236804564907, |
| "grad_norm": 0.2902253568172455, |
| "learning_rate": 8.656081295649413e-05, |
| "loss": 0.0972, |
| "step": 2121 |
| }, |
| { |
| "epoch": 4.036138849262958, |
| "grad_norm": 0.3764674961566925, |
| "learning_rate": 8.655446173388378e-05, |
| "loss": 0.1242, |
| "step": 2122 |
| }, |
| { |
| "epoch": 4.038040893961008, |
| "grad_norm": 0.4040977954864502, |
| "learning_rate": 8.654811051127343e-05, |
| "loss": 0.1053, |
| "step": 2123 |
| }, |
| { |
| "epoch": 4.039942938659059, |
| "grad_norm": 0.3967365026473999, |
| "learning_rate": 8.654175928866307e-05, |
| "loss": 0.1132, |
| "step": 2124 |
| }, |
| { |
| "epoch": 4.041844983357109, |
| "grad_norm": 0.4135635197162628, |
| "learning_rate": 8.653540806605272e-05, |
| "loss": 0.1171, |
| "step": 2125 |
| }, |
| { |
| "epoch": 4.04374702805516, |
| "grad_norm": 0.43473535776138306, |
| "learning_rate": 8.652905684344237e-05, |
| "loss": 0.1227, |
| "step": 2126 |
| }, |
| { |
| "epoch": 4.04564907275321, |
| "grad_norm": 0.30436238646507263, |
| "learning_rate": 8.652270562083201e-05, |
| "loss": 0.0853, |
| "step": 2127 |
| }, |
| { |
| "epoch": 4.04755111745126, |
| "grad_norm": 0.3265203535556793, |
| "learning_rate": 8.651635439822166e-05, |
| "loss": 0.1007, |
| "step": 2128 |
| }, |
| { |
| "epoch": 4.049453162149311, |
| "grad_norm": 0.3733639121055603, |
| "learning_rate": 8.651000317561131e-05, |
| "loss": 0.1164, |
| "step": 2129 |
| }, |
| { |
| "epoch": 4.051355206847361, |
| "grad_norm": 0.3707481324672699, |
| "learning_rate": 8.650365195300095e-05, |
| "loss": 0.1225, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.0532572515454115, |
| "grad_norm": 0.39869242906570435, |
| "learning_rate": 8.64973007303906e-05, |
| "loss": 0.1127, |
| "step": 2131 |
| }, |
| { |
| "epoch": 4.0551592962434615, |
| "grad_norm": 0.31656894087791443, |
| "learning_rate": 8.649094950778026e-05, |
| "loss": 0.0936, |
| "step": 2132 |
| }, |
| { |
| "epoch": 4.057061340941512, |
| "grad_norm": 0.32848450541496277, |
| "learning_rate": 8.648459828516991e-05, |
| "loss": 0.1192, |
| "step": 2133 |
| }, |
| { |
| "epoch": 4.058963385639562, |
| "grad_norm": 0.41309690475463867, |
| "learning_rate": 8.647824706255955e-05, |
| "loss": 0.1224, |
| "step": 2134 |
| }, |
| { |
| "epoch": 4.060865430337613, |
| "grad_norm": 0.30171439051628113, |
| "learning_rate": 8.647189583994918e-05, |
| "loss": 0.1108, |
| "step": 2135 |
| }, |
| { |
| "epoch": 4.062767475035663, |
| "grad_norm": 0.31793013215065, |
| "learning_rate": 8.646554461733885e-05, |
| "loss": 0.0958, |
| "step": 2136 |
| }, |
| { |
| "epoch": 4.064669519733714, |
| "grad_norm": 0.3515986502170563, |
| "learning_rate": 8.645919339472849e-05, |
| "loss": 0.098, |
| "step": 2137 |
| }, |
| { |
| "epoch": 4.066571564431764, |
| "grad_norm": 0.2572970390319824, |
| "learning_rate": 8.645284217211813e-05, |
| "loss": 0.0782, |
| "step": 2138 |
| }, |
| { |
| "epoch": 4.068473609129814, |
| "grad_norm": 0.40460988879203796, |
| "learning_rate": 8.644649094950779e-05, |
| "loss": 0.111, |
| "step": 2139 |
| }, |
| { |
| "epoch": 4.070375653827865, |
| "grad_norm": 0.25654932856559753, |
| "learning_rate": 8.644013972689743e-05, |
| "loss": 0.078, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.072277698525915, |
| "grad_norm": 0.3793332278728485, |
| "learning_rate": 8.643378850428708e-05, |
| "loss": 0.1113, |
| "step": 2141 |
| }, |
| { |
| "epoch": 4.074179743223966, |
| "grad_norm": 0.3457014560699463, |
| "learning_rate": 8.642743728167672e-05, |
| "loss": 0.1016, |
| "step": 2142 |
| }, |
| { |
| "epoch": 4.076081787922016, |
| "grad_norm": 0.41619420051574707, |
| "learning_rate": 8.642108605906637e-05, |
| "loss": 0.1379, |
| "step": 2143 |
| }, |
| { |
| "epoch": 4.077983832620067, |
| "grad_norm": 0.3582102656364441, |
| "learning_rate": 8.641473483645602e-05, |
| "loss": 0.1068, |
| "step": 2144 |
| }, |
| { |
| "epoch": 4.079885877318117, |
| "grad_norm": 0.4142124652862549, |
| "learning_rate": 8.640838361384566e-05, |
| "loss": 0.1155, |
| "step": 2145 |
| }, |
| { |
| "epoch": 4.081787922016168, |
| "grad_norm": 0.3544979393482208, |
| "learning_rate": 8.640203239123533e-05, |
| "loss": 0.0969, |
| "step": 2146 |
| }, |
| { |
| "epoch": 4.083689966714218, |
| "grad_norm": 0.37561002373695374, |
| "learning_rate": 8.639568116862497e-05, |
| "loss": 0.1218, |
| "step": 2147 |
| }, |
| { |
| "epoch": 4.085592011412269, |
| "grad_norm": 0.3568158447742462, |
| "learning_rate": 8.63893299460146e-05, |
| "loss": 0.1225, |
| "step": 2148 |
| }, |
| { |
| "epoch": 4.087494056110319, |
| "grad_norm": 0.3126932382583618, |
| "learning_rate": 8.638297872340426e-05, |
| "loss": 0.084, |
| "step": 2149 |
| }, |
| { |
| "epoch": 4.089396100808369, |
| "grad_norm": 0.4232020378112793, |
| "learning_rate": 8.637662750079391e-05, |
| "loss": 0.1155, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.0912981455064195, |
| "grad_norm": 0.4121897518634796, |
| "learning_rate": 8.637027627818356e-05, |
| "loss": 0.1352, |
| "step": 2151 |
| }, |
| { |
| "epoch": 4.0932001902044695, |
| "grad_norm": 0.3292025923728943, |
| "learning_rate": 8.63639250555732e-05, |
| "loss": 0.115, |
| "step": 2152 |
| }, |
| { |
| "epoch": 4.09510223490252, |
| "grad_norm": 0.3273860514163971, |
| "learning_rate": 8.635757383296285e-05, |
| "loss": 0.1087, |
| "step": 2153 |
| }, |
| { |
| "epoch": 4.09700427960057, |
| "grad_norm": 0.36760157346725464, |
| "learning_rate": 8.63512226103525e-05, |
| "loss": 0.1206, |
| "step": 2154 |
| }, |
| { |
| "epoch": 4.098906324298621, |
| "grad_norm": 0.3717329502105713, |
| "learning_rate": 8.634487138774214e-05, |
| "loss": 0.1244, |
| "step": 2155 |
| }, |
| { |
| "epoch": 4.100808368996671, |
| "grad_norm": 0.379068523645401, |
| "learning_rate": 8.633852016513179e-05, |
| "loss": 0.1048, |
| "step": 2156 |
| }, |
| { |
| "epoch": 4.102710413694722, |
| "grad_norm": 0.30912551283836365, |
| "learning_rate": 8.633216894252144e-05, |
| "loss": 0.0838, |
| "step": 2157 |
| }, |
| { |
| "epoch": 4.104612458392772, |
| "grad_norm": 0.3093559741973877, |
| "learning_rate": 8.632581771991108e-05, |
| "loss": 0.0948, |
| "step": 2158 |
| }, |
| { |
| "epoch": 4.106514503090823, |
| "grad_norm": 0.2924623489379883, |
| "learning_rate": 8.631946649730073e-05, |
| "loss": 0.085, |
| "step": 2159 |
| }, |
| { |
| "epoch": 4.108416547788873, |
| "grad_norm": 0.335437536239624, |
| "learning_rate": 8.631311527469039e-05, |
| "loss": 0.102, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.110318592486923, |
| "grad_norm": 0.37450480461120605, |
| "learning_rate": 8.630676405208002e-05, |
| "loss": 0.1102, |
| "step": 2161 |
| }, |
| { |
| "epoch": 4.112220637184974, |
| "grad_norm": 0.40548086166381836, |
| "learning_rate": 8.630041282946968e-05, |
| "loss": 0.1122, |
| "step": 2162 |
| }, |
| { |
| "epoch": 4.114122681883024, |
| "grad_norm": 0.2255704551935196, |
| "learning_rate": 8.629406160685933e-05, |
| "loss": 0.0875, |
| "step": 2163 |
| }, |
| { |
| "epoch": 4.116024726581075, |
| "grad_norm": 0.3774515390396118, |
| "learning_rate": 8.628771038424898e-05, |
| "loss": 0.1007, |
| "step": 2164 |
| }, |
| { |
| "epoch": 4.117926771279125, |
| "grad_norm": 0.4410356879234314, |
| "learning_rate": 8.628135916163862e-05, |
| "loss": 0.1238, |
| "step": 2165 |
| }, |
| { |
| "epoch": 4.119828815977176, |
| "grad_norm": 0.3007069230079651, |
| "learning_rate": 8.627500793902826e-05, |
| "loss": 0.0849, |
| "step": 2166 |
| }, |
| { |
| "epoch": 4.121730860675226, |
| "grad_norm": 0.3165019750595093, |
| "learning_rate": 8.626865671641792e-05, |
| "loss": 0.0959, |
| "step": 2167 |
| }, |
| { |
| "epoch": 4.1236329053732765, |
| "grad_norm": 0.3213941752910614, |
| "learning_rate": 8.626230549380756e-05, |
| "loss": 0.1011, |
| "step": 2168 |
| }, |
| { |
| "epoch": 4.1255349500713265, |
| "grad_norm": 0.2742742598056793, |
| "learning_rate": 8.625595427119721e-05, |
| "loss": 0.0855, |
| "step": 2169 |
| }, |
| { |
| "epoch": 4.127436994769377, |
| "grad_norm": 0.35063308477401733, |
| "learning_rate": 8.624960304858686e-05, |
| "loss": 0.1115, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.129339039467427, |
| "grad_norm": 0.4272489845752716, |
| "learning_rate": 8.62432518259765e-05, |
| "loss": 0.1162, |
| "step": 2171 |
| }, |
| { |
| "epoch": 4.131241084165478, |
| "grad_norm": 0.27256911993026733, |
| "learning_rate": 8.623690060336615e-05, |
| "loss": 0.1066, |
| "step": 2172 |
| }, |
| { |
| "epoch": 4.133143128863528, |
| "grad_norm": 0.275309294462204, |
| "learning_rate": 8.623054938075579e-05, |
| "loss": 0.1029, |
| "step": 2173 |
| }, |
| { |
| "epoch": 4.135045173561578, |
| "grad_norm": 0.2678431570529938, |
| "learning_rate": 8.622419815814544e-05, |
| "loss": 0.0836, |
| "step": 2174 |
| }, |
| { |
| "epoch": 4.136947218259629, |
| "grad_norm": 0.3313474953174591, |
| "learning_rate": 8.62178469355351e-05, |
| "loss": 0.0925, |
| "step": 2175 |
| }, |
| { |
| "epoch": 4.138849262957679, |
| "grad_norm": 0.2514117658138275, |
| "learning_rate": 8.621149571292473e-05, |
| "loss": 0.0905, |
| "step": 2176 |
| }, |
| { |
| "epoch": 4.14075130765573, |
| "grad_norm": 0.2868940532207489, |
| "learning_rate": 8.62051444903144e-05, |
| "loss": 0.1057, |
| "step": 2177 |
| }, |
| { |
| "epoch": 4.14265335235378, |
| "grad_norm": 0.3867243826389313, |
| "learning_rate": 8.619879326770404e-05, |
| "loss": 0.1151, |
| "step": 2178 |
| }, |
| { |
| "epoch": 4.144555397051831, |
| "grad_norm": 0.3011827766895294, |
| "learning_rate": 8.619244204509368e-05, |
| "loss": 0.1152, |
| "step": 2179 |
| }, |
| { |
| "epoch": 4.146457441749881, |
| "grad_norm": 0.33059659600257874, |
| "learning_rate": 8.618609082248333e-05, |
| "loss": 0.1121, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.148359486447932, |
| "grad_norm": 0.45777612924575806, |
| "learning_rate": 8.617973959987298e-05, |
| "loss": 0.133, |
| "step": 2181 |
| }, |
| { |
| "epoch": 4.150261531145982, |
| "grad_norm": 0.39224299788475037, |
| "learning_rate": 8.617338837726263e-05, |
| "loss": 0.1381, |
| "step": 2182 |
| }, |
| { |
| "epoch": 4.152163575844033, |
| "grad_norm": 0.2813168168067932, |
| "learning_rate": 8.616703715465227e-05, |
| "loss": 0.0939, |
| "step": 2183 |
| }, |
| { |
| "epoch": 4.154065620542083, |
| "grad_norm": 0.30850479006767273, |
| "learning_rate": 8.616068593204192e-05, |
| "loss": 0.1016, |
| "step": 2184 |
| }, |
| { |
| "epoch": 4.155967665240133, |
| "grad_norm": 0.2755066156387329, |
| "learning_rate": 8.615433470943157e-05, |
| "loss": 0.1253, |
| "step": 2185 |
| }, |
| { |
| "epoch": 4.157869709938184, |
| "grad_norm": 0.25375935435295105, |
| "learning_rate": 8.614798348682121e-05, |
| "loss": 0.088, |
| "step": 2186 |
| }, |
| { |
| "epoch": 4.159771754636234, |
| "grad_norm": 0.27644097805023193, |
| "learning_rate": 8.614163226421086e-05, |
| "loss": 0.1053, |
| "step": 2187 |
| }, |
| { |
| "epoch": 4.1616737993342845, |
| "grad_norm": 0.30916059017181396, |
| "learning_rate": 8.613528104160052e-05, |
| "loss": 0.1075, |
| "step": 2188 |
| }, |
| { |
| "epoch": 4.1635758440323345, |
| "grad_norm": 0.3316441476345062, |
| "learning_rate": 8.612892981899015e-05, |
| "loss": 0.1087, |
| "step": 2189 |
| }, |
| { |
| "epoch": 4.165477888730385, |
| "grad_norm": 0.27464917302131653, |
| "learning_rate": 8.61225785963798e-05, |
| "loss": 0.079, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.167379933428435, |
| "grad_norm": 0.3684466779232025, |
| "learning_rate": 8.611622737376946e-05, |
| "loss": 0.1312, |
| "step": 2191 |
| }, |
| { |
| "epoch": 4.169281978126486, |
| "grad_norm": 0.33914482593536377, |
| "learning_rate": 8.61098761511591e-05, |
| "loss": 0.0991, |
| "step": 2192 |
| }, |
| { |
| "epoch": 4.171184022824536, |
| "grad_norm": 0.3610948324203491, |
| "learning_rate": 8.610352492854875e-05, |
| "loss": 0.1068, |
| "step": 2193 |
| }, |
| { |
| "epoch": 4.173086067522587, |
| "grad_norm": 0.2824098765850067, |
| "learning_rate": 8.60971737059384e-05, |
| "loss": 0.0913, |
| "step": 2194 |
| }, |
| { |
| "epoch": 4.174988112220637, |
| "grad_norm": 0.28685760498046875, |
| "learning_rate": 8.609082248332805e-05, |
| "loss": 0.098, |
| "step": 2195 |
| }, |
| { |
| "epoch": 4.176890156918688, |
| "grad_norm": 0.44503989815711975, |
| "learning_rate": 8.608447126071769e-05, |
| "loss": 0.1441, |
| "step": 2196 |
| }, |
| { |
| "epoch": 4.178792201616738, |
| "grad_norm": 0.4228593409061432, |
| "learning_rate": 8.607812003810734e-05, |
| "loss": 0.1228, |
| "step": 2197 |
| }, |
| { |
| "epoch": 4.180694246314788, |
| "grad_norm": 0.34366467595100403, |
| "learning_rate": 8.607176881549699e-05, |
| "loss": 0.0969, |
| "step": 2198 |
| }, |
| { |
| "epoch": 4.182596291012839, |
| "grad_norm": 0.3302469849586487, |
| "learning_rate": 8.606541759288663e-05, |
| "loss": 0.1093, |
| "step": 2199 |
| }, |
| { |
| "epoch": 4.184498335710889, |
| "grad_norm": 0.316914826631546, |
| "learning_rate": 8.605906637027628e-05, |
| "loss": 0.096, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.18640038040894, |
| "grad_norm": 0.3100655972957611, |
| "learning_rate": 8.605271514766594e-05, |
| "loss": 0.0902, |
| "step": 2201 |
| }, |
| { |
| "epoch": 4.18830242510699, |
| "grad_norm": 0.2934771776199341, |
| "learning_rate": 8.604636392505557e-05, |
| "loss": 0.1011, |
| "step": 2202 |
| }, |
| { |
| "epoch": 4.190204469805041, |
| "grad_norm": 0.32837802171707153, |
| "learning_rate": 8.604001270244523e-05, |
| "loss": 0.1284, |
| "step": 2203 |
| }, |
| { |
| "epoch": 4.192106514503091, |
| "grad_norm": 0.3842618465423584, |
| "learning_rate": 8.603366147983488e-05, |
| "loss": 0.1072, |
| "step": 2204 |
| }, |
| { |
| "epoch": 4.194008559201142, |
| "grad_norm": 0.29006102681159973, |
| "learning_rate": 8.602731025722453e-05, |
| "loss": 0.0919, |
| "step": 2205 |
| }, |
| { |
| "epoch": 4.195910603899192, |
| "grad_norm": 0.31507110595703125, |
| "learning_rate": 8.602095903461417e-05, |
| "loss": 0.1103, |
| "step": 2206 |
| }, |
| { |
| "epoch": 4.1978126485972425, |
| "grad_norm": 0.35961470007896423, |
| "learning_rate": 8.60146078120038e-05, |
| "loss": 0.1738, |
| "step": 2207 |
| }, |
| { |
| "epoch": 4.1997146932952925, |
| "grad_norm": 0.34587833285331726, |
| "learning_rate": 8.600825658939347e-05, |
| "loss": 0.1096, |
| "step": 2208 |
| }, |
| { |
| "epoch": 4.2016167379933425, |
| "grad_norm": 0.37271326780319214, |
| "learning_rate": 8.600190536678311e-05, |
| "loss": 0.1186, |
| "step": 2209 |
| }, |
| { |
| "epoch": 4.203518782691393, |
| "grad_norm": 0.31880611181259155, |
| "learning_rate": 8.599555414417275e-05, |
| "loss": 0.1046, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.205420827389443, |
| "grad_norm": 0.28906506299972534, |
| "learning_rate": 8.598920292156241e-05, |
| "loss": 0.0988, |
| "step": 2211 |
| }, |
| { |
| "epoch": 4.207322872087494, |
| "grad_norm": 0.33470967411994934, |
| "learning_rate": 8.598285169895205e-05, |
| "loss": 0.1056, |
| "step": 2212 |
| }, |
| { |
| "epoch": 4.209224916785544, |
| "grad_norm": 0.3186233341693878, |
| "learning_rate": 8.59765004763417e-05, |
| "loss": 0.1203, |
| "step": 2213 |
| }, |
| { |
| "epoch": 4.211126961483595, |
| "grad_norm": 0.3465280532836914, |
| "learning_rate": 8.597014925373134e-05, |
| "loss": 0.1073, |
| "step": 2214 |
| }, |
| { |
| "epoch": 4.213029006181645, |
| "grad_norm": 0.27451473474502563, |
| "learning_rate": 8.596379803112099e-05, |
| "loss": 0.0965, |
| "step": 2215 |
| }, |
| { |
| "epoch": 4.214931050879696, |
| "grad_norm": 0.35004234313964844, |
| "learning_rate": 8.595744680851064e-05, |
| "loss": 0.1003, |
| "step": 2216 |
| }, |
| { |
| "epoch": 4.216833095577746, |
| "grad_norm": 0.36494818329811096, |
| "learning_rate": 8.595109558590028e-05, |
| "loss": 0.1143, |
| "step": 2217 |
| }, |
| { |
| "epoch": 4.218735140275797, |
| "grad_norm": 0.4278135597705841, |
| "learning_rate": 8.594474436328995e-05, |
| "loss": 0.1234, |
| "step": 2218 |
| }, |
| { |
| "epoch": 4.220637184973847, |
| "grad_norm": 0.5124382972717285, |
| "learning_rate": 8.593839314067959e-05, |
| "loss": 0.1158, |
| "step": 2219 |
| }, |
| { |
| "epoch": 4.222539229671897, |
| "grad_norm": 0.39850741624832153, |
| "learning_rate": 8.593204191806923e-05, |
| "loss": 0.1295, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.224441274369948, |
| "grad_norm": 0.4141925573348999, |
| "learning_rate": 8.592569069545888e-05, |
| "loss": 0.1103, |
| "step": 2221 |
| }, |
| { |
| "epoch": 4.226343319067998, |
| "grad_norm": 0.274980366230011, |
| "learning_rate": 8.591933947284853e-05, |
| "loss": 0.0927, |
| "step": 2222 |
| }, |
| { |
| "epoch": 4.228245363766049, |
| "grad_norm": 0.4274260103702545, |
| "learning_rate": 8.591298825023818e-05, |
| "loss": 0.1248, |
| "step": 2223 |
| }, |
| { |
| "epoch": 4.230147408464099, |
| "grad_norm": 0.39051416516304016, |
| "learning_rate": 8.590663702762782e-05, |
| "loss": 0.1068, |
| "step": 2224 |
| }, |
| { |
| "epoch": 4.2320494531621495, |
| "grad_norm": 0.3913654685020447, |
| "learning_rate": 8.590028580501747e-05, |
| "loss": 0.1212, |
| "step": 2225 |
| }, |
| { |
| "epoch": 4.2339514978601995, |
| "grad_norm": 0.33034393191337585, |
| "learning_rate": 8.589393458240712e-05, |
| "loss": 0.0875, |
| "step": 2226 |
| }, |
| { |
| "epoch": 4.23585354255825, |
| "grad_norm": 0.405618280172348, |
| "learning_rate": 8.588758335979676e-05, |
| "loss": 0.1228, |
| "step": 2227 |
| }, |
| { |
| "epoch": 4.2377555872563, |
| "grad_norm": 0.3220268189907074, |
| "learning_rate": 8.588123213718641e-05, |
| "loss": 0.1046, |
| "step": 2228 |
| }, |
| { |
| "epoch": 4.239657631954351, |
| "grad_norm": 0.32537737488746643, |
| "learning_rate": 8.587488091457606e-05, |
| "loss": 0.0901, |
| "step": 2229 |
| }, |
| { |
| "epoch": 4.241559676652401, |
| "grad_norm": 0.3968732953071594, |
| "learning_rate": 8.58685296919657e-05, |
| "loss": 0.1753, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.243461721350451, |
| "grad_norm": 0.3441084325313568, |
| "learning_rate": 8.586217846935535e-05, |
| "loss": 0.1181, |
| "step": 2231 |
| }, |
| { |
| "epoch": 4.245363766048502, |
| "grad_norm": 0.4014514684677124, |
| "learning_rate": 8.5855827246745e-05, |
| "loss": 0.1067, |
| "step": 2232 |
| }, |
| { |
| "epoch": 4.247265810746552, |
| "grad_norm": 0.40167930722236633, |
| "learning_rate": 8.584947602413464e-05, |
| "loss": 0.1142, |
| "step": 2233 |
| }, |
| { |
| "epoch": 4.249167855444603, |
| "grad_norm": 0.3604772984981537, |
| "learning_rate": 8.58431248015243e-05, |
| "loss": 0.108, |
| "step": 2234 |
| }, |
| { |
| "epoch": 4.251069900142653, |
| "grad_norm": 0.4210832118988037, |
| "learning_rate": 8.583677357891395e-05, |
| "loss": 0.1161, |
| "step": 2235 |
| }, |
| { |
| "epoch": 4.252971944840704, |
| "grad_norm": 0.34467047452926636, |
| "learning_rate": 8.58304223563036e-05, |
| "loss": 0.1187, |
| "step": 2236 |
| }, |
| { |
| "epoch": 4.254873989538754, |
| "grad_norm": 0.8141130805015564, |
| "learning_rate": 8.582407113369324e-05, |
| "loss": 0.1766, |
| "step": 2237 |
| }, |
| { |
| "epoch": 4.256776034236805, |
| "grad_norm": 0.28791263699531555, |
| "learning_rate": 8.581771991108288e-05, |
| "loss": 0.0953, |
| "step": 2238 |
| }, |
| { |
| "epoch": 4.258678078934855, |
| "grad_norm": 0.2527415454387665, |
| "learning_rate": 8.581136868847254e-05, |
| "loss": 0.0847, |
| "step": 2239 |
| }, |
| { |
| "epoch": 4.260580123632906, |
| "grad_norm": 0.2793647050857544, |
| "learning_rate": 8.580501746586218e-05, |
| "loss": 0.116, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.262482168330956, |
| "grad_norm": 0.5324682593345642, |
| "learning_rate": 8.579866624325183e-05, |
| "loss": 0.1357, |
| "step": 2241 |
| }, |
| { |
| "epoch": 4.264384213029006, |
| "grad_norm": 0.31979575753211975, |
| "learning_rate": 8.579231502064148e-05, |
| "loss": 0.1004, |
| "step": 2242 |
| }, |
| { |
| "epoch": 4.266286257727057, |
| "grad_norm": 0.453645795583725, |
| "learning_rate": 8.578596379803112e-05, |
| "loss": 0.121, |
| "step": 2243 |
| }, |
| { |
| "epoch": 4.268188302425107, |
| "grad_norm": 0.2688881754875183, |
| "learning_rate": 8.577961257542077e-05, |
| "loss": 0.0935, |
| "step": 2244 |
| }, |
| { |
| "epoch": 4.2700903471231575, |
| "grad_norm": 0.30262473225593567, |
| "learning_rate": 8.577326135281041e-05, |
| "loss": 0.086, |
| "step": 2245 |
| }, |
| { |
| "epoch": 4.2719923918212075, |
| "grad_norm": 0.4076935648918152, |
| "learning_rate": 8.576691013020006e-05, |
| "loss": 0.1075, |
| "step": 2246 |
| }, |
| { |
| "epoch": 4.273894436519258, |
| "grad_norm": 0.5229641199111938, |
| "learning_rate": 8.576055890758972e-05, |
| "loss": 0.1585, |
| "step": 2247 |
| }, |
| { |
| "epoch": 4.275796481217308, |
| "grad_norm": 0.3732607960700989, |
| "learning_rate": 8.575420768497935e-05, |
| "loss": 0.1065, |
| "step": 2248 |
| }, |
| { |
| "epoch": 4.277698525915359, |
| "grad_norm": 0.39624014496803284, |
| "learning_rate": 8.574785646236902e-05, |
| "loss": 0.1229, |
| "step": 2249 |
| }, |
| { |
| "epoch": 4.279600570613409, |
| "grad_norm": 0.47354966402053833, |
| "learning_rate": 8.574150523975866e-05, |
| "loss": 0.1574, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.28150261531146, |
| "grad_norm": 0.35089337825775146, |
| "learning_rate": 8.57351540171483e-05, |
| "loss": 0.1098, |
| "step": 2251 |
| }, |
| { |
| "epoch": 4.28340466000951, |
| "grad_norm": 0.3599602282047272, |
| "learning_rate": 8.572880279453795e-05, |
| "loss": 0.1136, |
| "step": 2252 |
| }, |
| { |
| "epoch": 4.285306704707561, |
| "grad_norm": 0.4661259949207306, |
| "learning_rate": 8.57224515719276e-05, |
| "loss": 0.1297, |
| "step": 2253 |
| }, |
| { |
| "epoch": 4.287208749405611, |
| "grad_norm": 0.27821779251098633, |
| "learning_rate": 8.571610034931725e-05, |
| "loss": 0.0974, |
| "step": 2254 |
| }, |
| { |
| "epoch": 4.289110794103661, |
| "grad_norm": 0.3892570436000824, |
| "learning_rate": 8.570974912670689e-05, |
| "loss": 0.1362, |
| "step": 2255 |
| }, |
| { |
| "epoch": 4.291012838801712, |
| "grad_norm": 0.3612288534641266, |
| "learning_rate": 8.570339790409654e-05, |
| "loss": 0.121, |
| "step": 2256 |
| }, |
| { |
| "epoch": 4.292914883499762, |
| "grad_norm": 0.3542415499687195, |
| "learning_rate": 8.56970466814862e-05, |
| "loss": 0.1004, |
| "step": 2257 |
| }, |
| { |
| "epoch": 4.294816928197813, |
| "grad_norm": 0.3457956910133362, |
| "learning_rate": 8.569069545887583e-05, |
| "loss": 0.1035, |
| "step": 2258 |
| }, |
| { |
| "epoch": 4.296718972895863, |
| "grad_norm": 0.42984023690223694, |
| "learning_rate": 8.568434423626548e-05, |
| "loss": 0.1236, |
| "step": 2259 |
| }, |
| { |
| "epoch": 4.298621017593914, |
| "grad_norm": 0.3002376854419708, |
| "learning_rate": 8.567799301365514e-05, |
| "loss": 0.0867, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.300523062291964, |
| "grad_norm": 0.3134646415710449, |
| "learning_rate": 8.567164179104477e-05, |
| "loss": 0.0928, |
| "step": 2261 |
| }, |
| { |
| "epoch": 4.302425106990015, |
| "grad_norm": 0.35177892446517944, |
| "learning_rate": 8.566529056843443e-05, |
| "loss": 0.1072, |
| "step": 2262 |
| }, |
| { |
| "epoch": 4.304327151688065, |
| "grad_norm": 0.40704670548439026, |
| "learning_rate": 8.565893934582408e-05, |
| "loss": 0.1216, |
| "step": 2263 |
| }, |
| { |
| "epoch": 4.3062291963861155, |
| "grad_norm": 0.40002110600471497, |
| "learning_rate": 8.565258812321372e-05, |
| "loss": 0.1153, |
| "step": 2264 |
| }, |
| { |
| "epoch": 4.3081312410841655, |
| "grad_norm": 0.28185611963272095, |
| "learning_rate": 8.564623690060337e-05, |
| "loss": 0.0815, |
| "step": 2265 |
| }, |
| { |
| "epoch": 4.310033285782216, |
| "grad_norm": 0.45204728841781616, |
| "learning_rate": 8.563988567799302e-05, |
| "loss": 0.1285, |
| "step": 2266 |
| }, |
| { |
| "epoch": 4.311935330480266, |
| "grad_norm": 0.39130833745002747, |
| "learning_rate": 8.563353445538267e-05, |
| "loss": 0.1235, |
| "step": 2267 |
| }, |
| { |
| "epoch": 4.313837375178316, |
| "grad_norm": 0.29855722188949585, |
| "learning_rate": 8.562718323277231e-05, |
| "loss": 0.0943, |
| "step": 2268 |
| }, |
| { |
| "epoch": 4.315739419876367, |
| "grad_norm": 0.2964162826538086, |
| "learning_rate": 8.562083201016196e-05, |
| "loss": 0.1056, |
| "step": 2269 |
| }, |
| { |
| "epoch": 4.317641464574417, |
| "grad_norm": 0.3408963978290558, |
| "learning_rate": 8.561448078755161e-05, |
| "loss": 0.1096, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.319543509272468, |
| "grad_norm": 0.26335135102272034, |
| "learning_rate": 8.560812956494125e-05, |
| "loss": 0.1258, |
| "step": 2271 |
| }, |
| { |
| "epoch": 4.321445553970518, |
| "grad_norm": 0.45781078934669495, |
| "learning_rate": 8.56017783423309e-05, |
| "loss": 0.1441, |
| "step": 2272 |
| }, |
| { |
| "epoch": 4.323347598668569, |
| "grad_norm": 0.30225613713264465, |
| "learning_rate": 8.559542711972056e-05, |
| "loss": 0.0886, |
| "step": 2273 |
| }, |
| { |
| "epoch": 4.325249643366619, |
| "grad_norm": 0.39499637484550476, |
| "learning_rate": 8.55890758971102e-05, |
| "loss": 0.108, |
| "step": 2274 |
| }, |
| { |
| "epoch": 4.32715168806467, |
| "grad_norm": 0.25995761156082153, |
| "learning_rate": 8.558272467449985e-05, |
| "loss": 0.0832, |
| "step": 2275 |
| }, |
| { |
| "epoch": 4.32905373276272, |
| "grad_norm": 0.4667019248008728, |
| "learning_rate": 8.557637345188948e-05, |
| "loss": 0.1376, |
| "step": 2276 |
| }, |
| { |
| "epoch": 4.330955777460771, |
| "grad_norm": 0.6616588830947876, |
| "learning_rate": 8.557002222927915e-05, |
| "loss": 0.1402, |
| "step": 2277 |
| }, |
| { |
| "epoch": 4.332857822158821, |
| "grad_norm": 0.362642765045166, |
| "learning_rate": 8.556367100666879e-05, |
| "loss": 0.1036, |
| "step": 2278 |
| }, |
| { |
| "epoch": 4.334759866856871, |
| "grad_norm": 0.34205347299575806, |
| "learning_rate": 8.555731978405843e-05, |
| "loss": 0.0901, |
| "step": 2279 |
| }, |
| { |
| "epoch": 4.336661911554922, |
| "grad_norm": 0.428653746843338, |
| "learning_rate": 8.555096856144809e-05, |
| "loss": 0.1291, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.338563956252972, |
| "grad_norm": 0.31291234493255615, |
| "learning_rate": 8.554461733883773e-05, |
| "loss": 0.091, |
| "step": 2281 |
| }, |
| { |
| "epoch": 4.3404660009510225, |
| "grad_norm": 0.33913081884384155, |
| "learning_rate": 8.553826611622737e-05, |
| "loss": 0.0844, |
| "step": 2282 |
| }, |
| { |
| "epoch": 4.3423680456490725, |
| "grad_norm": 0.3302326500415802, |
| "learning_rate": 8.553191489361702e-05, |
| "loss": 0.0894, |
| "step": 2283 |
| }, |
| { |
| "epoch": 4.344270090347123, |
| "grad_norm": 0.39421653747558594, |
| "learning_rate": 8.552556367100667e-05, |
| "loss": 0.1173, |
| "step": 2284 |
| }, |
| { |
| "epoch": 4.346172135045173, |
| "grad_norm": 0.35651376843452454, |
| "learning_rate": 8.551921244839632e-05, |
| "loss": 0.0945, |
| "step": 2285 |
| }, |
| { |
| "epoch": 4.348074179743224, |
| "grad_norm": 0.37059125304222107, |
| "learning_rate": 8.551286122578596e-05, |
| "loss": 0.1223, |
| "step": 2286 |
| }, |
| { |
| "epoch": 4.349976224441274, |
| "grad_norm": 0.31241846084594727, |
| "learning_rate": 8.550651000317561e-05, |
| "loss": 0.1057, |
| "step": 2287 |
| }, |
| { |
| "epoch": 4.351878269139325, |
| "grad_norm": 0.29532214999198914, |
| "learning_rate": 8.550015878056527e-05, |
| "loss": 0.1008, |
| "step": 2288 |
| }, |
| { |
| "epoch": 4.353780313837375, |
| "grad_norm": 0.435973584651947, |
| "learning_rate": 8.54938075579549e-05, |
| "loss": 0.1258, |
| "step": 2289 |
| }, |
| { |
| "epoch": 4.355682358535425, |
| "grad_norm": 0.3240755498409271, |
| "learning_rate": 8.548745633534456e-05, |
| "loss": 0.1383, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.357584403233476, |
| "grad_norm": 0.3592849373817444, |
| "learning_rate": 8.548110511273421e-05, |
| "loss": 0.118, |
| "step": 2291 |
| }, |
| { |
| "epoch": 4.359486447931526, |
| "grad_norm": 0.3495205342769623, |
| "learning_rate": 8.547475389012385e-05, |
| "loss": 0.1182, |
| "step": 2292 |
| }, |
| { |
| "epoch": 4.361388492629577, |
| "grad_norm": 0.35103073716163635, |
| "learning_rate": 8.54684026675135e-05, |
| "loss": 0.1075, |
| "step": 2293 |
| }, |
| { |
| "epoch": 4.363290537327627, |
| "grad_norm": 0.4233345091342926, |
| "learning_rate": 8.546205144490315e-05, |
| "loss": 0.1111, |
| "step": 2294 |
| }, |
| { |
| "epoch": 4.365192582025678, |
| "grad_norm": 0.3999617099761963, |
| "learning_rate": 8.54557002222928e-05, |
| "loss": 0.1172, |
| "step": 2295 |
| }, |
| { |
| "epoch": 4.367094626723728, |
| "grad_norm": 0.3122519254684448, |
| "learning_rate": 8.544934899968244e-05, |
| "loss": 0.0973, |
| "step": 2296 |
| }, |
| { |
| "epoch": 4.368996671421779, |
| "grad_norm": 0.2844139039516449, |
| "learning_rate": 8.544299777707209e-05, |
| "loss": 0.0972, |
| "step": 2297 |
| }, |
| { |
| "epoch": 4.370898716119829, |
| "grad_norm": 0.3841843008995056, |
| "learning_rate": 8.543664655446174e-05, |
| "loss": 0.1145, |
| "step": 2298 |
| }, |
| { |
| "epoch": 4.37280076081788, |
| "grad_norm": 0.35272732377052307, |
| "learning_rate": 8.543029533185138e-05, |
| "loss": 0.1, |
| "step": 2299 |
| }, |
| { |
| "epoch": 4.37470280551593, |
| "grad_norm": 0.3861033618450165, |
| "learning_rate": 8.542394410924103e-05, |
| "loss": 0.12, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.37660485021398, |
| "grad_norm": 0.2895589768886566, |
| "learning_rate": 8.541759288663069e-05, |
| "loss": 0.0857, |
| "step": 2301 |
| }, |
| { |
| "epoch": 4.3785068949120305, |
| "grad_norm": 0.4067385792732239, |
| "learning_rate": 8.541124166402032e-05, |
| "loss": 0.114, |
| "step": 2302 |
| }, |
| { |
| "epoch": 4.3804089396100805, |
| "grad_norm": 0.3439483642578125, |
| "learning_rate": 8.540489044140998e-05, |
| "loss": 0.1218, |
| "step": 2303 |
| }, |
| { |
| "epoch": 4.382310984308131, |
| "grad_norm": 0.273703396320343, |
| "learning_rate": 8.539853921879963e-05, |
| "loss": 0.0919, |
| "step": 2304 |
| }, |
| { |
| "epoch": 4.384213029006181, |
| "grad_norm": 0.2975528836250305, |
| "learning_rate": 8.539218799618927e-05, |
| "loss": 0.0786, |
| "step": 2305 |
| }, |
| { |
| "epoch": 4.386115073704232, |
| "grad_norm": 0.3109762370586395, |
| "learning_rate": 8.538583677357892e-05, |
| "loss": 0.1043, |
| "step": 2306 |
| }, |
| { |
| "epoch": 4.388017118402282, |
| "grad_norm": 0.30896326899528503, |
| "learning_rate": 8.537948555096857e-05, |
| "loss": 0.0986, |
| "step": 2307 |
| }, |
| { |
| "epoch": 4.389919163100333, |
| "grad_norm": 0.24300821125507355, |
| "learning_rate": 8.537313432835822e-05, |
| "loss": 0.0821, |
| "step": 2308 |
| }, |
| { |
| "epoch": 4.391821207798383, |
| "grad_norm": 0.2907545566558838, |
| "learning_rate": 8.536678310574786e-05, |
| "loss": 0.0943, |
| "step": 2309 |
| }, |
| { |
| "epoch": 4.393723252496434, |
| "grad_norm": 0.4220617115497589, |
| "learning_rate": 8.53604318831375e-05, |
| "loss": 0.1359, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.395625297194484, |
| "grad_norm": 0.3436138331890106, |
| "learning_rate": 8.535408066052716e-05, |
| "loss": 0.1106, |
| "step": 2311 |
| }, |
| { |
| "epoch": 4.397527341892534, |
| "grad_norm": 0.36533981561660767, |
| "learning_rate": 8.53477294379168e-05, |
| "loss": 0.1194, |
| "step": 2312 |
| }, |
| { |
| "epoch": 4.399429386590585, |
| "grad_norm": 0.3554334044456482, |
| "learning_rate": 8.534137821530645e-05, |
| "loss": 0.1571, |
| "step": 2313 |
| }, |
| { |
| "epoch": 4.401331431288635, |
| "grad_norm": 0.3670365512371063, |
| "learning_rate": 8.53350269926961e-05, |
| "loss": 0.1299, |
| "step": 2314 |
| }, |
| { |
| "epoch": 4.403233475986686, |
| "grad_norm": 0.4539790451526642, |
| "learning_rate": 8.532867577008574e-05, |
| "loss": 0.1348, |
| "step": 2315 |
| }, |
| { |
| "epoch": 4.405135520684736, |
| "grad_norm": 0.29808804392814636, |
| "learning_rate": 8.53223245474754e-05, |
| "loss": 0.1046, |
| "step": 2316 |
| }, |
| { |
| "epoch": 4.407037565382787, |
| "grad_norm": 0.3486464321613312, |
| "learning_rate": 8.531597332486503e-05, |
| "loss": 0.1047, |
| "step": 2317 |
| }, |
| { |
| "epoch": 4.408939610080837, |
| "grad_norm": 0.2947161793708801, |
| "learning_rate": 8.530962210225469e-05, |
| "loss": 0.0814, |
| "step": 2318 |
| }, |
| { |
| "epoch": 4.410841654778888, |
| "grad_norm": 0.3321152627468109, |
| "learning_rate": 8.530327087964434e-05, |
| "loss": 0.1068, |
| "step": 2319 |
| }, |
| { |
| "epoch": 4.412743699476938, |
| "grad_norm": 0.2441323846578598, |
| "learning_rate": 8.529691965703398e-05, |
| "loss": 0.0813, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.4146457441749885, |
| "grad_norm": 0.37151622772216797, |
| "learning_rate": 8.529056843442364e-05, |
| "loss": 0.0995, |
| "step": 2321 |
| }, |
| { |
| "epoch": 4.4165477888730384, |
| "grad_norm": 0.330240398645401, |
| "learning_rate": 8.528421721181328e-05, |
| "loss": 0.0999, |
| "step": 2322 |
| }, |
| { |
| "epoch": 4.418449833571089, |
| "grad_norm": 0.38048794865608215, |
| "learning_rate": 8.527786598920292e-05, |
| "loss": 0.1065, |
| "step": 2323 |
| }, |
| { |
| "epoch": 4.420351878269139, |
| "grad_norm": 0.3825136423110962, |
| "learning_rate": 8.527151476659257e-05, |
| "loss": 0.1021, |
| "step": 2324 |
| }, |
| { |
| "epoch": 4.422253922967189, |
| "grad_norm": 0.3410681486129761, |
| "learning_rate": 8.526516354398222e-05, |
| "loss": 0.0899, |
| "step": 2325 |
| }, |
| { |
| "epoch": 4.42415596766524, |
| "grad_norm": 0.33466002345085144, |
| "learning_rate": 8.525881232137187e-05, |
| "loss": 0.1051, |
| "step": 2326 |
| }, |
| { |
| "epoch": 4.42605801236329, |
| "grad_norm": 0.3932620584964752, |
| "learning_rate": 8.525246109876151e-05, |
| "loss": 0.1156, |
| "step": 2327 |
| }, |
| { |
| "epoch": 4.427960057061341, |
| "grad_norm": 0.31098031997680664, |
| "learning_rate": 8.524610987615116e-05, |
| "loss": 0.1026, |
| "step": 2328 |
| }, |
| { |
| "epoch": 4.429862101759391, |
| "grad_norm": 0.3773583471775055, |
| "learning_rate": 8.523975865354082e-05, |
| "loss": 0.1113, |
| "step": 2329 |
| }, |
| { |
| "epoch": 4.431764146457442, |
| "grad_norm": 0.33763033151626587, |
| "learning_rate": 8.523340743093045e-05, |
| "loss": 0.0941, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.433666191155492, |
| "grad_norm": 0.23584803938865662, |
| "learning_rate": 8.52270562083201e-05, |
| "loss": 0.0777, |
| "step": 2331 |
| }, |
| { |
| "epoch": 4.435568235853543, |
| "grad_norm": 0.3598161041736603, |
| "learning_rate": 8.522070498570976e-05, |
| "loss": 0.1173, |
| "step": 2332 |
| }, |
| { |
| "epoch": 4.437470280551593, |
| "grad_norm": 0.3960074484348297, |
| "learning_rate": 8.52143537630994e-05, |
| "loss": 0.119, |
| "step": 2333 |
| }, |
| { |
| "epoch": 4.439372325249644, |
| "grad_norm": 0.3260672092437744, |
| "learning_rate": 8.520800254048905e-05, |
| "loss": 0.1107, |
| "step": 2334 |
| }, |
| { |
| "epoch": 4.441274369947694, |
| "grad_norm": 0.3651185929775238, |
| "learning_rate": 8.52016513178787e-05, |
| "loss": 0.0993, |
| "step": 2335 |
| }, |
| { |
| "epoch": 4.443176414645745, |
| "grad_norm": 0.39154887199401855, |
| "learning_rate": 8.519530009526834e-05, |
| "loss": 0.1168, |
| "step": 2336 |
| }, |
| { |
| "epoch": 4.445078459343795, |
| "grad_norm": 0.3429001569747925, |
| "learning_rate": 8.518894887265799e-05, |
| "loss": 0.1111, |
| "step": 2337 |
| }, |
| { |
| "epoch": 4.446980504041845, |
| "grad_norm": 0.3407055735588074, |
| "learning_rate": 8.518259765004764e-05, |
| "loss": 0.1032, |
| "step": 2338 |
| }, |
| { |
| "epoch": 4.4488825487398955, |
| "grad_norm": 0.3813023567199707, |
| "learning_rate": 8.517624642743729e-05, |
| "loss": 0.1077, |
| "step": 2339 |
| }, |
| { |
| "epoch": 4.4507845934379455, |
| "grad_norm": 0.2836807370185852, |
| "learning_rate": 8.516989520482693e-05, |
| "loss": 0.0833, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.452686638135996, |
| "grad_norm": 0.4083840250968933, |
| "learning_rate": 8.516354398221657e-05, |
| "loss": 0.1254, |
| "step": 2341 |
| }, |
| { |
| "epoch": 4.454588682834046, |
| "grad_norm": 0.29835161566734314, |
| "learning_rate": 8.515719275960623e-05, |
| "loss": 0.1207, |
| "step": 2342 |
| }, |
| { |
| "epoch": 4.456490727532097, |
| "grad_norm": 0.30677247047424316, |
| "learning_rate": 8.515084153699587e-05, |
| "loss": 0.0807, |
| "step": 2343 |
| }, |
| { |
| "epoch": 4.458392772230147, |
| "grad_norm": 0.312853068113327, |
| "learning_rate": 8.514449031438552e-05, |
| "loss": 0.1174, |
| "step": 2344 |
| }, |
| { |
| "epoch": 4.460294816928198, |
| "grad_norm": 0.431356281042099, |
| "learning_rate": 8.513813909177518e-05, |
| "loss": 0.1324, |
| "step": 2345 |
| }, |
| { |
| "epoch": 4.462196861626248, |
| "grad_norm": 0.2785525918006897, |
| "learning_rate": 8.513178786916482e-05, |
| "loss": 0.1025, |
| "step": 2346 |
| }, |
| { |
| "epoch": 4.464098906324299, |
| "grad_norm": 0.2919105291366577, |
| "learning_rate": 8.512543664655447e-05, |
| "loss": 0.1154, |
| "step": 2347 |
| }, |
| { |
| "epoch": 4.466000951022349, |
| "grad_norm": 0.4356403350830078, |
| "learning_rate": 8.51190854239441e-05, |
| "loss": 0.1161, |
| "step": 2348 |
| }, |
| { |
| "epoch": 4.467902995720399, |
| "grad_norm": 0.3411230146884918, |
| "learning_rate": 8.511273420133377e-05, |
| "loss": 0.1032, |
| "step": 2349 |
| }, |
| { |
| "epoch": 4.46980504041845, |
| "grad_norm": 0.3335597515106201, |
| "learning_rate": 8.510638297872341e-05, |
| "loss": 0.1427, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.4717070851165, |
| "grad_norm": 0.3813069760799408, |
| "learning_rate": 8.510003175611305e-05, |
| "loss": 0.1214, |
| "step": 2351 |
| }, |
| { |
| "epoch": 4.473609129814551, |
| "grad_norm": 0.2616579830646515, |
| "learning_rate": 8.509368053350271e-05, |
| "loss": 0.0914, |
| "step": 2352 |
| }, |
| { |
| "epoch": 4.475511174512601, |
| "grad_norm": 0.24161195755004883, |
| "learning_rate": 8.508732931089235e-05, |
| "loss": 0.0806, |
| "step": 2353 |
| }, |
| { |
| "epoch": 4.477413219210652, |
| "grad_norm": 0.41089168190956116, |
| "learning_rate": 8.508097808828199e-05, |
| "loss": 0.1095, |
| "step": 2354 |
| }, |
| { |
| "epoch": 4.479315263908702, |
| "grad_norm": 0.2930002510547638, |
| "learning_rate": 8.507462686567164e-05, |
| "loss": 0.0851, |
| "step": 2355 |
| }, |
| { |
| "epoch": 4.481217308606753, |
| "grad_norm": 0.38217440247535706, |
| "learning_rate": 8.506827564306129e-05, |
| "loss": 0.106, |
| "step": 2356 |
| }, |
| { |
| "epoch": 4.483119353304803, |
| "grad_norm": 0.4617588520050049, |
| "learning_rate": 8.506192442045094e-05, |
| "loss": 0.1269, |
| "step": 2357 |
| }, |
| { |
| "epoch": 4.4850213980028535, |
| "grad_norm": 0.33491015434265137, |
| "learning_rate": 8.505557319784058e-05, |
| "loss": 0.1086, |
| "step": 2358 |
| }, |
| { |
| "epoch": 4.4869234427009035, |
| "grad_norm": 0.31024834513664246, |
| "learning_rate": 8.504922197523023e-05, |
| "loss": 0.1039, |
| "step": 2359 |
| }, |
| { |
| "epoch": 4.4888254873989535, |
| "grad_norm": 0.36780717968940735, |
| "learning_rate": 8.504287075261989e-05, |
| "loss": 0.1102, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.490727532097004, |
| "grad_norm": 0.40606439113616943, |
| "learning_rate": 8.503651953000952e-05, |
| "loss": 0.13, |
| "step": 2361 |
| }, |
| { |
| "epoch": 4.492629576795054, |
| "grad_norm": 0.4511033296585083, |
| "learning_rate": 8.503016830739918e-05, |
| "loss": 0.1182, |
| "step": 2362 |
| }, |
| { |
| "epoch": 4.494531621493105, |
| "grad_norm": 0.36328256130218506, |
| "learning_rate": 8.502381708478883e-05, |
| "loss": 0.1024, |
| "step": 2363 |
| }, |
| { |
| "epoch": 4.496433666191155, |
| "grad_norm": 0.3860591650009155, |
| "learning_rate": 8.501746586217847e-05, |
| "loss": 0.1019, |
| "step": 2364 |
| }, |
| { |
| "epoch": 4.498335710889206, |
| "grad_norm": 0.46222564578056335, |
| "learning_rate": 8.501111463956812e-05, |
| "loss": 0.1132, |
| "step": 2365 |
| }, |
| { |
| "epoch": 4.500237755587256, |
| "grad_norm": 0.3612005412578583, |
| "learning_rate": 8.500476341695777e-05, |
| "loss": 0.0963, |
| "step": 2366 |
| }, |
| { |
| "epoch": 4.502139800285307, |
| "grad_norm": 0.43513086438179016, |
| "learning_rate": 8.499841219434742e-05, |
| "loss": 0.1109, |
| "step": 2367 |
| }, |
| { |
| "epoch": 4.504041844983357, |
| "grad_norm": 0.2950316071510315, |
| "learning_rate": 8.499206097173706e-05, |
| "loss": 0.1124, |
| "step": 2368 |
| }, |
| { |
| "epoch": 4.505943889681408, |
| "grad_norm": 0.36488962173461914, |
| "learning_rate": 8.498570974912671e-05, |
| "loss": 0.1, |
| "step": 2369 |
| }, |
| { |
| "epoch": 4.507845934379458, |
| "grad_norm": 0.3592323064804077, |
| "learning_rate": 8.497935852651636e-05, |
| "loss": 0.0995, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.509747979077508, |
| "grad_norm": 0.34753555059432983, |
| "learning_rate": 8.4973007303906e-05, |
| "loss": 0.1026, |
| "step": 2371 |
| }, |
| { |
| "epoch": 4.511650023775559, |
| "grad_norm": 0.39495691657066345, |
| "learning_rate": 8.496665608129565e-05, |
| "loss": 0.1272, |
| "step": 2372 |
| }, |
| { |
| "epoch": 4.513552068473609, |
| "grad_norm": 0.3553752601146698, |
| "learning_rate": 8.49603048586853e-05, |
| "loss": 0.1136, |
| "step": 2373 |
| }, |
| { |
| "epoch": 4.51545411317166, |
| "grad_norm": 0.37848785519599915, |
| "learning_rate": 8.495395363607494e-05, |
| "loss": 0.1069, |
| "step": 2374 |
| }, |
| { |
| "epoch": 4.51735615786971, |
| "grad_norm": 0.33565762639045715, |
| "learning_rate": 8.49476024134646e-05, |
| "loss": 0.1075, |
| "step": 2375 |
| }, |
| { |
| "epoch": 4.519258202567761, |
| "grad_norm": 0.3359149694442749, |
| "learning_rate": 8.494125119085425e-05, |
| "loss": 0.098, |
| "step": 2376 |
| }, |
| { |
| "epoch": 4.521160247265811, |
| "grad_norm": 0.3218232989311218, |
| "learning_rate": 8.493489996824389e-05, |
| "loss": 0.096, |
| "step": 2377 |
| }, |
| { |
| "epoch": 4.5230622919638614, |
| "grad_norm": 0.3153054714202881, |
| "learning_rate": 8.492854874563354e-05, |
| "loss": 0.1015, |
| "step": 2378 |
| }, |
| { |
| "epoch": 4.5249643366619114, |
| "grad_norm": 0.37637823820114136, |
| "learning_rate": 8.492219752302319e-05, |
| "loss": 0.1164, |
| "step": 2379 |
| }, |
| { |
| "epoch": 4.526866381359962, |
| "grad_norm": 0.3270327150821686, |
| "learning_rate": 8.491584630041284e-05, |
| "loss": 0.1084, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.528768426058012, |
| "grad_norm": 0.23998558521270752, |
| "learning_rate": 8.490949507780248e-05, |
| "loss": 0.0777, |
| "step": 2381 |
| }, |
| { |
| "epoch": 4.530670470756062, |
| "grad_norm": 0.31294015049934387, |
| "learning_rate": 8.490314385519212e-05, |
| "loss": 0.0807, |
| "step": 2382 |
| }, |
| { |
| "epoch": 4.532572515454113, |
| "grad_norm": 0.3305555582046509, |
| "learning_rate": 8.489679263258178e-05, |
| "loss": 0.1011, |
| "step": 2383 |
| }, |
| { |
| "epoch": 4.534474560152163, |
| "grad_norm": 0.35641244053840637, |
| "learning_rate": 8.489044140997142e-05, |
| "loss": 0.11, |
| "step": 2384 |
| }, |
| { |
| "epoch": 4.536376604850214, |
| "grad_norm": 0.3511948883533478, |
| "learning_rate": 8.488409018736107e-05, |
| "loss": 0.1009, |
| "step": 2385 |
| }, |
| { |
| "epoch": 4.538278649548264, |
| "grad_norm": 0.3899917006492615, |
| "learning_rate": 8.487773896475071e-05, |
| "loss": 0.1285, |
| "step": 2386 |
| }, |
| { |
| "epoch": 4.540180694246315, |
| "grad_norm": 0.4415057897567749, |
| "learning_rate": 8.487138774214036e-05, |
| "loss": 0.1434, |
| "step": 2387 |
| }, |
| { |
| "epoch": 4.542082738944365, |
| "grad_norm": 0.42669907212257385, |
| "learning_rate": 8.486503651953002e-05, |
| "loss": 0.1201, |
| "step": 2388 |
| }, |
| { |
| "epoch": 4.543984783642416, |
| "grad_norm": 0.27351129055023193, |
| "learning_rate": 8.485868529691965e-05, |
| "loss": 0.0761, |
| "step": 2389 |
| }, |
| { |
| "epoch": 4.545886828340466, |
| "grad_norm": 0.31243595480918884, |
| "learning_rate": 8.48523340743093e-05, |
| "loss": 0.0909, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.547788873038517, |
| "grad_norm": 0.36273542046546936, |
| "learning_rate": 8.484598285169896e-05, |
| "loss": 0.1156, |
| "step": 2391 |
| }, |
| { |
| "epoch": 4.549690917736567, |
| "grad_norm": 0.3167242109775543, |
| "learning_rate": 8.48396316290886e-05, |
| "loss": 0.2065, |
| "step": 2392 |
| }, |
| { |
| "epoch": 4.551592962434617, |
| "grad_norm": 0.3072797358036041, |
| "learning_rate": 8.483328040647825e-05, |
| "loss": 0.0939, |
| "step": 2393 |
| }, |
| { |
| "epoch": 4.553495007132668, |
| "grad_norm": 0.32601553201675415, |
| "learning_rate": 8.48269291838679e-05, |
| "loss": 0.1052, |
| "step": 2394 |
| }, |
| { |
| "epoch": 4.555397051830718, |
| "grad_norm": 0.41232773661613464, |
| "learning_rate": 8.482057796125754e-05, |
| "loss": 0.1207, |
| "step": 2395 |
| }, |
| { |
| "epoch": 4.5572990965287685, |
| "grad_norm": 0.46499213576316833, |
| "learning_rate": 8.481422673864719e-05, |
| "loss": 0.1251, |
| "step": 2396 |
| }, |
| { |
| "epoch": 4.5592011412268185, |
| "grad_norm": 0.3984009325504303, |
| "learning_rate": 8.480787551603684e-05, |
| "loss": 0.1317, |
| "step": 2397 |
| }, |
| { |
| "epoch": 4.561103185924869, |
| "grad_norm": 0.3825131356716156, |
| "learning_rate": 8.48015242934265e-05, |
| "loss": 0.1273, |
| "step": 2398 |
| }, |
| { |
| "epoch": 4.563005230622919, |
| "grad_norm": 0.39657148718833923, |
| "learning_rate": 8.479517307081613e-05, |
| "loss": 0.145, |
| "step": 2399 |
| }, |
| { |
| "epoch": 4.56490727532097, |
| "grad_norm": 0.3764631748199463, |
| "learning_rate": 8.478882184820578e-05, |
| "loss": 0.1133, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.56680932001902, |
| "grad_norm": 0.2968275249004364, |
| "learning_rate": 8.478247062559544e-05, |
| "loss": 0.0885, |
| "step": 2401 |
| }, |
| { |
| "epoch": 4.568711364717071, |
| "grad_norm": 0.326856791973114, |
| "learning_rate": 8.477611940298507e-05, |
| "loss": 0.0923, |
| "step": 2402 |
| }, |
| { |
| "epoch": 4.570613409415121, |
| "grad_norm": 0.38287606835365295, |
| "learning_rate": 8.476976818037473e-05, |
| "loss": 0.141, |
| "step": 2403 |
| }, |
| { |
| "epoch": 4.572515454113171, |
| "grad_norm": 0.47493815422058105, |
| "learning_rate": 8.476341695776438e-05, |
| "loss": 0.1146, |
| "step": 2404 |
| }, |
| { |
| "epoch": 4.574417498811222, |
| "grad_norm": 0.35078614950180054, |
| "learning_rate": 8.475706573515402e-05, |
| "loss": 0.1153, |
| "step": 2405 |
| }, |
| { |
| "epoch": 4.576319543509273, |
| "grad_norm": 0.3837313950061798, |
| "learning_rate": 8.475071451254367e-05, |
| "loss": 0.1408, |
| "step": 2406 |
| }, |
| { |
| "epoch": 4.578221588207323, |
| "grad_norm": 0.3800102472305298, |
| "learning_rate": 8.474436328993332e-05, |
| "loss": 0.1224, |
| "step": 2407 |
| }, |
| { |
| "epoch": 4.580123632905373, |
| "grad_norm": 0.40831804275512695, |
| "learning_rate": 8.473801206732296e-05, |
| "loss": 0.1283, |
| "step": 2408 |
| }, |
| { |
| "epoch": 4.582025677603424, |
| "grad_norm": 0.34854429960250854, |
| "learning_rate": 8.473166084471261e-05, |
| "loss": 0.101, |
| "step": 2409 |
| }, |
| { |
| "epoch": 4.583927722301474, |
| "grad_norm": 0.3317374885082245, |
| "learning_rate": 8.472530962210226e-05, |
| "loss": 0.0986, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.585829766999525, |
| "grad_norm": 0.3316230773925781, |
| "learning_rate": 8.471895839949191e-05, |
| "loss": 0.0955, |
| "step": 2411 |
| }, |
| { |
| "epoch": 4.587731811697575, |
| "grad_norm": 0.3458825945854187, |
| "learning_rate": 8.471260717688155e-05, |
| "loss": 0.1246, |
| "step": 2412 |
| }, |
| { |
| "epoch": 4.589633856395626, |
| "grad_norm": 0.2985215187072754, |
| "learning_rate": 8.470625595427119e-05, |
| "loss": 0.0904, |
| "step": 2413 |
| }, |
| { |
| "epoch": 4.591535901093676, |
| "grad_norm": 0.5128130912780762, |
| "learning_rate": 8.469990473166086e-05, |
| "loss": 0.1119, |
| "step": 2414 |
| }, |
| { |
| "epoch": 4.5934379457917265, |
| "grad_norm": 0.3538981080055237, |
| "learning_rate": 8.46935535090505e-05, |
| "loss": 0.1276, |
| "step": 2415 |
| }, |
| { |
| "epoch": 4.5953399904897765, |
| "grad_norm": 0.24112893640995026, |
| "learning_rate": 8.468720228644015e-05, |
| "loss": 0.0813, |
| "step": 2416 |
| }, |
| { |
| "epoch": 4.597242035187827, |
| "grad_norm": 0.34151947498321533, |
| "learning_rate": 8.46808510638298e-05, |
| "loss": 0.1214, |
| "step": 2417 |
| }, |
| { |
| "epoch": 4.599144079885877, |
| "grad_norm": 0.3011094629764557, |
| "learning_rate": 8.467449984121944e-05, |
| "loss": 0.0955, |
| "step": 2418 |
| }, |
| { |
| "epoch": 4.601046124583927, |
| "grad_norm": 0.45026248693466187, |
| "learning_rate": 8.466814861860909e-05, |
| "loss": 0.1309, |
| "step": 2419 |
| }, |
| { |
| "epoch": 4.602948169281978, |
| "grad_norm": 0.38199952244758606, |
| "learning_rate": 8.466179739599873e-05, |
| "loss": 0.1229, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.604850213980028, |
| "grad_norm": 0.44846484065055847, |
| "learning_rate": 8.465544617338839e-05, |
| "loss": 0.1254, |
| "step": 2421 |
| }, |
| { |
| "epoch": 4.606752258678079, |
| "grad_norm": 0.29512494802474976, |
| "learning_rate": 8.464909495077803e-05, |
| "loss": 0.0874, |
| "step": 2422 |
| }, |
| { |
| "epoch": 4.608654303376129, |
| "grad_norm": 0.34601306915283203, |
| "learning_rate": 8.464274372816767e-05, |
| "loss": 0.0928, |
| "step": 2423 |
| }, |
| { |
| "epoch": 4.61055634807418, |
| "grad_norm": 0.4081529378890991, |
| "learning_rate": 8.463639250555733e-05, |
| "loss": 0.1161, |
| "step": 2424 |
| }, |
| { |
| "epoch": 4.61245839277223, |
| "grad_norm": 0.39208075404167175, |
| "learning_rate": 8.463004128294697e-05, |
| "loss": 0.1124, |
| "step": 2425 |
| }, |
| { |
| "epoch": 4.614360437470281, |
| "grad_norm": 0.2740732431411743, |
| "learning_rate": 8.462369006033661e-05, |
| "loss": 0.0698, |
| "step": 2426 |
| }, |
| { |
| "epoch": 4.616262482168331, |
| "grad_norm": 0.37493231892585754, |
| "learning_rate": 8.461733883772626e-05, |
| "loss": 0.089, |
| "step": 2427 |
| }, |
| { |
| "epoch": 4.618164526866382, |
| "grad_norm": 0.4912300407886505, |
| "learning_rate": 8.461098761511591e-05, |
| "loss": 0.1374, |
| "step": 2428 |
| }, |
| { |
| "epoch": 4.620066571564432, |
| "grad_norm": 0.44587963819503784, |
| "learning_rate": 8.460463639250557e-05, |
| "loss": 0.1207, |
| "step": 2429 |
| }, |
| { |
| "epoch": 4.621968616262482, |
| "grad_norm": 0.4140859544277191, |
| "learning_rate": 8.45982851698952e-05, |
| "loss": 0.1333, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.623870660960533, |
| "grad_norm": 0.3500138223171234, |
| "learning_rate": 8.459193394728486e-05, |
| "loss": 0.1032, |
| "step": 2431 |
| }, |
| { |
| "epoch": 4.625772705658583, |
| "grad_norm": 0.3875083327293396, |
| "learning_rate": 8.458558272467451e-05, |
| "loss": 0.1018, |
| "step": 2432 |
| }, |
| { |
| "epoch": 4.627674750356634, |
| "grad_norm": 0.5065046548843384, |
| "learning_rate": 8.457923150206415e-05, |
| "loss": 0.125, |
| "step": 2433 |
| }, |
| { |
| "epoch": 4.629576795054684, |
| "grad_norm": 0.2707502841949463, |
| "learning_rate": 8.45728802794538e-05, |
| "loss": 0.1002, |
| "step": 2434 |
| }, |
| { |
| "epoch": 4.6314788397527344, |
| "grad_norm": 0.38502418994903564, |
| "learning_rate": 8.456652905684345e-05, |
| "loss": 0.1264, |
| "step": 2435 |
| }, |
| { |
| "epoch": 4.633380884450784, |
| "grad_norm": 0.34822702407836914, |
| "learning_rate": 8.456017783423309e-05, |
| "loss": 0.1184, |
| "step": 2436 |
| }, |
| { |
| "epoch": 4.635282929148835, |
| "grad_norm": 0.33620592951774597, |
| "learning_rate": 8.455382661162274e-05, |
| "loss": 0.1264, |
| "step": 2437 |
| }, |
| { |
| "epoch": 4.637184973846885, |
| "grad_norm": 0.3064115345478058, |
| "learning_rate": 8.454747538901239e-05, |
| "loss": 0.1122, |
| "step": 2438 |
| }, |
| { |
| "epoch": 4.639087018544936, |
| "grad_norm": 0.34428808093070984, |
| "learning_rate": 8.454112416640204e-05, |
| "loss": 0.1083, |
| "step": 2439 |
| }, |
| { |
| "epoch": 4.640989063242986, |
| "grad_norm": 0.3312735855579376, |
| "learning_rate": 8.453477294379168e-05, |
| "loss": 0.1046, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.642891107941036, |
| "grad_norm": 0.42405757308006287, |
| "learning_rate": 8.452842172118133e-05, |
| "loss": 0.1364, |
| "step": 2441 |
| }, |
| { |
| "epoch": 4.644793152639087, |
| "grad_norm": 0.39682331681251526, |
| "learning_rate": 8.452207049857099e-05, |
| "loss": 0.1262, |
| "step": 2442 |
| }, |
| { |
| "epoch": 4.646695197337137, |
| "grad_norm": 0.3447044789791107, |
| "learning_rate": 8.451571927596062e-05, |
| "loss": 0.1158, |
| "step": 2443 |
| }, |
| { |
| "epoch": 4.648597242035188, |
| "grad_norm": 0.40121355652809143, |
| "learning_rate": 8.450936805335026e-05, |
| "loss": 0.1246, |
| "step": 2444 |
| }, |
| { |
| "epoch": 4.650499286733238, |
| "grad_norm": 0.3898472785949707, |
| "learning_rate": 8.450301683073993e-05, |
| "loss": 0.1244, |
| "step": 2445 |
| }, |
| { |
| "epoch": 4.652401331431289, |
| "grad_norm": 0.2964152991771698, |
| "learning_rate": 8.449666560812957e-05, |
| "loss": 0.0925, |
| "step": 2446 |
| }, |
| { |
| "epoch": 4.654303376129339, |
| "grad_norm": 0.2836705446243286, |
| "learning_rate": 8.449031438551922e-05, |
| "loss": 0.101, |
| "step": 2447 |
| }, |
| { |
| "epoch": 4.65620542082739, |
| "grad_norm": 0.3003692030906677, |
| "learning_rate": 8.448396316290887e-05, |
| "loss": 0.0922, |
| "step": 2448 |
| }, |
| { |
| "epoch": 4.65810746552544, |
| "grad_norm": 0.5348609089851379, |
| "learning_rate": 8.447761194029851e-05, |
| "loss": 0.1735, |
| "step": 2449 |
| }, |
| { |
| "epoch": 4.660009510223491, |
| "grad_norm": 0.3387379050254822, |
| "learning_rate": 8.447126071768816e-05, |
| "loss": 0.1126, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.661911554921541, |
| "grad_norm": 0.30646830797195435, |
| "learning_rate": 8.44649094950778e-05, |
| "loss": 0.085, |
| "step": 2451 |
| }, |
| { |
| "epoch": 4.663813599619591, |
| "grad_norm": 0.34434470534324646, |
| "learning_rate": 8.445855827246746e-05, |
| "loss": 0.1113, |
| "step": 2452 |
| }, |
| { |
| "epoch": 4.6657156443176415, |
| "grad_norm": 0.38273414969444275, |
| "learning_rate": 8.44522070498571e-05, |
| "loss": 0.1135, |
| "step": 2453 |
| }, |
| { |
| "epoch": 4.6676176890156915, |
| "grad_norm": 0.44843336939811707, |
| "learning_rate": 8.444585582724674e-05, |
| "loss": 0.1497, |
| "step": 2454 |
| }, |
| { |
| "epoch": 4.669519733713742, |
| "grad_norm": 0.4575416147708893, |
| "learning_rate": 8.44395046046364e-05, |
| "loss": 0.1082, |
| "step": 2455 |
| }, |
| { |
| "epoch": 4.671421778411792, |
| "grad_norm": 0.38473185896873474, |
| "learning_rate": 8.443315338202604e-05, |
| "loss": 0.1255, |
| "step": 2456 |
| }, |
| { |
| "epoch": 4.673323823109843, |
| "grad_norm": 0.3839578926563263, |
| "learning_rate": 8.44268021594157e-05, |
| "loss": 0.1106, |
| "step": 2457 |
| }, |
| { |
| "epoch": 4.675225867807893, |
| "grad_norm": 0.35472893714904785, |
| "learning_rate": 8.442045093680533e-05, |
| "loss": 0.1122, |
| "step": 2458 |
| }, |
| { |
| "epoch": 4.677127912505944, |
| "grad_norm": 0.34224382042884827, |
| "learning_rate": 8.441409971419499e-05, |
| "loss": 0.0963, |
| "step": 2459 |
| }, |
| { |
| "epoch": 4.679029957203994, |
| "grad_norm": 0.3992440104484558, |
| "learning_rate": 8.440774849158464e-05, |
| "loss": 0.1234, |
| "step": 2460 |
| }, |
| { |
| "epoch": 4.680932001902045, |
| "grad_norm": 0.39441943168640137, |
| "learning_rate": 8.440139726897428e-05, |
| "loss": 0.11, |
| "step": 2461 |
| }, |
| { |
| "epoch": 4.682834046600095, |
| "grad_norm": 0.43852171301841736, |
| "learning_rate": 8.439504604636393e-05, |
| "loss": 0.1361, |
| "step": 2462 |
| }, |
| { |
| "epoch": 4.684736091298145, |
| "grad_norm": 0.35047483444213867, |
| "learning_rate": 8.438869482375358e-05, |
| "loss": 0.0981, |
| "step": 2463 |
| }, |
| { |
| "epoch": 4.686638135996196, |
| "grad_norm": 0.3970755934715271, |
| "learning_rate": 8.438234360114322e-05, |
| "loss": 0.1196, |
| "step": 2464 |
| }, |
| { |
| "epoch": 4.688540180694246, |
| "grad_norm": 0.2760510742664337, |
| "learning_rate": 8.437599237853287e-05, |
| "loss": 0.1035, |
| "step": 2465 |
| }, |
| { |
| "epoch": 4.690442225392297, |
| "grad_norm": 0.26530909538269043, |
| "learning_rate": 8.436964115592252e-05, |
| "loss": 0.1589, |
| "step": 2466 |
| }, |
| { |
| "epoch": 4.692344270090347, |
| "grad_norm": 0.2989928126335144, |
| "learning_rate": 8.436328993331216e-05, |
| "loss": 0.0945, |
| "step": 2467 |
| }, |
| { |
| "epoch": 4.694246314788398, |
| "grad_norm": 0.42447128891944885, |
| "learning_rate": 8.435693871070181e-05, |
| "loss": 0.1433, |
| "step": 2468 |
| }, |
| { |
| "epoch": 4.696148359486448, |
| "grad_norm": 0.4014334976673126, |
| "learning_rate": 8.435058748809146e-05, |
| "loss": 0.1242, |
| "step": 2469 |
| }, |
| { |
| "epoch": 4.698050404184499, |
| "grad_norm": 0.3872852921485901, |
| "learning_rate": 8.434423626548111e-05, |
| "loss": 0.1195, |
| "step": 2470 |
| }, |
| { |
| "epoch": 4.699952448882549, |
| "grad_norm": 0.3857705891132355, |
| "learning_rate": 8.433788504287075e-05, |
| "loss": 0.108, |
| "step": 2471 |
| }, |
| { |
| "epoch": 4.7018544935805995, |
| "grad_norm": 0.3534420430660248, |
| "learning_rate": 8.43315338202604e-05, |
| "loss": 0.1218, |
| "step": 2472 |
| }, |
| { |
| "epoch": 4.7037565382786495, |
| "grad_norm": 0.32009604573249817, |
| "learning_rate": 8.432518259765006e-05, |
| "loss": 0.1053, |
| "step": 2473 |
| }, |
| { |
| "epoch": 4.7056585829766995, |
| "grad_norm": 0.2501387894153595, |
| "learning_rate": 8.43188313750397e-05, |
| "loss": 0.0668, |
| "step": 2474 |
| }, |
| { |
| "epoch": 4.70756062767475, |
| "grad_norm": 0.3360025882720947, |
| "learning_rate": 8.431248015242935e-05, |
| "loss": 0.1119, |
| "step": 2475 |
| }, |
| { |
| "epoch": 4.709462672372801, |
| "grad_norm": 0.31509891152381897, |
| "learning_rate": 8.4306128929819e-05, |
| "loss": 0.0955, |
| "step": 2476 |
| }, |
| { |
| "epoch": 4.711364717070851, |
| "grad_norm": 0.42007285356521606, |
| "learning_rate": 8.429977770720864e-05, |
| "loss": 0.1441, |
| "step": 2477 |
| }, |
| { |
| "epoch": 4.713266761768901, |
| "grad_norm": 0.39764338731765747, |
| "learning_rate": 8.429342648459829e-05, |
| "loss": 0.1175, |
| "step": 2478 |
| }, |
| { |
| "epoch": 4.715168806466952, |
| "grad_norm": 0.33381861448287964, |
| "learning_rate": 8.428707526198794e-05, |
| "loss": 0.1199, |
| "step": 2479 |
| }, |
| { |
| "epoch": 4.717070851165002, |
| "grad_norm": 0.2918257415294647, |
| "learning_rate": 8.428072403937758e-05, |
| "loss": 0.0796, |
| "step": 2480 |
| }, |
| { |
| "epoch": 4.718972895863053, |
| "grad_norm": 0.42560750246047974, |
| "learning_rate": 8.427437281676723e-05, |
| "loss": 0.114, |
| "step": 2481 |
| }, |
| { |
| "epoch": 4.720874940561103, |
| "grad_norm": 0.3700113594532013, |
| "learning_rate": 8.426802159415688e-05, |
| "loss": 0.1145, |
| "step": 2482 |
| }, |
| { |
| "epoch": 4.722776985259154, |
| "grad_norm": 0.39171457290649414, |
| "learning_rate": 8.426167037154653e-05, |
| "loss": 0.128, |
| "step": 2483 |
| }, |
| { |
| "epoch": 4.724679029957204, |
| "grad_norm": 0.3000270426273346, |
| "learning_rate": 8.425531914893617e-05, |
| "loss": 0.0932, |
| "step": 2484 |
| }, |
| { |
| "epoch": 4.726581074655254, |
| "grad_norm": 0.2848623991012573, |
| "learning_rate": 8.424896792632581e-05, |
| "loss": 0.086, |
| "step": 2485 |
| }, |
| { |
| "epoch": 4.728483119353305, |
| "grad_norm": 0.3404539227485657, |
| "learning_rate": 8.424261670371548e-05, |
| "loss": 0.0934, |
| "step": 2486 |
| }, |
| { |
| "epoch": 4.730385164051356, |
| "grad_norm": 0.31609418988227844, |
| "learning_rate": 8.423626548110511e-05, |
| "loss": 0.0985, |
| "step": 2487 |
| }, |
| { |
| "epoch": 4.732287208749406, |
| "grad_norm": 0.34037312865257263, |
| "learning_rate": 8.422991425849477e-05, |
| "loss": 0.1193, |
| "step": 2488 |
| }, |
| { |
| "epoch": 4.734189253447456, |
| "grad_norm": 0.31899651885032654, |
| "learning_rate": 8.422356303588442e-05, |
| "loss": 0.1137, |
| "step": 2489 |
| }, |
| { |
| "epoch": 4.736091298145507, |
| "grad_norm": 0.39307737350463867, |
| "learning_rate": 8.421721181327406e-05, |
| "loss": 0.1452, |
| "step": 2490 |
| }, |
| { |
| "epoch": 4.7379933428435566, |
| "grad_norm": 0.26885175704956055, |
| "learning_rate": 8.421086059066371e-05, |
| "loss": 0.1025, |
| "step": 2491 |
| }, |
| { |
| "epoch": 4.739895387541607, |
| "grad_norm": 0.23492799699306488, |
| "learning_rate": 8.420450936805335e-05, |
| "loss": 0.0821, |
| "step": 2492 |
| }, |
| { |
| "epoch": 4.741797432239657, |
| "grad_norm": 0.30144715309143066, |
| "learning_rate": 8.419815814544301e-05, |
| "loss": 0.0924, |
| "step": 2493 |
| }, |
| { |
| "epoch": 4.743699476937708, |
| "grad_norm": 0.3370392322540283, |
| "learning_rate": 8.419180692283265e-05, |
| "loss": 0.1281, |
| "step": 2494 |
| }, |
| { |
| "epoch": 4.745601521635758, |
| "grad_norm": 0.3939819633960724, |
| "learning_rate": 8.418545570022229e-05, |
| "loss": 0.1115, |
| "step": 2495 |
| }, |
| { |
| "epoch": 4.747503566333809, |
| "grad_norm": 0.7242825627326965, |
| "learning_rate": 8.417910447761194e-05, |
| "loss": 0.1038, |
| "step": 2496 |
| }, |
| { |
| "epoch": 4.749405611031859, |
| "grad_norm": 0.3430320620536804, |
| "learning_rate": 8.417275325500159e-05, |
| "loss": 0.107, |
| "step": 2497 |
| }, |
| { |
| "epoch": 4.75130765572991, |
| "grad_norm": 0.37956321239471436, |
| "learning_rate": 8.416640203239123e-05, |
| "loss": 0.1203, |
| "step": 2498 |
| }, |
| { |
| "epoch": 4.75320970042796, |
| "grad_norm": 0.3118121027946472, |
| "learning_rate": 8.416005080978088e-05, |
| "loss": 0.0961, |
| "step": 2499 |
| }, |
| { |
| "epoch": 4.75511174512601, |
| "grad_norm": 0.3842122554779053, |
| "learning_rate": 8.415369958717053e-05, |
| "loss": 0.1095, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.757013789824061, |
| "grad_norm": 0.36103618144989014, |
| "learning_rate": 8.414734836456019e-05, |
| "loss": 0.107, |
| "step": 2501 |
| }, |
| { |
| "epoch": 4.758915834522111, |
| "grad_norm": 0.4404369592666626, |
| "learning_rate": 8.414099714194982e-05, |
| "loss": 0.0972, |
| "step": 2502 |
| }, |
| { |
| "epoch": 4.760817879220162, |
| "grad_norm": 0.45303696393966675, |
| "learning_rate": 8.413464591933948e-05, |
| "loss": 0.1286, |
| "step": 2503 |
| }, |
| { |
| "epoch": 4.762719923918212, |
| "grad_norm": 0.36196044087409973, |
| "learning_rate": 8.412829469672913e-05, |
| "loss": 0.1095, |
| "step": 2504 |
| }, |
| { |
| "epoch": 4.764621968616263, |
| "grad_norm": 0.49001795053482056, |
| "learning_rate": 8.412194347411877e-05, |
| "loss": 0.1578, |
| "step": 2505 |
| }, |
| { |
| "epoch": 4.766524013314313, |
| "grad_norm": 0.32446369528770447, |
| "learning_rate": 8.411559225150842e-05, |
| "loss": 0.0991, |
| "step": 2506 |
| }, |
| { |
| "epoch": 4.768426058012364, |
| "grad_norm": 0.3021388053894043, |
| "learning_rate": 8.410924102889807e-05, |
| "loss": 0.0902, |
| "step": 2507 |
| }, |
| { |
| "epoch": 4.770328102710414, |
| "grad_norm": 0.28912147879600525, |
| "learning_rate": 8.410288980628771e-05, |
| "loss": 0.106, |
| "step": 2508 |
| }, |
| { |
| "epoch": 4.7722301474084645, |
| "grad_norm": 0.40766748785972595, |
| "learning_rate": 8.409653858367736e-05, |
| "loss": 0.1155, |
| "step": 2509 |
| }, |
| { |
| "epoch": 4.7741321921065145, |
| "grad_norm": 0.5005617737770081, |
| "learning_rate": 8.409018736106701e-05, |
| "loss": 0.1674, |
| "step": 2510 |
| }, |
| { |
| "epoch": 4.7760342368045645, |
| "grad_norm": 0.4575154781341553, |
| "learning_rate": 8.408383613845666e-05, |
| "loss": 0.1639, |
| "step": 2511 |
| }, |
| { |
| "epoch": 4.777936281502615, |
| "grad_norm": 0.4962354302406311, |
| "learning_rate": 8.40774849158463e-05, |
| "loss": 0.1336, |
| "step": 2512 |
| }, |
| { |
| "epoch": 4.779838326200665, |
| "grad_norm": 0.4569809138774872, |
| "learning_rate": 8.407113369323595e-05, |
| "loss": 0.1323, |
| "step": 2513 |
| }, |
| { |
| "epoch": 4.781740370898716, |
| "grad_norm": 0.34369999170303345, |
| "learning_rate": 8.40647824706256e-05, |
| "loss": 0.1171, |
| "step": 2514 |
| }, |
| { |
| "epoch": 4.783642415596766, |
| "grad_norm": 0.3565669655799866, |
| "learning_rate": 8.405843124801524e-05, |
| "loss": 0.1159, |
| "step": 2515 |
| }, |
| { |
| "epoch": 4.785544460294817, |
| "grad_norm": 0.24039465188980103, |
| "learning_rate": 8.405208002540488e-05, |
| "loss": 0.0976, |
| "step": 2516 |
| }, |
| { |
| "epoch": 4.787446504992867, |
| "grad_norm": 0.37532779574394226, |
| "learning_rate": 8.404572880279455e-05, |
| "loss": 0.1129, |
| "step": 2517 |
| }, |
| { |
| "epoch": 4.789348549690918, |
| "grad_norm": 0.334505170583725, |
| "learning_rate": 8.403937758018419e-05, |
| "loss": 0.1016, |
| "step": 2518 |
| }, |
| { |
| "epoch": 4.791250594388968, |
| "grad_norm": 0.43082761764526367, |
| "learning_rate": 8.403302635757384e-05, |
| "loss": 0.1307, |
| "step": 2519 |
| }, |
| { |
| "epoch": 4.793152639087019, |
| "grad_norm": 0.4381292760372162, |
| "learning_rate": 8.402667513496349e-05, |
| "loss": 0.1137, |
| "step": 2520 |
| }, |
| { |
| "epoch": 4.795054683785069, |
| "grad_norm": 0.4337981045246124, |
| "learning_rate": 8.402032391235313e-05, |
| "loss": 0.1281, |
| "step": 2521 |
| }, |
| { |
| "epoch": 4.796956728483119, |
| "grad_norm": 0.4429587721824646, |
| "learning_rate": 8.401397268974278e-05, |
| "loss": 0.1191, |
| "step": 2522 |
| }, |
| { |
| "epoch": 4.79885877318117, |
| "grad_norm": 0.4298746883869171, |
| "learning_rate": 8.400762146713242e-05, |
| "loss": 0.1367, |
| "step": 2523 |
| }, |
| { |
| "epoch": 4.80076081787922, |
| "grad_norm": 0.42826715111732483, |
| "learning_rate": 8.400127024452208e-05, |
| "loss": 0.1222, |
| "step": 2524 |
| }, |
| { |
| "epoch": 4.802662862577271, |
| "grad_norm": 0.37338751554489136, |
| "learning_rate": 8.399491902191172e-05, |
| "loss": 0.1048, |
| "step": 2525 |
| }, |
| { |
| "epoch": 4.804564907275321, |
| "grad_norm": 0.38671061396598816, |
| "learning_rate": 8.398856779930136e-05, |
| "loss": 0.1154, |
| "step": 2526 |
| }, |
| { |
| "epoch": 4.806466951973372, |
| "grad_norm": 0.3544102907180786, |
| "learning_rate": 8.398221657669103e-05, |
| "loss": 0.1055, |
| "step": 2527 |
| }, |
| { |
| "epoch": 4.808368996671422, |
| "grad_norm": 0.38023364543914795, |
| "learning_rate": 8.397586535408066e-05, |
| "loss": 0.1117, |
| "step": 2528 |
| }, |
| { |
| "epoch": 4.8102710413694725, |
| "grad_norm": 0.3622092008590698, |
| "learning_rate": 8.396951413147032e-05, |
| "loss": 0.1099, |
| "step": 2529 |
| }, |
| { |
| "epoch": 4.8121730860675225, |
| "grad_norm": 0.692039966583252, |
| "learning_rate": 8.396316290885995e-05, |
| "loss": 0.1335, |
| "step": 2530 |
| }, |
| { |
| "epoch": 4.814075130765573, |
| "grad_norm": 0.35321712493896484, |
| "learning_rate": 8.39568116862496e-05, |
| "loss": 0.1175, |
| "step": 2531 |
| }, |
| { |
| "epoch": 4.815977175463623, |
| "grad_norm": 0.37036386132240295, |
| "learning_rate": 8.395046046363926e-05, |
| "loss": 0.1253, |
| "step": 2532 |
| }, |
| { |
| "epoch": 4.817879220161673, |
| "grad_norm": 0.42249128222465515, |
| "learning_rate": 8.39441092410289e-05, |
| "loss": 0.1163, |
| "step": 2533 |
| }, |
| { |
| "epoch": 4.819781264859724, |
| "grad_norm": 0.3563583195209503, |
| "learning_rate": 8.393775801841855e-05, |
| "loss": 0.1597, |
| "step": 2534 |
| }, |
| { |
| "epoch": 4.821683309557774, |
| "grad_norm": 0.39946305751800537, |
| "learning_rate": 8.39314067958082e-05, |
| "loss": 0.1156, |
| "step": 2535 |
| }, |
| { |
| "epoch": 4.823585354255825, |
| "grad_norm": 0.31761807203292847, |
| "learning_rate": 8.392505557319784e-05, |
| "loss": 0.0946, |
| "step": 2536 |
| }, |
| { |
| "epoch": 4.825487398953875, |
| "grad_norm": 0.4180295765399933, |
| "learning_rate": 8.391870435058749e-05, |
| "loss": 0.1271, |
| "step": 2537 |
| }, |
| { |
| "epoch": 4.827389443651926, |
| "grad_norm": 0.36158043146133423, |
| "learning_rate": 8.391235312797714e-05, |
| "loss": 0.106, |
| "step": 2538 |
| }, |
| { |
| "epoch": 4.829291488349976, |
| "grad_norm": 0.4044169783592224, |
| "learning_rate": 8.390600190536678e-05, |
| "loss": 0.1094, |
| "step": 2539 |
| }, |
| { |
| "epoch": 4.831193533048027, |
| "grad_norm": 0.3362937569618225, |
| "learning_rate": 8.389965068275643e-05, |
| "loss": 0.078, |
| "step": 2540 |
| }, |
| { |
| "epoch": 4.833095577746077, |
| "grad_norm": 0.3558341860771179, |
| "learning_rate": 8.389329946014608e-05, |
| "loss": 0.1125, |
| "step": 2541 |
| }, |
| { |
| "epoch": 4.834997622444128, |
| "grad_norm": 0.44893354177474976, |
| "learning_rate": 8.388694823753574e-05, |
| "loss": 0.1393, |
| "step": 2542 |
| }, |
| { |
| "epoch": 4.836899667142178, |
| "grad_norm": 0.3790888488292694, |
| "learning_rate": 8.388059701492537e-05, |
| "loss": 0.1312, |
| "step": 2543 |
| }, |
| { |
| "epoch": 4.838801711840228, |
| "grad_norm": 0.24070213735103607, |
| "learning_rate": 8.387424579231503e-05, |
| "loss": 0.0772, |
| "step": 2544 |
| }, |
| { |
| "epoch": 4.840703756538279, |
| "grad_norm": 0.4367123246192932, |
| "learning_rate": 8.386789456970468e-05, |
| "loss": 0.1227, |
| "step": 2545 |
| }, |
| { |
| "epoch": 4.842605801236329, |
| "grad_norm": 0.3168450891971588, |
| "learning_rate": 8.386154334709432e-05, |
| "loss": 0.0928, |
| "step": 2546 |
| }, |
| { |
| "epoch": 4.8445078459343796, |
| "grad_norm": 0.36236846446990967, |
| "learning_rate": 8.385519212448397e-05, |
| "loss": 0.0997, |
| "step": 2547 |
| }, |
| { |
| "epoch": 4.8464098906324296, |
| "grad_norm": 0.31763169169425964, |
| "learning_rate": 8.384884090187362e-05, |
| "loss": 0.1093, |
| "step": 2548 |
| }, |
| { |
| "epoch": 4.84831193533048, |
| "grad_norm": 0.3502260148525238, |
| "learning_rate": 8.384248967926326e-05, |
| "loss": 0.1299, |
| "step": 2549 |
| }, |
| { |
| "epoch": 4.85021398002853, |
| "grad_norm": 0.3593395948410034, |
| "learning_rate": 8.383613845665291e-05, |
| "loss": 0.1066, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.852116024726581, |
| "grad_norm": 0.39665883779525757, |
| "learning_rate": 8.382978723404256e-05, |
| "loss": 0.1267, |
| "step": 2551 |
| }, |
| { |
| "epoch": 4.854018069424631, |
| "grad_norm": 0.4395765960216522, |
| "learning_rate": 8.38234360114322e-05, |
| "loss": 0.174, |
| "step": 2552 |
| }, |
| { |
| "epoch": 4.855920114122682, |
| "grad_norm": 0.3507075607776642, |
| "learning_rate": 8.381708478882185e-05, |
| "loss": 0.0953, |
| "step": 2553 |
| }, |
| { |
| "epoch": 4.857822158820732, |
| "grad_norm": 0.3769589364528656, |
| "learning_rate": 8.381073356621149e-05, |
| "loss": 0.1395, |
| "step": 2554 |
| }, |
| { |
| "epoch": 4.859724203518782, |
| "grad_norm": 0.30503159761428833, |
| "learning_rate": 8.380438234360116e-05, |
| "loss": 0.0937, |
| "step": 2555 |
| }, |
| { |
| "epoch": 4.861626248216833, |
| "grad_norm": 0.39943060278892517, |
| "learning_rate": 8.37980311209908e-05, |
| "loss": 0.1103, |
| "step": 2556 |
| }, |
| { |
| "epoch": 4.863528292914884, |
| "grad_norm": 0.36200422048568726, |
| "learning_rate": 8.379167989838043e-05, |
| "loss": 0.1135, |
| "step": 2557 |
| }, |
| { |
| "epoch": 4.865430337612934, |
| "grad_norm": 0.3811735510826111, |
| "learning_rate": 8.37853286757701e-05, |
| "loss": 0.1265, |
| "step": 2558 |
| }, |
| { |
| "epoch": 4.867332382310984, |
| "grad_norm": 0.42090871930122375, |
| "learning_rate": 8.377897745315974e-05, |
| "loss": 0.1339, |
| "step": 2559 |
| }, |
| { |
| "epoch": 4.869234427009035, |
| "grad_norm": 0.41796380281448364, |
| "learning_rate": 8.377262623054939e-05, |
| "loss": 0.1136, |
| "step": 2560 |
| }, |
| { |
| "epoch": 4.871136471707085, |
| "grad_norm": 0.33189094066619873, |
| "learning_rate": 8.376627500793903e-05, |
| "loss": 0.0923, |
| "step": 2561 |
| }, |
| { |
| "epoch": 4.873038516405136, |
| "grad_norm": 0.46369072794914246, |
| "learning_rate": 8.375992378532868e-05, |
| "loss": 0.1236, |
| "step": 2562 |
| }, |
| { |
| "epoch": 4.874940561103186, |
| "grad_norm": 0.27973759174346924, |
| "learning_rate": 8.375357256271833e-05, |
| "loss": 0.0933, |
| "step": 2563 |
| }, |
| { |
| "epoch": 4.876842605801237, |
| "grad_norm": 0.39309409260749817, |
| "learning_rate": 8.374722134010797e-05, |
| "loss": 0.1135, |
| "step": 2564 |
| }, |
| { |
| "epoch": 4.878744650499287, |
| "grad_norm": 0.43652641773223877, |
| "learning_rate": 8.374087011749763e-05, |
| "loss": 0.136, |
| "step": 2565 |
| }, |
| { |
| "epoch": 4.8806466951973375, |
| "grad_norm": 0.30485180020332336, |
| "learning_rate": 8.373451889488727e-05, |
| "loss": 0.0894, |
| "step": 2566 |
| }, |
| { |
| "epoch": 4.8825487398953875, |
| "grad_norm": 0.40164196491241455, |
| "learning_rate": 8.372816767227691e-05, |
| "loss": 0.1235, |
| "step": 2567 |
| }, |
| { |
| "epoch": 4.884450784593438, |
| "grad_norm": 0.3442533314228058, |
| "learning_rate": 8.372181644966656e-05, |
| "loss": 0.1222, |
| "step": 2568 |
| }, |
| { |
| "epoch": 4.886352829291488, |
| "grad_norm": 0.38092851638793945, |
| "learning_rate": 8.371546522705621e-05, |
| "loss": 0.1135, |
| "step": 2569 |
| }, |
| { |
| "epoch": 4.888254873989538, |
| "grad_norm": 0.37114188075065613, |
| "learning_rate": 8.370911400444585e-05, |
| "loss": 0.1181, |
| "step": 2570 |
| }, |
| { |
| "epoch": 4.890156918687589, |
| "grad_norm": 0.35971492528915405, |
| "learning_rate": 8.37027627818355e-05, |
| "loss": 0.1247, |
| "step": 2571 |
| }, |
| { |
| "epoch": 4.892058963385639, |
| "grad_norm": 0.25756967067718506, |
| "learning_rate": 8.369641155922516e-05, |
| "loss": 0.0929, |
| "step": 2572 |
| }, |
| { |
| "epoch": 4.89396100808369, |
| "grad_norm": 0.4541129171848297, |
| "learning_rate": 8.369006033661481e-05, |
| "loss": 0.142, |
| "step": 2573 |
| }, |
| { |
| "epoch": 4.89586305278174, |
| "grad_norm": 0.48526903986930847, |
| "learning_rate": 8.368370911400445e-05, |
| "loss": 0.1612, |
| "step": 2574 |
| }, |
| { |
| "epoch": 4.897765097479791, |
| "grad_norm": 0.31703343987464905, |
| "learning_rate": 8.36773578913941e-05, |
| "loss": 0.1135, |
| "step": 2575 |
| }, |
| { |
| "epoch": 4.899667142177841, |
| "grad_norm": 0.2969724237918854, |
| "learning_rate": 8.367100666878375e-05, |
| "loss": 0.1148, |
| "step": 2576 |
| }, |
| { |
| "epoch": 4.901569186875892, |
| "grad_norm": 0.37165188789367676, |
| "learning_rate": 8.366465544617339e-05, |
| "loss": 0.1066, |
| "step": 2577 |
| }, |
| { |
| "epoch": 4.903471231573942, |
| "grad_norm": 0.2899304926395416, |
| "learning_rate": 8.365830422356304e-05, |
| "loss": 0.0896, |
| "step": 2578 |
| }, |
| { |
| "epoch": 4.905373276271993, |
| "grad_norm": 0.3420521914958954, |
| "learning_rate": 8.365195300095269e-05, |
| "loss": 0.0929, |
| "step": 2579 |
| }, |
| { |
| "epoch": 4.907275320970043, |
| "grad_norm": 0.48174387216567993, |
| "learning_rate": 8.364560177834233e-05, |
| "loss": 0.1422, |
| "step": 2580 |
| }, |
| { |
| "epoch": 4.909177365668093, |
| "grad_norm": 0.3492242693901062, |
| "learning_rate": 8.363925055573198e-05, |
| "loss": 0.1116, |
| "step": 2581 |
| }, |
| { |
| "epoch": 4.911079410366144, |
| "grad_norm": 0.367914080619812, |
| "learning_rate": 8.363289933312163e-05, |
| "loss": 0.1139, |
| "step": 2582 |
| }, |
| { |
| "epoch": 4.912981455064194, |
| "grad_norm": 0.32939612865448, |
| "learning_rate": 8.362654811051129e-05, |
| "loss": 0.1175, |
| "step": 2583 |
| }, |
| { |
| "epoch": 4.914883499762245, |
| "grad_norm": 0.3939587473869324, |
| "learning_rate": 8.362019688790092e-05, |
| "loss": 0.1263, |
| "step": 2584 |
| }, |
| { |
| "epoch": 4.916785544460295, |
| "grad_norm": 0.36641520261764526, |
| "learning_rate": 8.361384566529058e-05, |
| "loss": 0.1219, |
| "step": 2585 |
| }, |
| { |
| "epoch": 4.9186875891583455, |
| "grad_norm": 0.2804834544658661, |
| "learning_rate": 8.360749444268023e-05, |
| "loss": 0.0839, |
| "step": 2586 |
| }, |
| { |
| "epoch": 4.9205896338563955, |
| "grad_norm": 0.310461163520813, |
| "learning_rate": 8.360114322006987e-05, |
| "loss": 0.0949, |
| "step": 2587 |
| }, |
| { |
| "epoch": 4.922491678554446, |
| "grad_norm": 0.34361201524734497, |
| "learning_rate": 8.35947919974595e-05, |
| "loss": 0.1167, |
| "step": 2588 |
| }, |
| { |
| "epoch": 4.924393723252496, |
| "grad_norm": 0.3348811864852905, |
| "learning_rate": 8.358844077484917e-05, |
| "loss": 0.1035, |
| "step": 2589 |
| }, |
| { |
| "epoch": 4.926295767950547, |
| "grad_norm": 0.24014593660831451, |
| "learning_rate": 8.358208955223881e-05, |
| "loss": 0.1413, |
| "step": 2590 |
| }, |
| { |
| "epoch": 4.928197812648597, |
| "grad_norm": 0.4338441491127014, |
| "learning_rate": 8.357573832962846e-05, |
| "loss": 0.1186, |
| "step": 2591 |
| }, |
| { |
| "epoch": 4.930099857346647, |
| "grad_norm": 0.3601210415363312, |
| "learning_rate": 8.356938710701811e-05, |
| "loss": 0.1014, |
| "step": 2592 |
| }, |
| { |
| "epoch": 4.932001902044698, |
| "grad_norm": 0.2996499538421631, |
| "learning_rate": 8.356303588440775e-05, |
| "loss": 0.0906, |
| "step": 2593 |
| }, |
| { |
| "epoch": 4.933903946742748, |
| "grad_norm": 0.30851230025291443, |
| "learning_rate": 8.35566846617974e-05, |
| "loss": 0.0806, |
| "step": 2594 |
| }, |
| { |
| "epoch": 4.935805991440799, |
| "grad_norm": 0.22290165722370148, |
| "learning_rate": 8.355033343918704e-05, |
| "loss": 0.0728, |
| "step": 2595 |
| }, |
| { |
| "epoch": 4.937708036138849, |
| "grad_norm": 0.28518247604370117, |
| "learning_rate": 8.35439822165767e-05, |
| "loss": 0.0894, |
| "step": 2596 |
| }, |
| { |
| "epoch": 4.9396100808369, |
| "grad_norm": 0.424231618642807, |
| "learning_rate": 8.353763099396634e-05, |
| "loss": 0.1157, |
| "step": 2597 |
| }, |
| { |
| "epoch": 4.94151212553495, |
| "grad_norm": 0.5748564600944519, |
| "learning_rate": 8.353127977135598e-05, |
| "loss": 0.1777, |
| "step": 2598 |
| }, |
| { |
| "epoch": 4.943414170233001, |
| "grad_norm": 0.39010798931121826, |
| "learning_rate": 8.352492854874565e-05, |
| "loss": 0.104, |
| "step": 2599 |
| }, |
| { |
| "epoch": 4.945316214931051, |
| "grad_norm": 0.40491625666618347, |
| "learning_rate": 8.351857732613529e-05, |
| "loss": 0.115, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.947218259629102, |
| "grad_norm": 0.3881874084472656, |
| "learning_rate": 8.351222610352494e-05, |
| "loss": 0.1125, |
| "step": 2601 |
| }, |
| { |
| "epoch": 4.949120304327152, |
| "grad_norm": 0.4075947403907776, |
| "learning_rate": 8.350587488091458e-05, |
| "loss": 0.1376, |
| "step": 2602 |
| }, |
| { |
| "epoch": 4.951022349025202, |
| "grad_norm": 0.4263762831687927, |
| "learning_rate": 8.349952365830423e-05, |
| "loss": 0.1214, |
| "step": 2603 |
| }, |
| { |
| "epoch": 4.9529243937232525, |
| "grad_norm": 0.4403824806213379, |
| "learning_rate": 8.349317243569388e-05, |
| "loss": 0.1212, |
| "step": 2604 |
| }, |
| { |
| "epoch": 4.9548264384213025, |
| "grad_norm": 0.41958004236221313, |
| "learning_rate": 8.348682121308352e-05, |
| "loss": 0.1197, |
| "step": 2605 |
| }, |
| { |
| "epoch": 4.956728483119353, |
| "grad_norm": 0.3664645850658417, |
| "learning_rate": 8.348046999047317e-05, |
| "loss": 0.1208, |
| "step": 2606 |
| }, |
| { |
| "epoch": 4.958630527817403, |
| "grad_norm": 0.3618158996105194, |
| "learning_rate": 8.347411876786282e-05, |
| "loss": 0.1241, |
| "step": 2607 |
| }, |
| { |
| "epoch": 4.960532572515454, |
| "grad_norm": 0.3135223686695099, |
| "learning_rate": 8.346776754525246e-05, |
| "loss": 0.0807, |
| "step": 2608 |
| }, |
| { |
| "epoch": 4.962434617213504, |
| "grad_norm": 0.3673211932182312, |
| "learning_rate": 8.346141632264211e-05, |
| "loss": 0.1188, |
| "step": 2609 |
| }, |
| { |
| "epoch": 4.964336661911555, |
| "grad_norm": 0.34168919920921326, |
| "learning_rate": 8.345506510003176e-05, |
| "loss": 0.1113, |
| "step": 2610 |
| }, |
| { |
| "epoch": 4.966238706609605, |
| "grad_norm": 0.3807981312274933, |
| "learning_rate": 8.34487138774214e-05, |
| "loss": 0.1243, |
| "step": 2611 |
| }, |
| { |
| "epoch": 4.968140751307656, |
| "grad_norm": 0.35833629965782166, |
| "learning_rate": 8.344236265481105e-05, |
| "loss": 0.1175, |
| "step": 2612 |
| }, |
| { |
| "epoch": 4.970042796005706, |
| "grad_norm": 0.4410795569419861, |
| "learning_rate": 8.34360114322007e-05, |
| "loss": 0.1174, |
| "step": 2613 |
| }, |
| { |
| "epoch": 4.971944840703756, |
| "grad_norm": 0.27122291922569275, |
| "learning_rate": 8.342966020959036e-05, |
| "loss": 0.1062, |
| "step": 2614 |
| }, |
| { |
| "epoch": 4.973846885401807, |
| "grad_norm": 0.3411978483200073, |
| "learning_rate": 8.342330898698e-05, |
| "loss": 0.1274, |
| "step": 2615 |
| }, |
| { |
| "epoch": 4.975748930099857, |
| "grad_norm": 0.36536306142807007, |
| "learning_rate": 8.341695776436965e-05, |
| "loss": 0.1182, |
| "step": 2616 |
| }, |
| { |
| "epoch": 4.977650974797908, |
| "grad_norm": 0.3873109221458435, |
| "learning_rate": 8.34106065417593e-05, |
| "loss": 0.1043, |
| "step": 2617 |
| }, |
| { |
| "epoch": 4.979553019495958, |
| "grad_norm": 0.30192115902900696, |
| "learning_rate": 8.340425531914894e-05, |
| "loss": 0.0984, |
| "step": 2618 |
| }, |
| { |
| "epoch": 4.981455064194009, |
| "grad_norm": 0.37886565923690796, |
| "learning_rate": 8.339790409653859e-05, |
| "loss": 0.1161, |
| "step": 2619 |
| }, |
| { |
| "epoch": 4.983357108892059, |
| "grad_norm": 0.34957846999168396, |
| "learning_rate": 8.339155287392824e-05, |
| "loss": 0.1083, |
| "step": 2620 |
| }, |
| { |
| "epoch": 4.98525915359011, |
| "grad_norm": 0.3169527053833008, |
| "learning_rate": 8.338520165131788e-05, |
| "loss": 0.088, |
| "step": 2621 |
| }, |
| { |
| "epoch": 4.98716119828816, |
| "grad_norm": 0.41983914375305176, |
| "learning_rate": 8.337885042870753e-05, |
| "loss": 0.1158, |
| "step": 2622 |
| }, |
| { |
| "epoch": 4.9890632429862105, |
| "grad_norm": 0.3467552661895752, |
| "learning_rate": 8.337249920609718e-05, |
| "loss": 0.0958, |
| "step": 2623 |
| }, |
| { |
| "epoch": 4.9909652876842605, |
| "grad_norm": 0.3872130513191223, |
| "learning_rate": 8.336614798348682e-05, |
| "loss": 0.1012, |
| "step": 2624 |
| }, |
| { |
| "epoch": 4.9928673323823105, |
| "grad_norm": 0.2966238856315613, |
| "learning_rate": 8.335979676087647e-05, |
| "loss": 0.0913, |
| "step": 2625 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 15750, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 525, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.721151480093e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |