| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9985734664764622, |
| "eval_steps": 500, |
| "global_step": 525, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0019020446980504042, |
| "grad_norm": 0.9932524561882019, |
| "learning_rate": 2e-05, |
| "loss": 1.3348, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0038040893961008085, |
| "grad_norm": 0.9241018295288086, |
| "learning_rate": 4e-05, |
| "loss": 1.3131, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005706134094151213, |
| "grad_norm": 1.1556137800216675, |
| "learning_rate": 6e-05, |
| "loss": 1.5644, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.007608178792201617, |
| "grad_norm": 0.8612737059593201, |
| "learning_rate": 8e-05, |
| "loss": 1.2192, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.009510223490252021, |
| "grad_norm": 0.8998388648033142, |
| "learning_rate": 0.0001, |
| "loss": 1.3651, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.011412268188302425, |
| "grad_norm": 0.7211980819702148, |
| "learning_rate": 9.999364877738964e-05, |
| "loss": 1.2525, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01331431288635283, |
| "grad_norm": 0.44894707202911377, |
| "learning_rate": 9.998729755477931e-05, |
| "loss": 1.1999, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.015216357584403234, |
| "grad_norm": 0.4338511824607849, |
| "learning_rate": 9.998094633216895e-05, |
| "loss": 1.0147, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.017118402282453638, |
| "grad_norm": 0.5658989548683167, |
| "learning_rate": 9.99745951095586e-05, |
| "loss": 1.1997, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.019020446980504042, |
| "grad_norm": 0.4467356503009796, |
| "learning_rate": 9.996824388694824e-05, |
| "loss": 1.0424, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.020922491678554447, |
| "grad_norm": 0.3743385374546051, |
| "learning_rate": 9.996189266433789e-05, |
| "loss": 1.0902, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02282453637660485, |
| "grad_norm": 0.30667275190353394, |
| "learning_rate": 9.995554144172754e-05, |
| "loss": 0.8736, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.024726581074655255, |
| "grad_norm": 0.48634254932403564, |
| "learning_rate": 9.994919021911718e-05, |
| "loss": 0.977, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02662862577270566, |
| "grad_norm": 0.4229658246040344, |
| "learning_rate": 9.994283899650683e-05, |
| "loss": 0.9673, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.028530670470756064, |
| "grad_norm": 0.39269882440567017, |
| "learning_rate": 9.993648777389648e-05, |
| "loss": 1.0001, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.030432715168806468, |
| "grad_norm": 0.38597363233566284, |
| "learning_rate": 9.993013655128612e-05, |
| "loss": 0.9705, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03233475986685687, |
| "grad_norm": 0.40809136629104614, |
| "learning_rate": 9.992378532867577e-05, |
| "loss": 0.9246, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.034236804564907276, |
| "grad_norm": 0.4431133270263672, |
| "learning_rate": 9.991743410606542e-05, |
| "loss": 1.0409, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03613884926295768, |
| "grad_norm": 0.5659255981445312, |
| "learning_rate": 9.991108288345506e-05, |
| "loss": 1.1118, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.038040893961008085, |
| "grad_norm": 0.4943106472492218, |
| "learning_rate": 9.990473166084471e-05, |
| "loss": 0.9213, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.039942938659058486, |
| "grad_norm": 0.48820945620536804, |
| "learning_rate": 9.989838043823437e-05, |
| "loss": 0.9108, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04184498335710889, |
| "grad_norm": 0.4464576542377472, |
| "learning_rate": 9.989202921562402e-05, |
| "loss": 0.8959, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.043747028055159294, |
| "grad_norm": 0.3870016038417816, |
| "learning_rate": 9.988567799301366e-05, |
| "loss": 0.8013, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0456490727532097, |
| "grad_norm": 0.42381179332733154, |
| "learning_rate": 9.987932677040331e-05, |
| "loss": 0.8584, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0475511174512601, |
| "grad_norm": 0.37170907855033875, |
| "learning_rate": 9.987297554779296e-05, |
| "loss": 0.7849, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04945316214931051, |
| "grad_norm": 0.4516700506210327, |
| "learning_rate": 9.98666243251826e-05, |
| "loss": 0.8902, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05135520684736091, |
| "grad_norm": 0.3525027334690094, |
| "learning_rate": 9.986027310257225e-05, |
| "loss": 0.6029, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05325725154541132, |
| "grad_norm": 0.437707781791687, |
| "learning_rate": 9.98539218799619e-05, |
| "loss": 0.7387, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05515929624346172, |
| "grad_norm": 0.45205071568489075, |
| "learning_rate": 9.984757065735154e-05, |
| "loss": 0.7468, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05706134094151213, |
| "grad_norm": 0.3709086775779724, |
| "learning_rate": 9.984121943474119e-05, |
| "loss": 0.7365, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05896338563956253, |
| "grad_norm": 0.4089844822883606, |
| "learning_rate": 9.983486821213084e-05, |
| "loss": 0.6563, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.060865430337612936, |
| "grad_norm": 0.45955532789230347, |
| "learning_rate": 9.982851698952048e-05, |
| "loss": 0.8021, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06276747503566334, |
| "grad_norm": 0.5240988731384277, |
| "learning_rate": 9.982216576691013e-05, |
| "loss": 0.6933, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06466951973371374, |
| "grad_norm": 0.4703526496887207, |
| "learning_rate": 9.981581454429977e-05, |
| "loss": 0.7339, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06657156443176415, |
| "grad_norm": 0.5659805536270142, |
| "learning_rate": 9.980946332168944e-05, |
| "loss": 0.8139, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06847360912981455, |
| "grad_norm": 0.39259326457977295, |
| "learning_rate": 9.980311209907908e-05, |
| "loss": 0.5838, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07037565382786495, |
| "grad_norm": 0.4165003001689911, |
| "learning_rate": 9.979676087646871e-05, |
| "loss": 0.674, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07227769852591535, |
| "grad_norm": 0.4533802568912506, |
| "learning_rate": 9.979040965385838e-05, |
| "loss": 0.6974, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07417974322396577, |
| "grad_norm": 0.5213814973831177, |
| "learning_rate": 9.978405843124802e-05, |
| "loss": 0.7896, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07608178792201617, |
| "grad_norm": 0.3241259753704071, |
| "learning_rate": 9.977770720863767e-05, |
| "loss": 0.5895, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07798383262006657, |
| "grad_norm": 0.34446167945861816, |
| "learning_rate": 9.977135598602731e-05, |
| "loss": 0.6222, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07988587731811697, |
| "grad_norm": 0.49035167694091797, |
| "learning_rate": 9.976500476341696e-05, |
| "loss": 0.6978, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08178792201616739, |
| "grad_norm": 0.4795296788215637, |
| "learning_rate": 9.975865354080661e-05, |
| "loss": 0.7368, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08368996671421779, |
| "grad_norm": 0.44959381222724915, |
| "learning_rate": 9.975230231819625e-05, |
| "loss": 0.57, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08559201141226819, |
| "grad_norm": 0.4577605426311493, |
| "learning_rate": 9.974595109558592e-05, |
| "loss": 0.691, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08749405611031859, |
| "grad_norm": 0.41654840111732483, |
| "learning_rate": 9.973959987297555e-05, |
| "loss": 0.6346, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.089396100808369, |
| "grad_norm": 0.6599829196929932, |
| "learning_rate": 9.973324865036519e-05, |
| "loss": 0.6358, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0912981455064194, |
| "grad_norm": 0.38539162278175354, |
| "learning_rate": 9.972689742775484e-05, |
| "loss": 0.5723, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0932001902044698, |
| "grad_norm": 0.4626316428184509, |
| "learning_rate": 9.97205462051445e-05, |
| "loss": 0.6845, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0951022349025202, |
| "grad_norm": 0.348387211561203, |
| "learning_rate": 9.971419498253413e-05, |
| "loss": 0.4857, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09700427960057062, |
| "grad_norm": 0.4964020252227783, |
| "learning_rate": 9.970784375992379e-05, |
| "loss": 0.7141, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09890632429862102, |
| "grad_norm": 0.4282241463661194, |
| "learning_rate": 9.970149253731344e-05, |
| "loss": 0.6619, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10080836899667142, |
| "grad_norm": 0.35991716384887695, |
| "learning_rate": 9.969514131470309e-05, |
| "loss": 0.4727, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10271041369472182, |
| "grad_norm": 0.3936012387275696, |
| "learning_rate": 9.968879009209273e-05, |
| "loss": 0.5644, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.10461245839277224, |
| "grad_norm": 0.39267924427986145, |
| "learning_rate": 9.968243886948238e-05, |
| "loss": 0.5126, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10651450309082264, |
| "grad_norm": 0.4119136333465576, |
| "learning_rate": 9.967608764687203e-05, |
| "loss": 0.471, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.10841654778887304, |
| "grad_norm": 0.5160384178161621, |
| "learning_rate": 9.966973642426167e-05, |
| "loss": 0.6555, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11031859248692344, |
| "grad_norm": 0.4742174744606018, |
| "learning_rate": 9.966338520165132e-05, |
| "loss": 0.6093, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.11222063718497385, |
| "grad_norm": 0.3615169823169708, |
| "learning_rate": 9.965703397904097e-05, |
| "loss": 0.5527, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.11412268188302425, |
| "grad_norm": 0.5700575113296509, |
| "learning_rate": 9.965068275643061e-05, |
| "loss": 0.5713, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11602472658107466, |
| "grad_norm": 0.4825727939605713, |
| "learning_rate": 9.964433153382026e-05, |
| "loss": 0.5142, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.11792677127912506, |
| "grad_norm": 0.392088919878006, |
| "learning_rate": 9.963798031120992e-05, |
| "loss": 0.513, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11982881597717546, |
| "grad_norm": 0.35883110761642456, |
| "learning_rate": 9.963162908859957e-05, |
| "loss": 0.501, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.12173086067522587, |
| "grad_norm": 0.39946749806404114, |
| "learning_rate": 9.96252778659892e-05, |
| "loss": 0.5532, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.12363290537327627, |
| "grad_norm": 0.4191288352012634, |
| "learning_rate": 9.961892664337886e-05, |
| "loss": 0.5258, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12553495007132667, |
| "grad_norm": 0.3662487268447876, |
| "learning_rate": 9.961257542076851e-05, |
| "loss": 0.5121, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1274369947693771, |
| "grad_norm": 0.5582164525985718, |
| "learning_rate": 9.960622419815815e-05, |
| "loss": 0.6494, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.12933903946742747, |
| "grad_norm": 0.485128790140152, |
| "learning_rate": 9.959987297554779e-05, |
| "loss": 0.6022, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1312410841654779, |
| "grad_norm": 0.3816944360733032, |
| "learning_rate": 9.959352175293745e-05, |
| "loss": 0.4851, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1331431288635283, |
| "grad_norm": 0.3637336194515228, |
| "learning_rate": 9.958717053032709e-05, |
| "loss": 0.4344, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1350451735615787, |
| "grad_norm": 0.4418705105781555, |
| "learning_rate": 9.958081930771674e-05, |
| "loss": 0.6008, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1369472182596291, |
| "grad_norm": 0.44138631224632263, |
| "learning_rate": 9.95744680851064e-05, |
| "loss": 0.5319, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1388492629576795, |
| "grad_norm": 0.37523001432418823, |
| "learning_rate": 9.956811686249603e-05, |
| "loss": 0.657, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1407513076557299, |
| "grad_norm": 0.4489665627479553, |
| "learning_rate": 9.956176563988568e-05, |
| "loss": 0.5526, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.14265335235378032, |
| "grad_norm": 0.39318791031837463, |
| "learning_rate": 9.955541441727532e-05, |
| "loss": 0.6046, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1445553970518307, |
| "grad_norm": 0.4817538261413574, |
| "learning_rate": 9.954906319466499e-05, |
| "loss": 0.5149, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.14645744174988112, |
| "grad_norm": 0.4451163411140442, |
| "learning_rate": 9.954271197205463e-05, |
| "loss": 0.4892, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.14835948644793154, |
| "grad_norm": 0.29836660623550415, |
| "learning_rate": 9.953636074944426e-05, |
| "loss": 0.4005, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.15026153114598192, |
| "grad_norm": 0.3185100555419922, |
| "learning_rate": 9.953000952683393e-05, |
| "loss": 0.4168, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.15216357584403234, |
| "grad_norm": 0.26550424098968506, |
| "learning_rate": 9.952365830422357e-05, |
| "loss": 0.39, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.15406562054208273, |
| "grad_norm": 0.4328240156173706, |
| "learning_rate": 9.951730708161322e-05, |
| "loss": 0.5041, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.15596766524013314, |
| "grad_norm": 0.5178936123847961, |
| "learning_rate": 9.951095585900286e-05, |
| "loss": 0.6017, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.15786970993818356, |
| "grad_norm": 0.45657551288604736, |
| "learning_rate": 9.950460463639251e-05, |
| "loss": 0.5734, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.15977175463623394, |
| "grad_norm": 0.5482913851737976, |
| "learning_rate": 9.949825341378216e-05, |
| "loss": 0.6015, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.16167379933428436, |
| "grad_norm": 0.39362308382987976, |
| "learning_rate": 9.94919021911718e-05, |
| "loss": 0.5712, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.16357584403233477, |
| "grad_norm": 0.4381113350391388, |
| "learning_rate": 9.948555096856145e-05, |
| "loss": 0.5194, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.16547788873038516, |
| "grad_norm": 0.5021312236785889, |
| "learning_rate": 9.94791997459511e-05, |
| "loss": 0.5279, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.16737993342843557, |
| "grad_norm": 0.4364267587661743, |
| "learning_rate": 9.947284852334074e-05, |
| "loss": 0.5892, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.16928197812648596, |
| "grad_norm": 0.37873050570487976, |
| "learning_rate": 9.94664973007304e-05, |
| "loss": 0.5328, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.17118402282453637, |
| "grad_norm": 0.4768919050693512, |
| "learning_rate": 9.946014607812005e-05, |
| "loss": 0.4889, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1730860675225868, |
| "grad_norm": 0.3834541440010071, |
| "learning_rate": 9.945379485550968e-05, |
| "loss": 0.4642, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.17498811222063718, |
| "grad_norm": 0.48581764101982117, |
| "learning_rate": 9.944744363289934e-05, |
| "loss": 0.4741, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1768901569186876, |
| "grad_norm": 0.39364808797836304, |
| "learning_rate": 9.944109241028899e-05, |
| "loss": 0.5684, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.178792201616738, |
| "grad_norm": 0.4657204747200012, |
| "learning_rate": 9.943474118767864e-05, |
| "loss": 0.609, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1806942463147884, |
| "grad_norm": 0.40989887714385986, |
| "learning_rate": 9.942838996506828e-05, |
| "loss": 0.4319, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1825962910128388, |
| "grad_norm": 0.43797624111175537, |
| "learning_rate": 9.942203874245793e-05, |
| "loss": 0.4997, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1844983357108892, |
| "grad_norm": 0.3887675106525421, |
| "learning_rate": 9.941568751984758e-05, |
| "loss": 0.5548, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1864003804089396, |
| "grad_norm": 0.39017003774642944, |
| "learning_rate": 9.940933629723722e-05, |
| "loss": 0.5113, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.18830242510699002, |
| "grad_norm": 0.41409194469451904, |
| "learning_rate": 9.940298507462687e-05, |
| "loss": 0.5496, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.1902044698050404, |
| "grad_norm": 0.34578803181648254, |
| "learning_rate": 9.939663385201652e-05, |
| "loss": 0.4048, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19210651450309082, |
| "grad_norm": 0.32233092188835144, |
| "learning_rate": 9.939028262940616e-05, |
| "loss": 0.4442, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.19400855920114124, |
| "grad_norm": 0.45841965079307556, |
| "learning_rate": 9.938393140679581e-05, |
| "loss": 0.5646, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.19591060389919163, |
| "grad_norm": 0.3825596272945404, |
| "learning_rate": 9.937758018418547e-05, |
| "loss": 0.4583, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.19781264859724204, |
| "grad_norm": 0.44690102338790894, |
| "learning_rate": 9.93712289615751e-05, |
| "loss": 0.5799, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.19971469329529243, |
| "grad_norm": 0.4881773591041565, |
| "learning_rate": 9.936487773896476e-05, |
| "loss": 0.4094, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.20161673799334284, |
| "grad_norm": 0.4745669960975647, |
| "learning_rate": 9.93585265163544e-05, |
| "loss": 0.6068, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.20351878269139326, |
| "grad_norm": 0.5497081279754639, |
| "learning_rate": 9.935217529374406e-05, |
| "loss": 0.4654, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.20542082738944364, |
| "grad_norm": 0.3564707636833191, |
| "learning_rate": 9.93458240711337e-05, |
| "loss": 0.5678, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.20732287208749406, |
| "grad_norm": 0.446321964263916, |
| "learning_rate": 9.933947284852334e-05, |
| "loss": 0.4503, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.20922491678554447, |
| "grad_norm": 0.4253140389919281, |
| "learning_rate": 9.9333121625913e-05, |
| "loss": 0.538, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21112696148359486, |
| "grad_norm": 0.4123047888278961, |
| "learning_rate": 9.932677040330264e-05, |
| "loss": 0.4359, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.21302900618164528, |
| "grad_norm": 0.3887772262096405, |
| "learning_rate": 9.932041918069229e-05, |
| "loss": 0.5534, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.21493105087969566, |
| "grad_norm": 0.38153669238090515, |
| "learning_rate": 9.931406795808193e-05, |
| "loss": 0.4296, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.21683309557774608, |
| "grad_norm": 0.43017521500587463, |
| "learning_rate": 9.930771673547158e-05, |
| "loss": 0.5899, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2187351402757965, |
| "grad_norm": 0.40156394243240356, |
| "learning_rate": 9.930136551286123e-05, |
| "loss": 0.3917, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.22063718497384688, |
| "grad_norm": 0.3576590120792389, |
| "learning_rate": 9.929501429025087e-05, |
| "loss": 0.3908, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2225392296718973, |
| "grad_norm": 0.33245769143104553, |
| "learning_rate": 9.928866306764054e-05, |
| "loss": 0.4043, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2244412743699477, |
| "grad_norm": 0.43169739842414856, |
| "learning_rate": 9.928231184503018e-05, |
| "loss": 0.5569, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2263433190679981, |
| "grad_norm": 0.4004412293434143, |
| "learning_rate": 9.927596062241981e-05, |
| "loss": 0.4931, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2282453637660485, |
| "grad_norm": 0.3550797998905182, |
| "learning_rate": 9.926960939980947e-05, |
| "loss": 0.4505, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2301474084640989, |
| "grad_norm": 0.3701287508010864, |
| "learning_rate": 9.926325817719912e-05, |
| "loss": 0.4967, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2320494531621493, |
| "grad_norm": 0.4120308756828308, |
| "learning_rate": 9.925690695458876e-05, |
| "loss": 0.4408, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.23395149786019973, |
| "grad_norm": 0.4737403392791748, |
| "learning_rate": 9.925055573197841e-05, |
| "loss": 0.7221, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2358535425582501, |
| "grad_norm": 0.37103158235549927, |
| "learning_rate": 9.924420450936806e-05, |
| "loss": 0.4419, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.23775558725630053, |
| "grad_norm": 0.48644623160362244, |
| "learning_rate": 9.923785328675771e-05, |
| "loss": 0.5006, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2396576319543509, |
| "grad_norm": 0.3381918966770172, |
| "learning_rate": 9.923150206414735e-05, |
| "loss": 0.4786, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.24155967665240133, |
| "grad_norm": 0.4500490128993988, |
| "learning_rate": 9.9225150841537e-05, |
| "loss": 0.4984, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.24346172135045174, |
| "grad_norm": 0.5506143569946289, |
| "learning_rate": 9.921879961892665e-05, |
| "loss": 0.4857, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.24536376604850213, |
| "grad_norm": 0.4111080467700958, |
| "learning_rate": 9.921244839631629e-05, |
| "loss": 0.4464, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.24726581074655254, |
| "grad_norm": 0.52936851978302, |
| "learning_rate": 9.920609717370594e-05, |
| "loss": 0.5664, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24916785544460296, |
| "grad_norm": 0.465009480714798, |
| "learning_rate": 9.91997459510956e-05, |
| "loss": 0.4318, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.25106990014265335, |
| "grad_norm": 0.3044665455818176, |
| "learning_rate": 9.919339472848523e-05, |
| "loss": 0.4284, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.25297194484070373, |
| "grad_norm": 0.4849638342857361, |
| "learning_rate": 9.918704350587488e-05, |
| "loss": 0.5956, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2548739895387542, |
| "grad_norm": 0.4701893925666809, |
| "learning_rate": 9.918069228326454e-05, |
| "loss": 0.4541, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.25677603423680456, |
| "grad_norm": 0.42524924874305725, |
| "learning_rate": 9.917434106065419e-05, |
| "loss": 0.4991, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.25867807893485495, |
| "grad_norm": 0.46284592151641846, |
| "learning_rate": 9.916798983804383e-05, |
| "loss": 0.453, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2605801236329054, |
| "grad_norm": 0.40281572937965393, |
| "learning_rate": 9.916163861543348e-05, |
| "loss": 0.4771, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2624821683309558, |
| "grad_norm": 0.425214558839798, |
| "learning_rate": 9.915528739282313e-05, |
| "loss": 0.4665, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.26438421302900617, |
| "grad_norm": 0.4181045889854431, |
| "learning_rate": 9.914893617021277e-05, |
| "loss": 0.5014, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2662862577270566, |
| "grad_norm": 0.4024779498577118, |
| "learning_rate": 9.914258494760241e-05, |
| "loss": 0.5905, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.268188302425107, |
| "grad_norm": 0.3768770694732666, |
| "learning_rate": 9.913623372499207e-05, |
| "loss": 0.408, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2700903471231574, |
| "grad_norm": 0.4033905267715454, |
| "learning_rate": 9.912988250238171e-05, |
| "loss": 0.4511, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2719923918212078, |
| "grad_norm": 0.32505708932876587, |
| "learning_rate": 9.912353127977136e-05, |
| "loss": 0.4395, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2738944365192582, |
| "grad_norm": 0.3487790822982788, |
| "learning_rate": 9.9117180057161e-05, |
| "loss": 0.3601, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2757964812173086, |
| "grad_norm": 0.30558326840400696, |
| "learning_rate": 9.911082883455065e-05, |
| "loss": 0.4607, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.277698525915359, |
| "grad_norm": 0.3752080500125885, |
| "learning_rate": 9.91044776119403e-05, |
| "loss": 0.3957, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2796005706134094, |
| "grad_norm": 0.3506644368171692, |
| "learning_rate": 9.909812638932994e-05, |
| "loss": 0.366, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2815026153114598, |
| "grad_norm": 0.43430307507514954, |
| "learning_rate": 9.909177516671961e-05, |
| "loss": 0.4542, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2834046600095102, |
| "grad_norm": 0.41930171847343445, |
| "learning_rate": 9.908542394410925e-05, |
| "loss": 0.709, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.28530670470756064, |
| "grad_norm": 0.3717108964920044, |
| "learning_rate": 9.907907272149888e-05, |
| "loss": 0.4701, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28720874940561103, |
| "grad_norm": 0.4177984595298767, |
| "learning_rate": 9.907272149888854e-05, |
| "loss": 0.6189, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2891107941036614, |
| "grad_norm": 0.37706881761550903, |
| "learning_rate": 9.906637027627819e-05, |
| "loss": 0.4546, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.29101283880171186, |
| "grad_norm": 0.4210599660873413, |
| "learning_rate": 9.906001905366784e-05, |
| "loss": 0.4716, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.29291488349976225, |
| "grad_norm": 0.3707990050315857, |
| "learning_rate": 9.905366783105748e-05, |
| "loss": 0.4644, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.29481692819781263, |
| "grad_norm": 0.36913537979125977, |
| "learning_rate": 9.904731660844713e-05, |
| "loss": 0.4605, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2967189728958631, |
| "grad_norm": 0.41291072964668274, |
| "learning_rate": 9.904096538583678e-05, |
| "loss": 0.4294, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.29862101759391346, |
| "grad_norm": 0.30809640884399414, |
| "learning_rate": 9.903461416322642e-05, |
| "loss": 0.4369, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.30052306229196385, |
| "grad_norm": 0.4266267716884613, |
| "learning_rate": 9.902826294061607e-05, |
| "loss": 0.456, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3024251069900143, |
| "grad_norm": 0.37408629059791565, |
| "learning_rate": 9.902191171800572e-05, |
| "loss": 0.4359, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3043271516880647, |
| "grad_norm": 0.40199100971221924, |
| "learning_rate": 9.901556049539536e-05, |
| "loss": 0.4433, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.30622919638611507, |
| "grad_norm": 0.3430602252483368, |
| "learning_rate": 9.900920927278501e-05, |
| "loss": 0.4317, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.30813124108416545, |
| "grad_norm": 0.5091786980628967, |
| "learning_rate": 9.900285805017467e-05, |
| "loss": 0.5824, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3100332857822159, |
| "grad_norm": 0.34287527203559875, |
| "learning_rate": 9.89965068275643e-05, |
| "loss": 0.4025, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3119353304802663, |
| "grad_norm": 0.4919246733188629, |
| "learning_rate": 9.899015560495396e-05, |
| "loss": 0.5612, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.31383737517831667, |
| "grad_norm": 0.35404297709465027, |
| "learning_rate": 9.898380438234361e-05, |
| "loss": 0.4731, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3157394198763671, |
| "grad_norm": 0.3590085506439209, |
| "learning_rate": 9.897745315973326e-05, |
| "loss": 0.4365, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3176414645744175, |
| "grad_norm": 0.4132196605205536, |
| "learning_rate": 9.89711019371229e-05, |
| "loss": 0.3485, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3195435092724679, |
| "grad_norm": 0.46459728479385376, |
| "learning_rate": 9.896475071451255e-05, |
| "loss": 0.4327, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3214455539705183, |
| "grad_norm": 0.435651957988739, |
| "learning_rate": 9.89583994919022e-05, |
| "loss": 0.4684, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3233475986685687, |
| "grad_norm": 0.38278958201408386, |
| "learning_rate": 9.895204826929184e-05, |
| "loss": 0.4265, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3252496433666191, |
| "grad_norm": 0.31499558687210083, |
| "learning_rate": 9.894569704668149e-05, |
| "loss": 0.4099, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.32715168806466954, |
| "grad_norm": 0.40141284465789795, |
| "learning_rate": 9.893934582407114e-05, |
| "loss": 0.4461, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.32905373276271993, |
| "grad_norm": 0.42945384979248047, |
| "learning_rate": 9.893299460146078e-05, |
| "loss": 0.4379, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3309557774607703, |
| "grad_norm": 0.5186269283294678, |
| "learning_rate": 9.892664337885043e-05, |
| "loss": 0.5134, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.33285782215882076, |
| "grad_norm": 0.3771612048149109, |
| "learning_rate": 9.892029215624009e-05, |
| "loss": 0.4617, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.33475986685687115, |
| "grad_norm": 0.48396849632263184, |
| "learning_rate": 9.891394093362972e-05, |
| "loss": 0.4944, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.33666191155492153, |
| "grad_norm": 0.5303121209144592, |
| "learning_rate": 9.890758971101938e-05, |
| "loss": 0.4049, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3385639562529719, |
| "grad_norm": 0.33063024282455444, |
| "learning_rate": 9.890123848840901e-05, |
| "loss": 0.401, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.34046600095102236, |
| "grad_norm": 0.3764759302139282, |
| "learning_rate": 9.889488726579868e-05, |
| "loss": 0.4222, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.34236804564907275, |
| "grad_norm": 0.27206951379776, |
| "learning_rate": 9.888853604318832e-05, |
| "loss": 0.3206, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.34427009034712314, |
| "grad_norm": 0.3893122971057892, |
| "learning_rate": 9.888218482057796e-05, |
| "loss": 0.3558, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3461721350451736, |
| "grad_norm": 0.42340540885925293, |
| "learning_rate": 9.887583359796762e-05, |
| "loss": 0.3948, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.34807417974322397, |
| "grad_norm": 0.4103796184062958, |
| "learning_rate": 9.886948237535726e-05, |
| "loss": 0.4769, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.34997622444127435, |
| "grad_norm": 0.39225244522094727, |
| "learning_rate": 9.886313115274691e-05, |
| "loss": 0.441, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3518782691393248, |
| "grad_norm": 0.3774043023586273, |
| "learning_rate": 9.885677993013655e-05, |
| "loss": 0.3018, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3537803138373752, |
| "grad_norm": 0.4012366235256195, |
| "learning_rate": 9.88504287075262e-05, |
| "loss": 0.4217, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.35568235853542557, |
| "grad_norm": 0.37299972772598267, |
| "learning_rate": 9.884407748491585e-05, |
| "loss": 0.4518, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.357584403233476, |
| "grad_norm": 0.34713125228881836, |
| "learning_rate": 9.883772626230549e-05, |
| "loss": 0.3882, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3594864479315264, |
| "grad_norm": 0.4148958623409271, |
| "learning_rate": 9.883137503969516e-05, |
| "loss": 0.4979, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3613884926295768, |
| "grad_norm": 0.3979155421257019, |
| "learning_rate": 9.88250238170848e-05, |
| "loss": 0.3854, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.36329053732762717, |
| "grad_norm": 0.42723751068115234, |
| "learning_rate": 9.881867259447443e-05, |
| "loss": 0.4325, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3651925820256776, |
| "grad_norm": 0.4195951521396637, |
| "learning_rate": 9.881232137186409e-05, |
| "loss": 0.3917, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.367094626723728, |
| "grad_norm": 0.43937554955482483, |
| "learning_rate": 9.880597014925374e-05, |
| "loss": 0.3907, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3689966714217784, |
| "grad_norm": 0.3176072835922241, |
| "learning_rate": 9.879961892664338e-05, |
| "loss": 0.3581, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.37089871611982883, |
| "grad_norm": 0.39909854531288147, |
| "learning_rate": 9.879326770403303e-05, |
| "loss": 0.5881, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3728007608178792, |
| "grad_norm": 0.35058659315109253, |
| "learning_rate": 9.878691648142268e-05, |
| "loss": 0.4753, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3747028055159296, |
| "grad_norm": 0.3353765904903412, |
| "learning_rate": 9.878056525881233e-05, |
| "loss": 0.4014, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.37660485021398005, |
| "grad_norm": 0.4102007746696472, |
| "learning_rate": 9.877421403620197e-05, |
| "loss": 0.4841, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.37850689491203043, |
| "grad_norm": 0.45450812578201294, |
| "learning_rate": 9.876786281359162e-05, |
| "loss": 0.4655, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.3804089396100808, |
| "grad_norm": 0.32525572180747986, |
| "learning_rate": 9.876151159098127e-05, |
| "loss": 0.3869, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.38231098430813126, |
| "grad_norm": 0.4488207697868347, |
| "learning_rate": 9.875516036837091e-05, |
| "loss": 0.4743, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.38421302900618165, |
| "grad_norm": 0.432962030172348, |
| "learning_rate": 9.874880914576056e-05, |
| "loss": 0.4171, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.38611507370423204, |
| "grad_norm": 0.4264095723628998, |
| "learning_rate": 9.874245792315022e-05, |
| "loss": 0.4344, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3880171184022825, |
| "grad_norm": 0.43752139806747437, |
| "learning_rate": 9.873610670053985e-05, |
| "loss": 0.5248, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.38991916310033287, |
| "grad_norm": 0.42547503113746643, |
| "learning_rate": 9.87297554779295e-05, |
| "loss": 0.4011, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.39182120779838325, |
| "grad_norm": 0.34600159525871277, |
| "learning_rate": 9.872340425531916e-05, |
| "loss": 0.3444, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.39372325249643364, |
| "grad_norm": 0.3614776134490967, |
| "learning_rate": 9.871705303270881e-05, |
| "loss": 0.4784, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3956252971944841, |
| "grad_norm": 0.47591882944107056, |
| "learning_rate": 9.871070181009845e-05, |
| "loss": 0.5159, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.39752734189253447, |
| "grad_norm": 0.3321515917778015, |
| "learning_rate": 9.870435058748809e-05, |
| "loss": 0.4382, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.39942938659058486, |
| "grad_norm": 0.45849499106407166, |
| "learning_rate": 9.869799936487775e-05, |
| "loss": 0.4269, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4013314312886353, |
| "grad_norm": 0.3666900098323822, |
| "learning_rate": 9.869164814226739e-05, |
| "loss": 0.4077, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4032334759866857, |
| "grad_norm": 0.3387741446495056, |
| "learning_rate": 9.868529691965703e-05, |
| "loss": 0.4485, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4051355206847361, |
| "grad_norm": 0.3360239267349243, |
| "learning_rate": 9.86789456970467e-05, |
| "loss": 0.4042, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4070375653827865, |
| "grad_norm": 0.40923500061035156, |
| "learning_rate": 9.867259447443633e-05, |
| "loss": 0.5001, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4089396100808369, |
| "grad_norm": 0.3974573314189911, |
| "learning_rate": 9.866624325182598e-05, |
| "loss": 0.4984, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4108416547788873, |
| "grad_norm": 0.4095960557460785, |
| "learning_rate": 9.865989202921562e-05, |
| "loss": 0.3837, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.41274369947693773, |
| "grad_norm": 0.3334168493747711, |
| "learning_rate": 9.865354080660527e-05, |
| "loss": 0.3935, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4146457441749881, |
| "grad_norm": 0.5007266998291016, |
| "learning_rate": 9.864718958399493e-05, |
| "loss": 0.4443, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4165477888730385, |
| "grad_norm": 0.35881495475769043, |
| "learning_rate": 9.864083836138456e-05, |
| "loss": 0.3835, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.41844983357108895, |
| "grad_norm": 0.3785092830657959, |
| "learning_rate": 9.863448713877423e-05, |
| "loss": 0.3884, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.42035187826913933, |
| "grad_norm": 0.41435107588768005, |
| "learning_rate": 9.862813591616387e-05, |
| "loss": 0.4116, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.4222539229671897, |
| "grad_norm": 0.41338756680488586, |
| "learning_rate": 9.86217846935535e-05, |
| "loss": 0.5235, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4241559676652401, |
| "grad_norm": 0.4335710406303406, |
| "learning_rate": 9.861543347094316e-05, |
| "loss": 0.516, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.42605801236329055, |
| "grad_norm": 0.37374967336654663, |
| "learning_rate": 9.860908224833281e-05, |
| "loss": 0.4663, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.42796005706134094, |
| "grad_norm": 0.3213825821876526, |
| "learning_rate": 9.860273102572246e-05, |
| "loss": 0.3636, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4298621017593913, |
| "grad_norm": 0.41535523533821106, |
| "learning_rate": 9.85963798031121e-05, |
| "loss": 0.3677, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.43176414645744177, |
| "grad_norm": 0.3543884754180908, |
| "learning_rate": 9.859002858050175e-05, |
| "loss": 0.376, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.43366619115549215, |
| "grad_norm": 0.4012312889099121, |
| "learning_rate": 9.85836773578914e-05, |
| "loss": 0.4886, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.43556823585354254, |
| "grad_norm": 0.3928169310092926, |
| "learning_rate": 9.857732613528104e-05, |
| "loss": 0.3741, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.437470280551593, |
| "grad_norm": 0.4982980191707611, |
| "learning_rate": 9.85709749126707e-05, |
| "loss": 0.5704, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.43937232524964337, |
| "grad_norm": 0.356545090675354, |
| "learning_rate": 9.856462369006035e-05, |
| "loss": 0.3618, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.44127436994769376, |
| "grad_norm": 0.5087487697601318, |
| "learning_rate": 9.855827246744998e-05, |
| "loss": 0.4733, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4431764146457442, |
| "grad_norm": 0.3566097021102905, |
| "learning_rate": 9.855192124483964e-05, |
| "loss": 0.3771, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4450784593437946, |
| "grad_norm": 0.3210541605949402, |
| "learning_rate": 9.854557002222929e-05, |
| "loss": 0.4341, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.446980504041845, |
| "grad_norm": 0.25422924757003784, |
| "learning_rate": 9.853921879961893e-05, |
| "loss": 0.3987, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4488825487398954, |
| "grad_norm": 0.39164894819259644, |
| "learning_rate": 9.853286757700858e-05, |
| "loss": 0.4149, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4507845934379458, |
| "grad_norm": 0.37471455335617065, |
| "learning_rate": 9.852651635439823e-05, |
| "loss": 0.4471, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4526866381359962, |
| "grad_norm": 0.37678262591362, |
| "learning_rate": 9.852016513178788e-05, |
| "loss": 0.3943, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4545886828340466, |
| "grad_norm": 0.4653976857662201, |
| "learning_rate": 9.851381390917752e-05, |
| "loss": 0.4848, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.456490727532097, |
| "grad_norm": 0.46764564514160156, |
| "learning_rate": 9.850746268656717e-05, |
| "loss": 0.4624, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4583927722301474, |
| "grad_norm": 0.3803463876247406, |
| "learning_rate": 9.850111146395682e-05, |
| "loss": 0.442, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4602948169281978, |
| "grad_norm": 0.33662229776382446, |
| "learning_rate": 9.849476024134646e-05, |
| "loss": 0.4564, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.46219686162624823, |
| "grad_norm": 0.42181041836738586, |
| "learning_rate": 9.848840901873611e-05, |
| "loss": 0.4702, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4640989063242986, |
| "grad_norm": 0.40373390913009644, |
| "learning_rate": 9.848205779612576e-05, |
| "loss": 0.3745, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.466000951022349, |
| "grad_norm": 0.36634379625320435, |
| "learning_rate": 9.84757065735154e-05, |
| "loss": 0.428, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.46790299572039945, |
| "grad_norm": 0.35369235277175903, |
| "learning_rate": 9.846935535090506e-05, |
| "loss": 0.3986, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.46980504041844984, |
| "grad_norm": 0.4154004454612732, |
| "learning_rate": 9.846300412829471e-05, |
| "loss": 0.3512, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.4717070851165002, |
| "grad_norm": 0.3689868450164795, |
| "learning_rate": 9.845665290568435e-05, |
| "loss": 0.3708, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.47360912981455067, |
| "grad_norm": 0.38414841890335083, |
| "learning_rate": 9.8450301683074e-05, |
| "loss": 0.3401, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.47551117451260105, |
| "grad_norm": 0.39936143159866333, |
| "learning_rate": 9.844395046046364e-05, |
| "loss": 0.4328, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.47741321921065144, |
| "grad_norm": 0.30578187108039856, |
| "learning_rate": 9.84375992378533e-05, |
| "loss": 0.3694, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4793152639087018, |
| "grad_norm": 0.39497658610343933, |
| "learning_rate": 9.843124801524294e-05, |
| "loss": 0.3945, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.48121730860675227, |
| "grad_norm": 0.44466689229011536, |
| "learning_rate": 9.842489679263258e-05, |
| "loss": 0.4485, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.48311935330480266, |
| "grad_norm": 0.3614617586135864, |
| "learning_rate": 9.841854557002223e-05, |
| "loss": 0.3701, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.48502139800285304, |
| "grad_norm": 0.3102608621120453, |
| "learning_rate": 9.841219434741188e-05, |
| "loss": 0.3677, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4869234427009035, |
| "grad_norm": 0.36049678921699524, |
| "learning_rate": 9.840584312480153e-05, |
| "loss": 0.411, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4888254873989539, |
| "grad_norm": 0.4025668501853943, |
| "learning_rate": 9.839949190219117e-05, |
| "loss": 0.433, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.49072753209700426, |
| "grad_norm": 0.4131562113761902, |
| "learning_rate": 9.839314067958082e-05, |
| "loss": 0.4818, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4926295767950547, |
| "grad_norm": 0.481468141078949, |
| "learning_rate": 9.838678945697047e-05, |
| "loss": 0.5226, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.4945316214931051, |
| "grad_norm": 0.2845190167427063, |
| "learning_rate": 9.838043823436011e-05, |
| "loss": 0.3323, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4964336661911555, |
| "grad_norm": 0.40381497144699097, |
| "learning_rate": 9.837408701174976e-05, |
| "loss": 0.4025, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.4983357108892059, |
| "grad_norm": 0.4109043478965759, |
| "learning_rate": 9.836773578913942e-05, |
| "loss": 0.4429, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5002377555872562, |
| "grad_norm": 0.4256783425807953, |
| "learning_rate": 9.836138456652906e-05, |
| "loss": 0.3994, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5021398002853067, |
| "grad_norm": 0.35044407844543457, |
| "learning_rate": 9.835503334391871e-05, |
| "loss": 0.4431, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5040418449833571, |
| "grad_norm": 0.4456939697265625, |
| "learning_rate": 9.834868212130836e-05, |
| "loss": 0.5424, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5059438896814075, |
| "grad_norm": 0.36340197920799255, |
| "learning_rate": 9.8342330898698e-05, |
| "loss": 0.4199, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5078459343794579, |
| "grad_norm": 0.4018803536891937, |
| "learning_rate": 9.833597967608765e-05, |
| "loss": 0.4132, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5097479790775084, |
| "grad_norm": 0.3372616469860077, |
| "learning_rate": 9.83296284534773e-05, |
| "loss": 0.3239, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5116500237755587, |
| "grad_norm": 0.4497722387313843, |
| "learning_rate": 9.832327723086695e-05, |
| "loss": 0.4019, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5135520684736091, |
| "grad_norm": 0.422269344329834, |
| "learning_rate": 9.831692600825659e-05, |
| "loss": 0.45, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5154541131716596, |
| "grad_norm": 0.4167305529117584, |
| "learning_rate": 9.831057478564624e-05, |
| "loss": 0.4172, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5173561578697099, |
| "grad_norm": 0.4340919554233551, |
| "learning_rate": 9.83042235630359e-05, |
| "loss": 0.5042, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5192582025677603, |
| "grad_norm": 0.4179072380065918, |
| "learning_rate": 9.829787234042553e-05, |
| "loss": 0.3499, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5211602472658108, |
| "grad_norm": 0.39216554164886475, |
| "learning_rate": 9.829152111781518e-05, |
| "loss": 0.4729, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5230622919638611, |
| "grad_norm": 0.4485825002193451, |
| "learning_rate": 9.828516989520484e-05, |
| "loss": 0.4449, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5249643366619116, |
| "grad_norm": 0.3843270242214203, |
| "learning_rate": 9.827881867259447e-05, |
| "loss": 0.5416, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.526866381359962, |
| "grad_norm": 0.30829140543937683, |
| "learning_rate": 9.827246744998413e-05, |
| "loss": 0.4004, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5287684260580123, |
| "grad_norm": 0.2905525863170624, |
| "learning_rate": 9.826611622737378e-05, |
| "loss": 0.3574, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5306704707560628, |
| "grad_norm": 0.3848637342453003, |
| "learning_rate": 9.825976500476343e-05, |
| "loss": 0.4021, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5325725154541132, |
| "grad_norm": 0.32691988348960876, |
| "learning_rate": 9.825341378215307e-05, |
| "loss": 0.4317, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5344745601521635, |
| "grad_norm": 0.3506065011024475, |
| "learning_rate": 9.824706255954271e-05, |
| "loss": 0.329, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.536376604850214, |
| "grad_norm": 0.3102387487888336, |
| "learning_rate": 9.824071133693237e-05, |
| "loss": 0.3695, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5382786495482644, |
| "grad_norm": 0.45750680565834045, |
| "learning_rate": 9.823436011432201e-05, |
| "loss": 0.4232, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5401806942463148, |
| "grad_norm": 0.297134131193161, |
| "learning_rate": 9.822800889171165e-05, |
| "loss": 0.4137, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5420827389443652, |
| "grad_norm": 0.3696708679199219, |
| "learning_rate": 9.822165766910131e-05, |
| "loss": 0.4598, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5439847836424156, |
| "grad_norm": 0.31236112117767334, |
| "learning_rate": 9.821530644649095e-05, |
| "loss": 0.314, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.545886828340466, |
| "grad_norm": 0.3596087694168091, |
| "learning_rate": 9.82089552238806e-05, |
| "loss": 0.4164, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5477888730385164, |
| "grad_norm": 0.33347079157829285, |
| "learning_rate": 9.820260400127024e-05, |
| "loss": 0.3915, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5496909177365669, |
| "grad_norm": 0.37818920612335205, |
| "learning_rate": 9.81962527786599e-05, |
| "loss": 0.3994, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5515929624346172, |
| "grad_norm": 0.3968106806278229, |
| "learning_rate": 9.818990155604955e-05, |
| "loss": 0.3611, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5534950071326676, |
| "grad_norm": 0.34991270303726196, |
| "learning_rate": 9.818355033343918e-05, |
| "loss": 0.3703, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.555397051830718, |
| "grad_norm": 0.4046263098716736, |
| "learning_rate": 9.817719911082885e-05, |
| "loss": 0.3302, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5572990965287684, |
| "grad_norm": 0.35804587602615356, |
| "learning_rate": 9.817084788821849e-05, |
| "loss": 0.373, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5592011412268189, |
| "grad_norm": 0.3538301885128021, |
| "learning_rate": 9.816449666560813e-05, |
| "loss": 0.3482, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5611031859248692, |
| "grad_norm": 0.36835455894470215, |
| "learning_rate": 9.815814544299778e-05, |
| "loss": 0.3393, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5630052306229196, |
| "grad_norm": 0.48919835686683655, |
| "learning_rate": 9.815179422038743e-05, |
| "loss": 0.4213, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5649072753209701, |
| "grad_norm": 0.3472330570220947, |
| "learning_rate": 9.814544299777708e-05, |
| "loss": 0.3996, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5668093200190204, |
| "grad_norm": 0.428611159324646, |
| "learning_rate": 9.813909177516672e-05, |
| "loss": 0.4524, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5687113647170708, |
| "grad_norm": 0.4176979959011078, |
| "learning_rate": 9.813274055255637e-05, |
| "loss": 0.3787, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5706134094151213, |
| "grad_norm": 0.41548797488212585, |
| "learning_rate": 9.812638932994602e-05, |
| "loss": 0.4758, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5725154541131716, |
| "grad_norm": 0.3926902413368225, |
| "learning_rate": 9.812003810733566e-05, |
| "loss": 0.434, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.5744174988112221, |
| "grad_norm": 0.392846018075943, |
| "learning_rate": 9.811368688472531e-05, |
| "loss": 0.3928, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5763195435092725, |
| "grad_norm": 0.36347585916519165, |
| "learning_rate": 9.810733566211497e-05, |
| "loss": 0.4264, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5782215882073228, |
| "grad_norm": 0.4314410090446472, |
| "learning_rate": 9.81009844395046e-05, |
| "loss": 0.4199, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5801236329053733, |
| "grad_norm": 0.337494820356369, |
| "learning_rate": 9.809463321689426e-05, |
| "loss": 0.4181, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5820256776034237, |
| "grad_norm": 0.27786335349082947, |
| "learning_rate": 9.808828199428391e-05, |
| "loss": 0.3, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.583927722301474, |
| "grad_norm": 0.37235599756240845, |
| "learning_rate": 9.808193077167355e-05, |
| "loss": 0.3927, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5858297669995245, |
| "grad_norm": 0.37353670597076416, |
| "learning_rate": 9.80755795490632e-05, |
| "loss": 0.4146, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5877318116975749, |
| "grad_norm": 0.3919946551322937, |
| "learning_rate": 9.806922832645285e-05, |
| "loss": 0.5055, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.5896338563956253, |
| "grad_norm": 0.45411062240600586, |
| "learning_rate": 9.80628771038425e-05, |
| "loss": 0.5347, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5915359010936757, |
| "grad_norm": 0.4087005853652954, |
| "learning_rate": 9.805652588123214e-05, |
| "loss": 0.3732, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.5934379457917262, |
| "grad_norm": 0.313297837972641, |
| "learning_rate": 9.805017465862178e-05, |
| "loss": 0.3093, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5953399904897765, |
| "grad_norm": 0.40149226784706116, |
| "learning_rate": 9.804382343601144e-05, |
| "loss": 0.4404, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5972420351878269, |
| "grad_norm": 0.34245574474334717, |
| "learning_rate": 9.803747221340108e-05, |
| "loss": 0.4036, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5991440798858774, |
| "grad_norm": 0.38059449195861816, |
| "learning_rate": 9.803112099079073e-05, |
| "loss": 0.3763, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6010461245839277, |
| "grad_norm": 0.4539381265640259, |
| "learning_rate": 9.802476976818039e-05, |
| "loss": 0.4551, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6029481692819781, |
| "grad_norm": 0.4077235460281372, |
| "learning_rate": 9.801841854557002e-05, |
| "loss": 0.4641, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6048502139800286, |
| "grad_norm": 0.3426643908023834, |
| "learning_rate": 9.801206732295968e-05, |
| "loss": 0.3684, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6067522586780789, |
| "grad_norm": 0.3042270839214325, |
| "learning_rate": 9.800571610034931e-05, |
| "loss": 0.373, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6086543033761294, |
| "grad_norm": 0.4373973309993744, |
| "learning_rate": 9.799936487773897e-05, |
| "loss": 0.5442, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6105563480741797, |
| "grad_norm": 0.385797917842865, |
| "learning_rate": 9.799301365512862e-05, |
| "loss": 0.4218, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6124583927722301, |
| "grad_norm": 0.33210891485214233, |
| "learning_rate": 9.798666243251826e-05, |
| "loss": 0.3062, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6143604374702806, |
| "grad_norm": 0.3997063636779785, |
| "learning_rate": 9.798031120990792e-05, |
| "loss": 0.4104, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6162624821683309, |
| "grad_norm": 0.4837460219860077, |
| "learning_rate": 9.797395998729756e-05, |
| "loss": 0.5271, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6181645268663813, |
| "grad_norm": 0.36420971155166626, |
| "learning_rate": 9.79676087646872e-05, |
| "loss": 0.4033, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6200665715644318, |
| "grad_norm": 0.33610865473747253, |
| "learning_rate": 9.796125754207685e-05, |
| "loss": 0.3992, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6219686162624821, |
| "grad_norm": 0.28999099135398865, |
| "learning_rate": 9.79549063194665e-05, |
| "loss": 0.3675, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6238706609605326, |
| "grad_norm": 0.359401673078537, |
| "learning_rate": 9.794855509685615e-05, |
| "loss": 0.4363, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.625772705658583, |
| "grad_norm": 0.3948569595813751, |
| "learning_rate": 9.794220387424579e-05, |
| "loss": 0.3698, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6276747503566333, |
| "grad_norm": 0.3753513991832733, |
| "learning_rate": 9.793585265163544e-05, |
| "loss": 0.4397, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6295767950546838, |
| "grad_norm": 0.32612451910972595, |
| "learning_rate": 9.79295014290251e-05, |
| "loss": 0.3846, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6314788397527342, |
| "grad_norm": 0.40796539187431335, |
| "learning_rate": 9.792315020641473e-05, |
| "loss": 0.371, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6333808844507846, |
| "grad_norm": 0.4358294904232025, |
| "learning_rate": 9.791679898380439e-05, |
| "loss": 0.4052, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.635282929148835, |
| "grad_norm": 0.39615437388420105, |
| "learning_rate": 9.791044776119404e-05, |
| "loss": 0.3686, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6371849738468854, |
| "grad_norm": 0.32977715134620667, |
| "learning_rate": 9.790409653858368e-05, |
| "loss": 0.4404, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6390870185449358, |
| "grad_norm": 0.38361093401908875, |
| "learning_rate": 9.789774531597333e-05, |
| "loss": 0.3709, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6409890632429862, |
| "grad_norm": 0.40280988812446594, |
| "learning_rate": 9.789139409336298e-05, |
| "loss": 0.3322, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6428911079410367, |
| "grad_norm": 0.3682766854763031, |
| "learning_rate": 9.788504287075262e-05, |
| "loss": 0.4144, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.644793152639087, |
| "grad_norm": 0.39864271879196167, |
| "learning_rate": 9.787869164814227e-05, |
| "loss": 0.4404, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6466951973371374, |
| "grad_norm": 0.3244321048259735, |
| "learning_rate": 9.787234042553192e-05, |
| "loss": 0.3541, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6485972420351879, |
| "grad_norm": 0.323403924703598, |
| "learning_rate": 9.786598920292157e-05, |
| "loss": 0.3374, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6504992867332382, |
| "grad_norm": 0.3881044387817383, |
| "learning_rate": 9.785963798031121e-05, |
| "loss": 0.4415, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6524013314312886, |
| "grad_norm": 0.35189467668533325, |
| "learning_rate": 9.785328675770086e-05, |
| "loss": 0.401, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6543033761293391, |
| "grad_norm": 0.3553767800331116, |
| "learning_rate": 9.784693553509052e-05, |
| "loss": 0.456, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6562054208273894, |
| "grad_norm": 0.3302605152130127, |
| "learning_rate": 9.784058431248015e-05, |
| "loss": 0.472, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6581074655254399, |
| "grad_norm": 0.4526873826980591, |
| "learning_rate": 9.78342330898698e-05, |
| "loss": 0.3908, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6600095102234903, |
| "grad_norm": 0.3232348561286926, |
| "learning_rate": 9.782788186725946e-05, |
| "loss": 0.3421, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6619115549215406, |
| "grad_norm": 0.38508203625679016, |
| "learning_rate": 9.78215306446491e-05, |
| "loss": 0.4093, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6638135996195911, |
| "grad_norm": 0.3187748193740845, |
| "learning_rate": 9.781517942203875e-05, |
| "loss": 0.4319, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.6657156443176415, |
| "grad_norm": 0.2614807188510895, |
| "learning_rate": 9.78088281994284e-05, |
| "loss": 0.314, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6676176890156919, |
| "grad_norm": 0.40218180418014526, |
| "learning_rate": 9.780247697681805e-05, |
| "loss": 0.4404, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6695197337137423, |
| "grad_norm": 0.4016517996788025, |
| "learning_rate": 9.779612575420769e-05, |
| "loss": 0.5063, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6714217784117926, |
| "grad_norm": 0.3333278000354767, |
| "learning_rate": 9.778977453159733e-05, |
| "loss": 0.2966, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6733238231098431, |
| "grad_norm": 0.4535547196865082, |
| "learning_rate": 9.778342330898699e-05, |
| "loss": 0.4077, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6752258678078935, |
| "grad_norm": 0.4180653393268585, |
| "learning_rate": 9.777707208637663e-05, |
| "loss": 0.4554, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6771279125059438, |
| "grad_norm": 0.43454670906066895, |
| "learning_rate": 9.777072086376627e-05, |
| "loss": 0.4403, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6790299572039943, |
| "grad_norm": 0.45290321111679077, |
| "learning_rate": 9.776436964115594e-05, |
| "loss": 0.4037, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6809320019020447, |
| "grad_norm": 0.34165212512016296, |
| "learning_rate": 9.775801841854557e-05, |
| "loss": 0.3044, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6828340466000951, |
| "grad_norm": 0.435138463973999, |
| "learning_rate": 9.775166719593523e-05, |
| "loss": 0.4293, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6847360912981455, |
| "grad_norm": 0.36061882972717285, |
| "learning_rate": 9.774531597332486e-05, |
| "loss": 0.4052, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6866381359961959, |
| "grad_norm": 0.4023354947566986, |
| "learning_rate": 9.773896475071452e-05, |
| "loss": 0.4232, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.6885401806942463, |
| "grad_norm": 0.39200109243392944, |
| "learning_rate": 9.773261352810417e-05, |
| "loss": 0.3882, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6904422253922967, |
| "grad_norm": 0.34504035115242004, |
| "learning_rate": 9.77262623054938e-05, |
| "loss": 0.4063, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.6923442700903472, |
| "grad_norm": 0.31081900000572205, |
| "learning_rate": 9.771991108288346e-05, |
| "loss": 0.251, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6942463147883975, |
| "grad_norm": 0.3800300061702728, |
| "learning_rate": 9.771355986027311e-05, |
| "loss": 0.3722, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6961483594864479, |
| "grad_norm": 0.3476494550704956, |
| "learning_rate": 9.770720863766275e-05, |
| "loss": 0.382, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6980504041844984, |
| "grad_norm": 0.38069918751716614, |
| "learning_rate": 9.77008574150524e-05, |
| "loss": 0.4329, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6999524488825487, |
| "grad_norm": 0.4034759998321533, |
| "learning_rate": 9.769450619244205e-05, |
| "loss": 0.4112, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7018544935805991, |
| "grad_norm": 0.4232093393802643, |
| "learning_rate": 9.76881549698317e-05, |
| "loss": 0.4524, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7037565382786496, |
| "grad_norm": 0.40627321600914, |
| "learning_rate": 9.768180374722134e-05, |
| "loss": 0.388, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7056585829766999, |
| "grad_norm": 0.41021519899368286, |
| "learning_rate": 9.767545252461099e-05, |
| "loss": 0.3741, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7075606276747504, |
| "grad_norm": 0.3615809679031372, |
| "learning_rate": 9.766910130200065e-05, |
| "loss": 0.4432, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7094626723728008, |
| "grad_norm": 0.3088645935058594, |
| "learning_rate": 9.766275007939028e-05, |
| "loss": 0.3343, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7113647170708511, |
| "grad_norm": 0.380659818649292, |
| "learning_rate": 9.765639885677994e-05, |
| "loss": 0.4092, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7132667617689016, |
| "grad_norm": 0.28462380170822144, |
| "learning_rate": 9.765004763416959e-05, |
| "loss": 0.31, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.715168806466952, |
| "grad_norm": 0.3215513229370117, |
| "learning_rate": 9.764369641155923e-05, |
| "loss": 0.4115, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7170708511650024, |
| "grad_norm": 0.397651731967926, |
| "learning_rate": 9.763734518894888e-05, |
| "loss": 0.4369, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7189728958630528, |
| "grad_norm": 0.31436121463775635, |
| "learning_rate": 9.763099396633853e-05, |
| "loss": 0.4339, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7208749405611032, |
| "grad_norm": 0.4024806320667267, |
| "learning_rate": 9.762464274372817e-05, |
| "loss": 0.4252, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7227769852591536, |
| "grad_norm": 0.37994107604026794, |
| "learning_rate": 9.761829152111782e-05, |
| "loss": 0.3483, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.724679029957204, |
| "grad_norm": 0.44616061449050903, |
| "learning_rate": 9.761194029850747e-05, |
| "loss": 0.3809, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7265810746552543, |
| "grad_norm": 0.3396744728088379, |
| "learning_rate": 9.760558907589712e-05, |
| "loss": 0.3382, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7284831193533048, |
| "grad_norm": 0.334839791059494, |
| "learning_rate": 9.759923785328676e-05, |
| "loss": 0.3465, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7303851640513552, |
| "grad_norm": 0.417478084564209, |
| "learning_rate": 9.75928866306764e-05, |
| "loss": 0.3191, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7322872087494056, |
| "grad_norm": 0.30790823698043823, |
| "learning_rate": 9.758653540806606e-05, |
| "loss": 0.3139, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.734189253447456, |
| "grad_norm": 0.4008057415485382, |
| "learning_rate": 9.75801841854557e-05, |
| "loss": 0.419, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7360912981455064, |
| "grad_norm": 0.42966723442077637, |
| "learning_rate": 9.757383296284535e-05, |
| "loss": 0.3634, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7379933428435568, |
| "grad_norm": 0.33789002895355225, |
| "learning_rate": 9.7567481740235e-05, |
| "loss": 0.3966, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7398953875416072, |
| "grad_norm": 0.35244229435920715, |
| "learning_rate": 9.756113051762464e-05, |
| "loss": 0.3991, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7417974322396577, |
| "grad_norm": 0.3581864833831787, |
| "learning_rate": 9.75547792950143e-05, |
| "loss": 0.347, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.743699476937708, |
| "grad_norm": 0.30788975954055786, |
| "learning_rate": 9.754842807240394e-05, |
| "loss": 0.3485, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7456015216357584, |
| "grad_norm": 0.5155593156814575, |
| "learning_rate": 9.754207684979359e-05, |
| "loss": 0.4793, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7475035663338089, |
| "grad_norm": 0.4183029532432556, |
| "learning_rate": 9.753572562718324e-05, |
| "loss": 0.4064, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7494056110318592, |
| "grad_norm": 0.36132046580314636, |
| "learning_rate": 9.752937440457288e-05, |
| "loss": 0.3539, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7513076557299097, |
| "grad_norm": 0.4269217252731323, |
| "learning_rate": 9.752302318196254e-05, |
| "loss": 0.4358, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7532097004279601, |
| "grad_norm": 0.38872459530830383, |
| "learning_rate": 9.751667195935218e-05, |
| "loss": 0.3238, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7551117451260104, |
| "grad_norm": 0.4668743312358856, |
| "learning_rate": 9.751032073674182e-05, |
| "loss": 0.4218, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7570137898240609, |
| "grad_norm": 0.3817143738269806, |
| "learning_rate": 9.750396951413147e-05, |
| "loss": 0.4332, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7589158345221113, |
| "grad_norm": 0.4089401960372925, |
| "learning_rate": 9.749761829152112e-05, |
| "loss": 0.319, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7608178792201616, |
| "grad_norm": 0.36516866087913513, |
| "learning_rate": 9.749126706891077e-05, |
| "loss": 0.3858, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7627199239182121, |
| "grad_norm": 0.3843027949333191, |
| "learning_rate": 9.748491584630041e-05, |
| "loss": 0.4682, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.7646219686162625, |
| "grad_norm": 0.36987295746803284, |
| "learning_rate": 9.747856462369006e-05, |
| "loss": 0.3328, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7665240133143129, |
| "grad_norm": 0.4972301721572876, |
| "learning_rate": 9.747221340107972e-05, |
| "loss": 0.3939, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.7684260580123633, |
| "grad_norm": 0.4319972097873688, |
| "learning_rate": 9.746586217846935e-05, |
| "loss": 0.3918, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7703281027104137, |
| "grad_norm": 0.364364892244339, |
| "learning_rate": 9.7459510955859e-05, |
| "loss": 0.3871, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7722301474084641, |
| "grad_norm": 0.43767908215522766, |
| "learning_rate": 9.745315973324866e-05, |
| "loss": 0.3973, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7741321921065145, |
| "grad_norm": 0.44734928011894226, |
| "learning_rate": 9.74468085106383e-05, |
| "loss": 0.3884, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.776034236804565, |
| "grad_norm": 0.3817954957485199, |
| "learning_rate": 9.744045728802795e-05, |
| "loss": 0.3647, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7779362815026153, |
| "grad_norm": 0.3619462251663208, |
| "learning_rate": 9.74341060654176e-05, |
| "loss": 0.4994, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7798383262006657, |
| "grad_norm": 0.38225993514060974, |
| "learning_rate": 9.742775484280724e-05, |
| "loss": 0.4116, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7817403708987162, |
| "grad_norm": 0.39784252643585205, |
| "learning_rate": 9.742140362019689e-05, |
| "loss": 0.3729, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.7836424155967665, |
| "grad_norm": 0.3188072443008423, |
| "learning_rate": 9.741505239758654e-05, |
| "loss": 0.3767, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.785544460294817, |
| "grad_norm": 0.4509223401546478, |
| "learning_rate": 9.74087011749762e-05, |
| "loss": 0.4595, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7874465049928673, |
| "grad_norm": 0.40249937772750854, |
| "learning_rate": 9.740234995236583e-05, |
| "loss": 0.3761, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7893485496909177, |
| "grad_norm": 0.3387410044670105, |
| "learning_rate": 9.739599872975547e-05, |
| "loss": 0.401, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7912505943889682, |
| "grad_norm": 0.47670629620552063, |
| "learning_rate": 9.738964750714514e-05, |
| "loss": 0.3656, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7931526390870185, |
| "grad_norm": 0.37239211797714233, |
| "learning_rate": 9.738329628453477e-05, |
| "loss": 0.4885, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7950546837850689, |
| "grad_norm": 0.3347351849079132, |
| "learning_rate": 9.737694506192443e-05, |
| "loss": 0.291, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7969567284831194, |
| "grad_norm": 0.3727717399597168, |
| "learning_rate": 9.737059383931408e-05, |
| "loss": 0.3506, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7988587731811697, |
| "grad_norm": 0.3866841793060303, |
| "learning_rate": 9.736424261670372e-05, |
| "loss": 0.4355, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8007608178792202, |
| "grad_norm": 0.39670372009277344, |
| "learning_rate": 9.735789139409337e-05, |
| "loss": 0.4041, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8026628625772706, |
| "grad_norm": 0.35946765542030334, |
| "learning_rate": 9.7351540171483e-05, |
| "loss": 0.3378, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8045649072753209, |
| "grad_norm": 0.24180381000041962, |
| "learning_rate": 9.734518894887267e-05, |
| "loss": 0.3133, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8064669519733714, |
| "grad_norm": 0.4238085150718689, |
| "learning_rate": 9.733883772626231e-05, |
| "loss": 0.3968, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8083689966714218, |
| "grad_norm": 0.35451412200927734, |
| "learning_rate": 9.733248650365195e-05, |
| "loss": 0.3456, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8102710413694721, |
| "grad_norm": 0.49277418851852417, |
| "learning_rate": 9.732613528104161e-05, |
| "loss": 0.3916, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8121730860675226, |
| "grad_norm": 0.34536874294281006, |
| "learning_rate": 9.731978405843125e-05, |
| "loss": 0.537, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.814075130765573, |
| "grad_norm": 0.3002311885356903, |
| "learning_rate": 9.731343283582089e-05, |
| "loss": 0.3842, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8159771754636234, |
| "grad_norm": 0.29766812920570374, |
| "learning_rate": 9.730708161321054e-05, |
| "loss": 0.2979, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8178792201616738, |
| "grad_norm": 0.34347230195999146, |
| "learning_rate": 9.73007303906002e-05, |
| "loss": 0.3996, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8197812648597242, |
| "grad_norm": 0.42430102825164795, |
| "learning_rate": 9.729437916798985e-05, |
| "loss": 0.4677, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8216833095577746, |
| "grad_norm": 0.3375668227672577, |
| "learning_rate": 9.728802794537948e-05, |
| "loss": 0.4257, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.823585354255825, |
| "grad_norm": 0.3718586266040802, |
| "learning_rate": 9.728167672276914e-05, |
| "loss": 0.3555, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8254873989538755, |
| "grad_norm": 0.4310496151447296, |
| "learning_rate": 9.727532550015879e-05, |
| "loss": 0.4026, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8273894436519258, |
| "grad_norm": 0.43832001090049744, |
| "learning_rate": 9.726897427754843e-05, |
| "loss": 0.4421, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8292914883499762, |
| "grad_norm": 0.42209911346435547, |
| "learning_rate": 9.726262305493808e-05, |
| "loss": 0.397, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8311935330480267, |
| "grad_norm": 0.4297396242618561, |
| "learning_rate": 9.725627183232773e-05, |
| "loss": 0.4244, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.833095577746077, |
| "grad_norm": 0.40587079524993896, |
| "learning_rate": 9.724992060971737e-05, |
| "loss": 0.3753, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8349976224441275, |
| "grad_norm": 0.4127040505409241, |
| "learning_rate": 9.724356938710702e-05, |
| "loss": 0.3926, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8368996671421779, |
| "grad_norm": 0.3734678030014038, |
| "learning_rate": 9.723721816449667e-05, |
| "loss": 0.3338, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8388017118402282, |
| "grad_norm": 0.38152286410331726, |
| "learning_rate": 9.723086694188632e-05, |
| "loss": 0.3893, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8407037565382787, |
| "grad_norm": 0.4234791398048401, |
| "learning_rate": 9.722451571927596e-05, |
| "loss": 0.3104, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.842605801236329, |
| "grad_norm": 0.49204525351524353, |
| "learning_rate": 9.721816449666561e-05, |
| "loss": 0.3698, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8445078459343794, |
| "grad_norm": 0.40980932116508484, |
| "learning_rate": 9.721181327405527e-05, |
| "loss": 0.3901, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8464098906324299, |
| "grad_norm": 0.3330426514148712, |
| "learning_rate": 9.72054620514449e-05, |
| "loss": 0.3118, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8483119353304802, |
| "grad_norm": 0.3042624890804291, |
| "learning_rate": 9.719911082883456e-05, |
| "loss": 0.3003, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8502139800285307, |
| "grad_norm": 0.34576475620269775, |
| "learning_rate": 9.719275960622421e-05, |
| "loss": 0.3332, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8521160247265811, |
| "grad_norm": 0.2980082035064697, |
| "learning_rate": 9.718640838361385e-05, |
| "loss": 0.3285, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8540180694246314, |
| "grad_norm": 0.31439459323883057, |
| "learning_rate": 9.71800571610035e-05, |
| "loss": 0.3178, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8559201141226819, |
| "grad_norm": 0.37447845935821533, |
| "learning_rate": 9.717370593839315e-05, |
| "loss": 0.3861, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8578221588207323, |
| "grad_norm": 0.4261024594306946, |
| "learning_rate": 9.716735471578279e-05, |
| "loss": 0.4377, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8597242035187826, |
| "grad_norm": 0.3328630328178406, |
| "learning_rate": 9.716100349317244e-05, |
| "loss": 0.2791, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8616262482168331, |
| "grad_norm": 0.41943463683128357, |
| "learning_rate": 9.715465227056209e-05, |
| "loss": 0.4693, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8635282929148835, |
| "grad_norm": 0.4295640289783478, |
| "learning_rate": 9.714830104795174e-05, |
| "loss": 0.4105, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8654303376129339, |
| "grad_norm": 0.3548508882522583, |
| "learning_rate": 9.714194982534138e-05, |
| "loss": 0.3024, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8673323823109843, |
| "grad_norm": 0.5577777624130249, |
| "learning_rate": 9.713559860273102e-05, |
| "loss": 0.3961, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8692344270090347, |
| "grad_norm": 0.4119040071964264, |
| "learning_rate": 9.712924738012069e-05, |
| "loss": 0.3143, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8711364717070851, |
| "grad_norm": 0.40272560715675354, |
| "learning_rate": 9.712289615751032e-05, |
| "loss": 0.3452, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8730385164051355, |
| "grad_norm": 0.456386536359787, |
| "learning_rate": 9.711654493489998e-05, |
| "loss": 0.403, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.874940561103186, |
| "grad_norm": 0.3982544541358948, |
| "learning_rate": 9.711019371228963e-05, |
| "loss": 0.4498, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8768426058012363, |
| "grad_norm": 0.29361623525619507, |
| "learning_rate": 9.710384248967927e-05, |
| "loss": 0.3724, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.8787446504992867, |
| "grad_norm": 0.3854773938655853, |
| "learning_rate": 9.709749126706892e-05, |
| "loss": 0.4162, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8806466951973372, |
| "grad_norm": 0.3760225474834442, |
| "learning_rate": 9.709114004445856e-05, |
| "loss": 0.4335, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8825487398953875, |
| "grad_norm": 0.4936290383338928, |
| "learning_rate": 9.708478882184821e-05, |
| "loss": 0.3522, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.884450784593438, |
| "grad_norm": 0.3584468364715576, |
| "learning_rate": 9.707843759923786e-05, |
| "loss": 0.552, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8863528292914884, |
| "grad_norm": 0.3523949086666107, |
| "learning_rate": 9.70720863766275e-05, |
| "loss": 0.3498, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8882548739895387, |
| "grad_norm": 0.42082804441452026, |
| "learning_rate": 9.706573515401716e-05, |
| "loss": 0.4863, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.8901569186875892, |
| "grad_norm": 0.4284763038158417, |
| "learning_rate": 9.70593839314068e-05, |
| "loss": 0.4737, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8920589633856396, |
| "grad_norm": 0.3609261214733124, |
| "learning_rate": 9.705303270879644e-05, |
| "loss": 0.3208, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.89396100808369, |
| "grad_norm": 0.31832849979400635, |
| "learning_rate": 9.704668148618609e-05, |
| "loss": 0.2545, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8958630527817404, |
| "grad_norm": 0.38202738761901855, |
| "learning_rate": 9.704033026357574e-05, |
| "loss": 0.3952, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8977650974797908, |
| "grad_norm": 0.347649484872818, |
| "learning_rate": 9.70339790409654e-05, |
| "loss": 0.3776, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8996671421778412, |
| "grad_norm": 0.41626760363578796, |
| "learning_rate": 9.702762781835503e-05, |
| "loss": 0.4152, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9015691868758916, |
| "grad_norm": 0.4042579233646393, |
| "learning_rate": 9.702127659574469e-05, |
| "loss": 0.3813, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9034712315739419, |
| "grad_norm": 0.38196825981140137, |
| "learning_rate": 9.701492537313434e-05, |
| "loss": 0.4398, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9053732762719924, |
| "grad_norm": 0.3867753744125366, |
| "learning_rate": 9.700857415052398e-05, |
| "loss": 0.4995, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9072753209700428, |
| "grad_norm": 0.34228166937828064, |
| "learning_rate": 9.700222292791363e-05, |
| "loss": 0.284, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9091773656680932, |
| "grad_norm": 0.3962937593460083, |
| "learning_rate": 9.699587170530328e-05, |
| "loss": 0.3501, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9110794103661436, |
| "grad_norm": 0.3665268123149872, |
| "learning_rate": 9.698952048269292e-05, |
| "loss": 0.2737, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.912981455064194, |
| "grad_norm": 0.3775653839111328, |
| "learning_rate": 9.698316926008257e-05, |
| "loss": 0.3173, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9148834997622444, |
| "grad_norm": 0.3584369421005249, |
| "learning_rate": 9.697681803747222e-05, |
| "loss": 0.3055, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9167855444602948, |
| "grad_norm": 0.3510100245475769, |
| "learning_rate": 9.697046681486186e-05, |
| "loss": 0.3278, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.9186875891583453, |
| "grad_norm": 0.33394765853881836, |
| "learning_rate": 9.696411559225151e-05, |
| "loss": 0.2954, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9205896338563956, |
| "grad_norm": 0.437014102935791, |
| "learning_rate": 9.695776436964116e-05, |
| "loss": 0.3797, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.922491678554446, |
| "grad_norm": 0.37421244382858276, |
| "learning_rate": 9.695141314703082e-05, |
| "loss": 0.3521, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9243937232524965, |
| "grad_norm": 0.37696099281311035, |
| "learning_rate": 9.694506192442045e-05, |
| "loss": 0.3455, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9262957679505468, |
| "grad_norm": 0.5452500581741333, |
| "learning_rate": 9.693871070181009e-05, |
| "loss": 0.3624, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9281978126485972, |
| "grad_norm": 0.4049624502658844, |
| "learning_rate": 9.693235947919976e-05, |
| "loss": 0.4017, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9300998573466477, |
| "grad_norm": 0.32757866382598877, |
| "learning_rate": 9.69260082565894e-05, |
| "loss": 0.3536, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.932001902044698, |
| "grad_norm": 0.298367977142334, |
| "learning_rate": 9.691965703397905e-05, |
| "loss": 0.3374, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9339039467427485, |
| "grad_norm": 0.22035005688667297, |
| "learning_rate": 9.69133058113687e-05, |
| "loss": 0.2855, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9358059914407989, |
| "grad_norm": 0.43000441789627075, |
| "learning_rate": 9.690695458875834e-05, |
| "loss": 0.4544, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9377080361388492, |
| "grad_norm": 0.28024253249168396, |
| "learning_rate": 9.690060336614799e-05, |
| "loss": 0.308, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.9396100808368997, |
| "grad_norm": 0.53145432472229, |
| "learning_rate": 9.689425214353763e-05, |
| "loss": 0.4569, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9415121255349501, |
| "grad_norm": 0.4006127715110779, |
| "learning_rate": 9.688790092092729e-05, |
| "loss": 0.419, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9434141702330004, |
| "grad_norm": 0.4057261645793915, |
| "learning_rate": 9.688154969831693e-05, |
| "loss": 0.3553, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.9453162149310509, |
| "grad_norm": 0.40803465247154236, |
| "learning_rate": 9.687519847570657e-05, |
| "loss": 0.3735, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.9472182596291013, |
| "grad_norm": 0.34222155809402466, |
| "learning_rate": 9.686884725309623e-05, |
| "loss": 0.367, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.9491203043271517, |
| "grad_norm": 0.40403544902801514, |
| "learning_rate": 9.686249603048587e-05, |
| "loss": 0.416, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.9510223490252021, |
| "grad_norm": 0.33636951446533203, |
| "learning_rate": 9.685614480787551e-05, |
| "loss": 0.3423, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9529243937232525, |
| "grad_norm": 0.3394258916378021, |
| "learning_rate": 9.684979358526516e-05, |
| "loss": 0.3282, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.9548264384213029, |
| "grad_norm": 0.3682473599910736, |
| "learning_rate": 9.684344236265482e-05, |
| "loss": 0.406, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9567284831193533, |
| "grad_norm": 0.35073623061180115, |
| "learning_rate": 9.683709114004447e-05, |
| "loss": 0.376, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9586305278174037, |
| "grad_norm": 0.36000022292137146, |
| "learning_rate": 9.68307399174341e-05, |
| "loss": 0.3969, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9605325725154541, |
| "grad_norm": 0.361158162355423, |
| "learning_rate": 9.682438869482376e-05, |
| "loss": 0.347, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9624346172135045, |
| "grad_norm": 0.3075178265571594, |
| "learning_rate": 9.681803747221341e-05, |
| "loss": 0.4362, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9643366619115549, |
| "grad_norm": 0.30084747076034546, |
| "learning_rate": 9.681168624960305e-05, |
| "loss": 0.3563, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.9662387066096053, |
| "grad_norm": 0.3221014440059662, |
| "learning_rate": 9.68053350269927e-05, |
| "loss": 0.3366, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9681407513076558, |
| "grad_norm": 0.36464688181877136, |
| "learning_rate": 9.679898380438235e-05, |
| "loss": 0.3992, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.9700427960057061, |
| "grad_norm": 0.32443803548812866, |
| "learning_rate": 9.679263258177199e-05, |
| "loss": 0.3293, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9719448407037565, |
| "grad_norm": 0.3689454197883606, |
| "learning_rate": 9.678628135916164e-05, |
| "loss": 0.3546, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.973846885401807, |
| "grad_norm": 0.3754975199699402, |
| "learning_rate": 9.677993013655129e-05, |
| "loss": 0.3856, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9757489300998573, |
| "grad_norm": 0.3642953634262085, |
| "learning_rate": 9.677357891394094e-05, |
| "loss": 0.4326, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.9776509747979077, |
| "grad_norm": 0.43278223276138306, |
| "learning_rate": 9.676722769133058e-05, |
| "loss": 0.3964, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9795530194959582, |
| "grad_norm": 0.43771886825561523, |
| "learning_rate": 9.676087646872023e-05, |
| "loss": 0.3861, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9814550641940085, |
| "grad_norm": 0.34908977150917053, |
| "learning_rate": 9.675452524610989e-05, |
| "loss": 0.3981, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.983357108892059, |
| "grad_norm": 0.35733312368392944, |
| "learning_rate": 9.674817402349953e-05, |
| "loss": 0.3636, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.9852591535901094, |
| "grad_norm": 0.3636298179626465, |
| "learning_rate": 9.674182280088918e-05, |
| "loss": 0.4336, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9871611982881597, |
| "grad_norm": 0.32771605253219604, |
| "learning_rate": 9.673547157827883e-05, |
| "loss": 0.3481, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.9890632429862102, |
| "grad_norm": 0.40213117003440857, |
| "learning_rate": 9.672912035566847e-05, |
| "loss": 0.3707, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9909652876842606, |
| "grad_norm": 0.3386654257774353, |
| "learning_rate": 9.672276913305812e-05, |
| "loss": 0.3384, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.992867332382311, |
| "grad_norm": 0.3965696096420288, |
| "learning_rate": 9.671641791044777e-05, |
| "loss": 0.3595, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9947693770803614, |
| "grad_norm": 0.38238459825515747, |
| "learning_rate": 9.671006668783741e-05, |
| "loss": 0.3714, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.9966714217784118, |
| "grad_norm": 0.3248405456542969, |
| "learning_rate": 9.670371546522706e-05, |
| "loss": 0.394, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9985734664764622, |
| "grad_norm": 0.3902266323566437, |
| "learning_rate": 9.66973642426167e-05, |
| "loss": 0.4115, |
| "step": 525 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 15750, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 525, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.347361345425408e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |