text2json-generative-1epoch / trainer_state.json
alexandrlukashov's picture
Upload folder using huggingface_hub
e8892c3 verified
{
"best_global_step": 7969,
"best_metric": 0.9335432648658752,
"best_model_checkpoint": "./qwen3-1.7b-text2json-v3/checkpoint-7969",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 7969,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012549610177733854,
"grad_norm": 2.7938790321350098,
"learning_rate": 1.875e-06,
"loss": 1.6903125762939453,
"step": 10
},
{
"epoch": 0.0025099220355467708,
"grad_norm": 2.206455945968628,
"learning_rate": 3.958333333333333e-06,
"loss": 1.661090087890625,
"step": 20
},
{
"epoch": 0.003764883053320156,
"grad_norm": 0.9010259509086609,
"learning_rate": 6.041666666666667e-06,
"loss": 1.5127840042114258,
"step": 30
},
{
"epoch": 0.0050198440710935415,
"grad_norm": 0.4326864778995514,
"learning_rate": 8.125000000000001e-06,
"loss": 1.3494388580322265,
"step": 40
},
{
"epoch": 0.006274805088866927,
"grad_norm": 0.41406598687171936,
"learning_rate": 1.0208333333333334e-05,
"loss": 1.3530027389526367,
"step": 50
},
{
"epoch": 0.007529766106640312,
"grad_norm": 0.3024258613586426,
"learning_rate": 1.2291666666666666e-05,
"loss": 1.2346282958984376,
"step": 60
},
{
"epoch": 0.008784727124413697,
"grad_norm": 0.25805506110191345,
"learning_rate": 1.4374999999999999e-05,
"loss": 1.2771096229553223,
"step": 70
},
{
"epoch": 0.010039688142187083,
"grad_norm": 0.2437220960855484,
"learning_rate": 1.6458333333333335e-05,
"loss": 1.261610221862793,
"step": 80
},
{
"epoch": 0.011294649159960469,
"grad_norm": 0.26054802536964417,
"learning_rate": 1.854166666666667e-05,
"loss": 1.173118495941162,
"step": 90
},
{
"epoch": 0.012549610177733855,
"grad_norm": 0.28059178590774536,
"learning_rate": 2.0625e-05,
"loss": 1.1928260803222657,
"step": 100
},
{
"epoch": 0.013804571195507239,
"grad_norm": 0.23667506873607635,
"learning_rate": 2.2708333333333334e-05,
"loss": 1.2138886451721191,
"step": 110
},
{
"epoch": 0.015059532213280625,
"grad_norm": 0.24474164843559265,
"learning_rate": 2.479166666666667e-05,
"loss": 1.2723681449890136,
"step": 120
},
{
"epoch": 0.016314493231054012,
"grad_norm": 0.33744102716445923,
"learning_rate": 2.6875e-05,
"loss": 1.267289924621582,
"step": 130
},
{
"epoch": 0.017569454248827394,
"grad_norm": 0.26319584250450134,
"learning_rate": 2.8958333333333337e-05,
"loss": 1.190561866760254,
"step": 140
},
{
"epoch": 0.01882441526660078,
"grad_norm": 0.2557962238788605,
"learning_rate": 3.104166666666667e-05,
"loss": 1.1946110725402832,
"step": 150
},
{
"epoch": 0.020079376284374166,
"grad_norm": 0.27757933735847473,
"learning_rate": 3.3125e-05,
"loss": 1.1806714057922363,
"step": 160
},
{
"epoch": 0.021334337302147552,
"grad_norm": 0.22700543701648712,
"learning_rate": 3.520833333333334e-05,
"loss": 1.2129878997802734,
"step": 170
},
{
"epoch": 0.022589298319920938,
"grad_norm": 0.2856610417366028,
"learning_rate": 3.729166666666667e-05,
"loss": 1.1290763854980468,
"step": 180
},
{
"epoch": 0.023844259337694323,
"grad_norm": 0.26019546389579773,
"learning_rate": 3.9375e-05,
"loss": 1.2425550460815429,
"step": 190
},
{
"epoch": 0.02509922035546771,
"grad_norm": 0.23760564625263214,
"learning_rate": 4.1458333333333336e-05,
"loss": 1.2198989868164063,
"step": 200
},
{
"epoch": 0.026354181373241095,
"grad_norm": 0.26171621680259705,
"learning_rate": 4.354166666666667e-05,
"loss": 1.228119468688965,
"step": 210
},
{
"epoch": 0.027609142391014477,
"grad_norm": 0.23990799486637115,
"learning_rate": 4.5625e-05,
"loss": 1.1751232147216797,
"step": 220
},
{
"epoch": 0.028864103408787863,
"grad_norm": 0.21109728515148163,
"learning_rate": 4.770833333333334e-05,
"loss": 1.18958740234375,
"step": 230
},
{
"epoch": 0.03011906442656125,
"grad_norm": 0.22801515460014343,
"learning_rate": 4.979166666666667e-05,
"loss": 1.2031834602355957,
"step": 240
},
{
"epoch": 0.03137402544433464,
"grad_norm": 0.2113051414489746,
"learning_rate": 4.99417777202743e-05,
"loss": 1.104047679901123,
"step": 250
},
{
"epoch": 0.032628986462108024,
"grad_norm": 0.2567385733127594,
"learning_rate": 4.987708629835684e-05,
"loss": 1.124267292022705,
"step": 260
},
{
"epoch": 0.0338839474798814,
"grad_norm": 0.25872015953063965,
"learning_rate": 4.981239487643939e-05,
"loss": 1.1195612907409669,
"step": 270
},
{
"epoch": 0.03513890849765479,
"grad_norm": 0.22646528482437134,
"learning_rate": 4.974770345452193e-05,
"loss": 1.129570484161377,
"step": 280
},
{
"epoch": 0.036393869515428175,
"grad_norm": 0.26482081413269043,
"learning_rate": 4.968301203260448e-05,
"loss": 1.1967822074890138,
"step": 290
},
{
"epoch": 0.03764883053320156,
"grad_norm": 0.2646678686141968,
"learning_rate": 4.9618320610687025e-05,
"loss": 1.1607979774475097,
"step": 300
},
{
"epoch": 0.038903791550974946,
"grad_norm": 0.22207561135292053,
"learning_rate": 4.955362918876957e-05,
"loss": 1.1674532890319824,
"step": 310
},
{
"epoch": 0.04015875256874833,
"grad_norm": 0.24940666556358337,
"learning_rate": 4.948893776685212e-05,
"loss": 1.0974184036254884,
"step": 320
},
{
"epoch": 0.04141371358652172,
"grad_norm": 0.2815345823764801,
"learning_rate": 4.9424246344934665e-05,
"loss": 1.1656072616577149,
"step": 330
},
{
"epoch": 0.042668674604295104,
"grad_norm": 0.2820636034011841,
"learning_rate": 4.935955492301721e-05,
"loss": 1.1393600463867188,
"step": 340
},
{
"epoch": 0.04392363562206849,
"grad_norm": 0.21207420527935028,
"learning_rate": 4.929486350109976e-05,
"loss": 1.1813325881958008,
"step": 350
},
{
"epoch": 0.045178596639841875,
"grad_norm": 0.2317555546760559,
"learning_rate": 4.92301720791823e-05,
"loss": 1.2024839401245118,
"step": 360
},
{
"epoch": 0.04643355765761526,
"grad_norm": 0.22415006160736084,
"learning_rate": 4.916548065726485e-05,
"loss": 1.1521187782287599,
"step": 370
},
{
"epoch": 0.04768851867538865,
"grad_norm": 0.2794686257839203,
"learning_rate": 4.910078923534739e-05,
"loss": 1.1735503196716308,
"step": 380
},
{
"epoch": 0.04894347969316203,
"grad_norm": 0.20266857743263245,
"learning_rate": 4.9036097813429946e-05,
"loss": 1.1360593795776368,
"step": 390
},
{
"epoch": 0.05019844071093542,
"grad_norm": 0.24181729555130005,
"learning_rate": 4.8971406391512486e-05,
"loss": 1.1604348182678224,
"step": 400
},
{
"epoch": 0.051453401728708804,
"grad_norm": 0.23283515870571136,
"learning_rate": 4.890671496959503e-05,
"loss": 1.1582366943359375,
"step": 410
},
{
"epoch": 0.05270836274648219,
"grad_norm": 0.2232106328010559,
"learning_rate": 4.884202354767758e-05,
"loss": 1.1357643127441406,
"step": 420
},
{
"epoch": 0.053963323764255576,
"grad_norm": 0.20864224433898926,
"learning_rate": 4.8777332125760126e-05,
"loss": 1.1960598945617675,
"step": 430
},
{
"epoch": 0.055218284782028955,
"grad_norm": 0.21834968030452728,
"learning_rate": 4.871264070384267e-05,
"loss": 1.1870046615600587,
"step": 440
},
{
"epoch": 0.05647324579980234,
"grad_norm": 0.22693583369255066,
"learning_rate": 4.864794928192522e-05,
"loss": 1.1584319114685058,
"step": 450
},
{
"epoch": 0.057728206817575727,
"grad_norm": 0.22258850932121277,
"learning_rate": 4.8583257860007766e-05,
"loss": 1.1739194869995118,
"step": 460
},
{
"epoch": 0.05898316783534911,
"grad_norm": 0.21987426280975342,
"learning_rate": 4.851856643809031e-05,
"loss": 1.2010994911193849,
"step": 470
},
{
"epoch": 0.0602381288531225,
"grad_norm": 0.21596240997314453,
"learning_rate": 4.845387501617285e-05,
"loss": 1.0946701049804688,
"step": 480
},
{
"epoch": 0.061493089870895884,
"grad_norm": 0.18262015283107758,
"learning_rate": 4.8389183594255406e-05,
"loss": 1.1652299880981445,
"step": 490
},
{
"epoch": 0.06274805088866928,
"grad_norm": 0.22206667065620422,
"learning_rate": 4.8324492172337946e-05,
"loss": 1.1554561614990235,
"step": 500
},
{
"epoch": 0.06400301190644266,
"grad_norm": 0.19047950208187103,
"learning_rate": 4.82598007504205e-05,
"loss": 1.1874130249023438,
"step": 510
},
{
"epoch": 0.06525797292421605,
"grad_norm": 0.23143501579761505,
"learning_rate": 4.819510932850304e-05,
"loss": 1.1211513519287108,
"step": 520
},
{
"epoch": 0.06651293394198943,
"grad_norm": 0.21261698007583618,
"learning_rate": 4.8130417906585586e-05,
"loss": 1.1845171928405762,
"step": 530
},
{
"epoch": 0.0677678949597628,
"grad_norm": 0.22298000752925873,
"learning_rate": 4.806572648466813e-05,
"loss": 1.1463261604309083,
"step": 540
},
{
"epoch": 0.0690228559775362,
"grad_norm": 0.20260395109653473,
"learning_rate": 4.800103506275068e-05,
"loss": 1.1507064819335937,
"step": 550
},
{
"epoch": 0.07027781699530958,
"grad_norm": 0.22262270748615265,
"learning_rate": 4.793634364083323e-05,
"loss": 1.1626813888549805,
"step": 560
},
{
"epoch": 0.07153277801308297,
"grad_norm": 0.18661239743232727,
"learning_rate": 4.7871652218915773e-05,
"loss": 1.1173880577087403,
"step": 570
},
{
"epoch": 0.07278773903085635,
"grad_norm": 0.17793259024620056,
"learning_rate": 4.780696079699832e-05,
"loss": 1.182445240020752,
"step": 580
},
{
"epoch": 0.07404270004862974,
"grad_norm": 0.22001393139362335,
"learning_rate": 4.774226937508087e-05,
"loss": 1.0757221221923827,
"step": 590
},
{
"epoch": 0.07529766106640312,
"grad_norm": 0.16966955363750458,
"learning_rate": 4.7677577953163414e-05,
"loss": 1.1065872192382813,
"step": 600
},
{
"epoch": 0.07655262208417651,
"grad_norm": 0.18004654347896576,
"learning_rate": 4.761288653124596e-05,
"loss": 1.1050881385803222,
"step": 610
},
{
"epoch": 0.07780758310194989,
"grad_norm": 0.21302928030490875,
"learning_rate": 4.75481951093285e-05,
"loss": 1.1550076484680176,
"step": 620
},
{
"epoch": 0.07906254411972329,
"grad_norm": 0.19082143902778625,
"learning_rate": 4.7483503687411054e-05,
"loss": 1.133737564086914,
"step": 630
},
{
"epoch": 0.08031750513749666,
"grad_norm": 0.18166916072368622,
"learning_rate": 4.7418812265493594e-05,
"loss": 1.1011496543884278,
"step": 640
},
{
"epoch": 0.08157246615527006,
"grad_norm": 0.2051081359386444,
"learning_rate": 4.735412084357615e-05,
"loss": 1.2461429595947267,
"step": 650
},
{
"epoch": 0.08282742717304344,
"grad_norm": 0.1807452291250229,
"learning_rate": 4.7289429421658694e-05,
"loss": 1.160153579711914,
"step": 660
},
{
"epoch": 0.08408238819081683,
"grad_norm": 0.2180749773979187,
"learning_rate": 4.7224737999741234e-05,
"loss": 1.1506930351257325,
"step": 670
},
{
"epoch": 0.08533734920859021,
"grad_norm": 0.18058760464191437,
"learning_rate": 4.716004657782379e-05,
"loss": 1.1412607192993165,
"step": 680
},
{
"epoch": 0.0865923102263636,
"grad_norm": 0.1865541785955429,
"learning_rate": 4.709535515590633e-05,
"loss": 1.0827465057373047,
"step": 690
},
{
"epoch": 0.08784727124413698,
"grad_norm": 0.20097248256206512,
"learning_rate": 4.703066373398888e-05,
"loss": 1.1497159004211426,
"step": 700
},
{
"epoch": 0.08910223226191036,
"grad_norm": 0.2066005915403366,
"learning_rate": 4.696597231207142e-05,
"loss": 1.1188447952270508,
"step": 710
},
{
"epoch": 0.09035719327968375,
"grad_norm": 0.23538357019424438,
"learning_rate": 4.690128089015397e-05,
"loss": 1.1101722717285156,
"step": 720
},
{
"epoch": 0.09161215429745713,
"grad_norm": 0.2259693592786789,
"learning_rate": 4.6836589468236515e-05,
"loss": 1.1163352966308593,
"step": 730
},
{
"epoch": 0.09286711531523052,
"grad_norm": 0.1654050350189209,
"learning_rate": 4.677189804631906e-05,
"loss": 1.0785648345947265,
"step": 740
},
{
"epoch": 0.0941220763330039,
"grad_norm": 0.1840600073337555,
"learning_rate": 4.670720662440161e-05,
"loss": 1.114925003051758,
"step": 750
},
{
"epoch": 0.0953770373507773,
"grad_norm": 0.22095882892608643,
"learning_rate": 4.6642515202484155e-05,
"loss": 1.1339808464050294,
"step": 760
},
{
"epoch": 0.09663199836855067,
"grad_norm": 0.18420755863189697,
"learning_rate": 4.65778237805667e-05,
"loss": 1.1255131721496583,
"step": 770
},
{
"epoch": 0.09788695938632407,
"grad_norm": 0.20250706374645233,
"learning_rate": 4.651313235864925e-05,
"loss": 1.0625088691711426,
"step": 780
},
{
"epoch": 0.09914192040409744,
"grad_norm": 0.20630766451358795,
"learning_rate": 4.644844093673179e-05,
"loss": 1.0966971397399903,
"step": 790
},
{
"epoch": 0.10039688142187084,
"grad_norm": 0.19232341647148132,
"learning_rate": 4.638374951481434e-05,
"loss": 1.1137746810913085,
"step": 800
},
{
"epoch": 0.10165184243964422,
"grad_norm": 0.22812579572200775,
"learning_rate": 4.631905809289688e-05,
"loss": 1.157925796508789,
"step": 810
},
{
"epoch": 0.10290680345741761,
"grad_norm": 0.17667317390441895,
"learning_rate": 4.6254366670979435e-05,
"loss": 1.1550764083862304,
"step": 820
},
{
"epoch": 0.10416176447519099,
"grad_norm": 0.18233029544353485,
"learning_rate": 4.6189675249061975e-05,
"loss": 1.1150010108947754,
"step": 830
},
{
"epoch": 0.10541672549296438,
"grad_norm": 0.20302756130695343,
"learning_rate": 4.612498382714452e-05,
"loss": 1.0721470832824707,
"step": 840
},
{
"epoch": 0.10667168651073776,
"grad_norm": 0.2043253630399704,
"learning_rate": 4.606029240522707e-05,
"loss": 1.1333942413330078,
"step": 850
},
{
"epoch": 0.10792664752851115,
"grad_norm": 0.19231152534484863,
"learning_rate": 4.5995600983309616e-05,
"loss": 1.1027427673339845,
"step": 860
},
{
"epoch": 0.10918160854628453,
"grad_norm": 0.20718224346637726,
"learning_rate": 4.593090956139216e-05,
"loss": 1.1255599975585937,
"step": 870
},
{
"epoch": 0.11043656956405791,
"grad_norm": 0.1926298588514328,
"learning_rate": 4.586621813947471e-05,
"loss": 1.1083927154541016,
"step": 880
},
{
"epoch": 0.1116915305818313,
"grad_norm": 0.2107614278793335,
"learning_rate": 4.5801526717557256e-05,
"loss": 1.0679313659667968,
"step": 890
},
{
"epoch": 0.11294649159960468,
"grad_norm": 0.1800786405801773,
"learning_rate": 4.57368352956398e-05,
"loss": 1.2032075881958009,
"step": 900
},
{
"epoch": 0.11420145261737807,
"grad_norm": 0.18883837759494781,
"learning_rate": 4.567214387372234e-05,
"loss": 1.1553642272949218,
"step": 910
},
{
"epoch": 0.11545641363515145,
"grad_norm": 0.17952467501163483,
"learning_rate": 4.5607452451804896e-05,
"loss": 1.151566505432129,
"step": 920
},
{
"epoch": 0.11671137465292485,
"grad_norm": 0.18356452882289886,
"learning_rate": 4.5542761029887436e-05,
"loss": 1.1210906028747558,
"step": 930
},
{
"epoch": 0.11796633567069822,
"grad_norm": 0.19210585951805115,
"learning_rate": 4.547806960796999e-05,
"loss": 1.1327272415161134,
"step": 940
},
{
"epoch": 0.11922129668847162,
"grad_norm": 0.2031843215227127,
"learning_rate": 4.541337818605253e-05,
"loss": 1.1083773612976073,
"step": 950
},
{
"epoch": 0.120476257706245,
"grad_norm": 0.16328449547290802,
"learning_rate": 4.5348686764135076e-05,
"loss": 1.163971519470215,
"step": 960
},
{
"epoch": 0.12173121872401839,
"grad_norm": 0.17702428996562958,
"learning_rate": 4.528399534221762e-05,
"loss": 1.1527894973754882,
"step": 970
},
{
"epoch": 0.12298617974179177,
"grad_norm": 0.1729395091533661,
"learning_rate": 4.521930392030017e-05,
"loss": 1.1400185585021974,
"step": 980
},
{
"epoch": 0.12424114075956516,
"grad_norm": 0.17520761489868164,
"learning_rate": 4.5154612498382716e-05,
"loss": 1.0990144729614257,
"step": 990
},
{
"epoch": 0.12549610177733855,
"grad_norm": 0.18720504641532898,
"learning_rate": 4.508992107646526e-05,
"loss": 1.1631937980651856,
"step": 1000
},
{
"epoch": 0.12675106279511192,
"grad_norm": 0.19207048416137695,
"learning_rate": 4.502522965454781e-05,
"loss": 1.1027806282043457,
"step": 1010
},
{
"epoch": 0.1280060238128853,
"grad_norm": 0.1836109161376953,
"learning_rate": 4.496053823263036e-05,
"loss": 1.1294532775878907,
"step": 1020
},
{
"epoch": 0.1292609848306587,
"grad_norm": 0.17419582605361938,
"learning_rate": 4.48958468107129e-05,
"loss": 1.1544736862182616,
"step": 1030
},
{
"epoch": 0.1305159458484321,
"grad_norm": 0.20314334332942963,
"learning_rate": 4.483115538879545e-05,
"loss": 1.204758644104004,
"step": 1040
},
{
"epoch": 0.13177090686620546,
"grad_norm": 0.17511610686779022,
"learning_rate": 4.476646396687799e-05,
"loss": 1.107564353942871,
"step": 1050
},
{
"epoch": 0.13302586788397885,
"grad_norm": 0.215122252702713,
"learning_rate": 4.4701772544960544e-05,
"loss": 1.2063298225402832,
"step": 1060
},
{
"epoch": 0.13428082890175225,
"grad_norm": 0.17670315504074097,
"learning_rate": 4.4637081123043084e-05,
"loss": 1.1757978439331054,
"step": 1070
},
{
"epoch": 0.1355357899195256,
"grad_norm": 0.18765057623386383,
"learning_rate": 4.457238970112563e-05,
"loss": 1.1447077751159669,
"step": 1080
},
{
"epoch": 0.136790750937299,
"grad_norm": 0.5525104403495789,
"learning_rate": 4.450769827920818e-05,
"loss": 1.1048051834106445,
"step": 1090
},
{
"epoch": 0.1380457119550724,
"grad_norm": 0.19476006925106049,
"learning_rate": 4.4443006857290724e-05,
"loss": 1.1642623901367188,
"step": 1100
},
{
"epoch": 0.1393006729728458,
"grad_norm": 0.17488926649093628,
"learning_rate": 4.437831543537328e-05,
"loss": 1.0796038627624511,
"step": 1110
},
{
"epoch": 0.14055563399061916,
"grad_norm": 0.20637401938438416,
"learning_rate": 4.431362401345582e-05,
"loss": 1.1843393325805665,
"step": 1120
},
{
"epoch": 0.14181059500839255,
"grad_norm": 0.22406329214572906,
"learning_rate": 4.4248932591538364e-05,
"loss": 1.1924137115478515,
"step": 1130
},
{
"epoch": 0.14306555602616594,
"grad_norm": 0.20387622714042664,
"learning_rate": 4.418424116962091e-05,
"loss": 1.1867655754089355,
"step": 1140
},
{
"epoch": 0.14432051704393933,
"grad_norm": 0.1762738674879074,
"learning_rate": 4.411954974770346e-05,
"loss": 1.0975682258605957,
"step": 1150
},
{
"epoch": 0.1455754780617127,
"grad_norm": 0.21380206942558289,
"learning_rate": 4.4054858325786004e-05,
"loss": 1.1449914932250977,
"step": 1160
},
{
"epoch": 0.1468304390794861,
"grad_norm": 0.19425804913043976,
"learning_rate": 4.399016690386855e-05,
"loss": 1.109062099456787,
"step": 1170
},
{
"epoch": 0.14808540009725948,
"grad_norm": 0.18465885519981384,
"learning_rate": 4.39254754819511e-05,
"loss": 1.0944741249084473,
"step": 1180
},
{
"epoch": 0.14934036111503288,
"grad_norm": 0.18625572323799133,
"learning_rate": 4.3860784060033645e-05,
"loss": 1.100302505493164,
"step": 1190
},
{
"epoch": 0.15059532213280624,
"grad_norm": 0.19639429450035095,
"learning_rate": 4.3796092638116185e-05,
"loss": 1.1336475372314454,
"step": 1200
},
{
"epoch": 0.15185028315057963,
"grad_norm": 0.17792847752571106,
"learning_rate": 4.373140121619874e-05,
"loss": 1.1655400276184082,
"step": 1210
},
{
"epoch": 0.15310524416835303,
"grad_norm": 0.19248999655246735,
"learning_rate": 4.366670979428128e-05,
"loss": 1.0734170913696288,
"step": 1220
},
{
"epoch": 0.1543602051861264,
"grad_norm": 0.16676078736782074,
"learning_rate": 4.360201837236383e-05,
"loss": 1.1042096138000488,
"step": 1230
},
{
"epoch": 0.15561516620389979,
"grad_norm": 0.2144252061843872,
"learning_rate": 4.353732695044637e-05,
"loss": 1.086879348754883,
"step": 1240
},
{
"epoch": 0.15687012722167318,
"grad_norm": 0.191383957862854,
"learning_rate": 4.347263552852892e-05,
"loss": 1.1877761840820313,
"step": 1250
},
{
"epoch": 0.15812508823944657,
"grad_norm": 0.1510523408651352,
"learning_rate": 4.3407944106611465e-05,
"loss": 1.0977872848510741,
"step": 1260
},
{
"epoch": 0.15938004925721994,
"grad_norm": 0.21486221253871918,
"learning_rate": 4.334325268469401e-05,
"loss": 1.1684078216552733,
"step": 1270
},
{
"epoch": 0.16063501027499333,
"grad_norm": 0.20981676876544952,
"learning_rate": 4.327856126277656e-05,
"loss": 1.1553811073303222,
"step": 1280
},
{
"epoch": 0.16188997129276672,
"grad_norm": 0.17923256754875183,
"learning_rate": 4.3213869840859105e-05,
"loss": 1.084774875640869,
"step": 1290
},
{
"epoch": 0.16314493231054011,
"grad_norm": 0.16394007205963135,
"learning_rate": 4.314917841894165e-05,
"loss": 1.1058798789978028,
"step": 1300
},
{
"epoch": 0.16439989332831348,
"grad_norm": 0.18293628096580505,
"learning_rate": 4.30844869970242e-05,
"loss": 1.1011856079101563,
"step": 1310
},
{
"epoch": 0.16565485434608687,
"grad_norm": 0.15914376080036163,
"learning_rate": 4.301979557510674e-05,
"loss": 1.182911205291748,
"step": 1320
},
{
"epoch": 0.16690981536386026,
"grad_norm": 0.19914117455482483,
"learning_rate": 4.295510415318929e-05,
"loss": 1.138012981414795,
"step": 1330
},
{
"epoch": 0.16816477638163366,
"grad_norm": 0.177483931183815,
"learning_rate": 4.289041273127183e-05,
"loss": 1.1617022514343263,
"step": 1340
},
{
"epoch": 0.16941973739940702,
"grad_norm": 0.18268829584121704,
"learning_rate": 4.2825721309354386e-05,
"loss": 1.1040393829345703,
"step": 1350
},
{
"epoch": 0.17067469841718041,
"grad_norm": 0.1875396966934204,
"learning_rate": 4.2761029887436926e-05,
"loss": 1.1711323738098145,
"step": 1360
},
{
"epoch": 0.1719296594349538,
"grad_norm": 0.20280130207538605,
"learning_rate": 4.269633846551947e-05,
"loss": 1.0739377021789551,
"step": 1370
},
{
"epoch": 0.1731846204527272,
"grad_norm": 0.17182214558124542,
"learning_rate": 4.263164704360202e-05,
"loss": 1.1563935279846191,
"step": 1380
},
{
"epoch": 0.17443958147050057,
"grad_norm": 0.1958380937576294,
"learning_rate": 4.2566955621684566e-05,
"loss": 1.152635383605957,
"step": 1390
},
{
"epoch": 0.17569454248827396,
"grad_norm": 0.17894886434078217,
"learning_rate": 4.250226419976711e-05,
"loss": 1.129916000366211,
"step": 1400
},
{
"epoch": 0.17694950350604735,
"grad_norm": 0.1785098761320114,
"learning_rate": 4.243757277784966e-05,
"loss": 1.1258666038513183,
"step": 1410
},
{
"epoch": 0.17820446452382072,
"grad_norm": 0.19660112261772156,
"learning_rate": 4.2372881355932206e-05,
"loss": 1.083286666870117,
"step": 1420
},
{
"epoch": 0.1794594255415941,
"grad_norm": 0.1892261505126953,
"learning_rate": 4.230818993401475e-05,
"loss": 1.12530574798584,
"step": 1430
},
{
"epoch": 0.1807143865593675,
"grad_norm": 0.22547674179077148,
"learning_rate": 4.224349851209729e-05,
"loss": 1.1078373908996582,
"step": 1440
},
{
"epoch": 0.1819693475771409,
"grad_norm": 0.16683579981327057,
"learning_rate": 4.2178807090179846e-05,
"loss": 1.075201416015625,
"step": 1450
},
{
"epoch": 0.18322430859491426,
"grad_norm": 0.19976581633090973,
"learning_rate": 4.2114115668262386e-05,
"loss": 1.0396580696105957,
"step": 1460
},
{
"epoch": 0.18447926961268765,
"grad_norm": 0.18316680192947388,
"learning_rate": 4.204942424634494e-05,
"loss": 1.1359598159790039,
"step": 1470
},
{
"epoch": 0.18573423063046104,
"grad_norm": 0.2085312008857727,
"learning_rate": 4.198473282442748e-05,
"loss": 1.1656570434570312,
"step": 1480
},
{
"epoch": 0.18698919164823444,
"grad_norm": 0.20343416929244995,
"learning_rate": 4.192004140251003e-05,
"loss": 1.110813522338867,
"step": 1490
},
{
"epoch": 0.1882441526660078,
"grad_norm": 0.19183684885501862,
"learning_rate": 4.1855349980592573e-05,
"loss": 1.0822998046875,
"step": 1500
},
{
"epoch": 0.1894991136837812,
"grad_norm": 0.23563328385353088,
"learning_rate": 4.179065855867512e-05,
"loss": 1.0753483772277832,
"step": 1510
},
{
"epoch": 0.1907540747015546,
"grad_norm": 0.1735006421804428,
"learning_rate": 4.172596713675767e-05,
"loss": 1.0641088485717773,
"step": 1520
},
{
"epoch": 0.19200903571932798,
"grad_norm": 0.1672070324420929,
"learning_rate": 4.1661275714840214e-05,
"loss": 1.202446174621582,
"step": 1530
},
{
"epoch": 0.19326399673710135,
"grad_norm": 0.19555087387561798,
"learning_rate": 4.159658429292276e-05,
"loss": 1.0974900245666503,
"step": 1540
},
{
"epoch": 0.19451895775487474,
"grad_norm": 0.1727800965309143,
"learning_rate": 4.153189287100531e-05,
"loss": 1.0884978294372558,
"step": 1550
},
{
"epoch": 0.19577391877264813,
"grad_norm": 0.1973794847726822,
"learning_rate": 4.146720144908785e-05,
"loss": 1.195762825012207,
"step": 1560
},
{
"epoch": 0.19702887979042152,
"grad_norm": 0.20883244276046753,
"learning_rate": 4.14025100271704e-05,
"loss": 1.1073166847229003,
"step": 1570
},
{
"epoch": 0.1982838408081949,
"grad_norm": 0.1726527363061905,
"learning_rate": 4.133781860525295e-05,
"loss": 1.088674545288086,
"step": 1580
},
{
"epoch": 0.19953880182596828,
"grad_norm": 0.15648233890533447,
"learning_rate": 4.1273127183335494e-05,
"loss": 1.1345646858215332,
"step": 1590
},
{
"epoch": 0.20079376284374167,
"grad_norm": 0.1888463795185089,
"learning_rate": 4.120843576141804e-05,
"loss": 1.09979190826416,
"step": 1600
},
{
"epoch": 0.20204872386151504,
"grad_norm": 0.19210949540138245,
"learning_rate": 4.114374433950058e-05,
"loss": 1.1058323860168457,
"step": 1610
},
{
"epoch": 0.20330368487928843,
"grad_norm": 0.19619908928871155,
"learning_rate": 4.1079052917583134e-05,
"loss": 1.1319670677185059,
"step": 1620
},
{
"epoch": 0.20455864589706182,
"grad_norm": 0.1664625108242035,
"learning_rate": 4.1014361495665674e-05,
"loss": 1.0515789985656738,
"step": 1630
},
{
"epoch": 0.20581360691483522,
"grad_norm": 0.19872358441352844,
"learning_rate": 4.094967007374823e-05,
"loss": 1.1266546249389648,
"step": 1640
},
{
"epoch": 0.20706856793260858,
"grad_norm": 0.1681402623653412,
"learning_rate": 4.088497865183077e-05,
"loss": 1.1066089630126954,
"step": 1650
},
{
"epoch": 0.20832352895038198,
"grad_norm": 0.1954943835735321,
"learning_rate": 4.0820287229913315e-05,
"loss": 1.1911808013916017,
"step": 1660
},
{
"epoch": 0.20957848996815537,
"grad_norm": 0.17476460337638855,
"learning_rate": 4.075559580799586e-05,
"loss": 1.038119125366211,
"step": 1670
},
{
"epoch": 0.21083345098592876,
"grad_norm": 0.18508611619472504,
"learning_rate": 4.069090438607841e-05,
"loss": 1.0409876823425293,
"step": 1680
},
{
"epoch": 0.21208841200370213,
"grad_norm": 0.20123597979545593,
"learning_rate": 4.0626212964160955e-05,
"loss": 1.0861782073974608,
"step": 1690
},
{
"epoch": 0.21334337302147552,
"grad_norm": 0.2034509927034378,
"learning_rate": 4.05615215422435e-05,
"loss": 1.1509716033935546,
"step": 1700
},
{
"epoch": 0.2145983340392489,
"grad_norm": 0.16378086805343628,
"learning_rate": 4.049683012032605e-05,
"loss": 1.1095640182495117,
"step": 1710
},
{
"epoch": 0.2158532950570223,
"grad_norm": 0.2010604441165924,
"learning_rate": 4.0432138698408595e-05,
"loss": 1.0584364891052247,
"step": 1720
},
{
"epoch": 0.21710825607479567,
"grad_norm": 0.1633748859167099,
"learning_rate": 4.0367447276491135e-05,
"loss": 1.0981843948364258,
"step": 1730
},
{
"epoch": 0.21836321709256906,
"grad_norm": 0.1888664811849594,
"learning_rate": 4.030275585457369e-05,
"loss": 1.0630824089050293,
"step": 1740
},
{
"epoch": 0.21961817811034245,
"grad_norm": 0.18354853987693787,
"learning_rate": 4.023806443265623e-05,
"loss": 1.1649972915649414,
"step": 1750
},
{
"epoch": 0.22087313912811582,
"grad_norm": 0.18206484615802765,
"learning_rate": 4.017337301073878e-05,
"loss": 1.128775691986084,
"step": 1760
},
{
"epoch": 0.2221281001458892,
"grad_norm": 0.20967255532741547,
"learning_rate": 4.010868158882132e-05,
"loss": 1.0991705894470214,
"step": 1770
},
{
"epoch": 0.2233830611636626,
"grad_norm": 0.173291876912117,
"learning_rate": 4.004399016690387e-05,
"loss": 1.133495807647705,
"step": 1780
},
{
"epoch": 0.224638022181436,
"grad_norm": 0.18495796620845795,
"learning_rate": 3.9979298744986416e-05,
"loss": 1.1338909149169922,
"step": 1790
},
{
"epoch": 0.22589298319920936,
"grad_norm": 0.15466730296611786,
"learning_rate": 3.991460732306896e-05,
"loss": 1.103053092956543,
"step": 1800
},
{
"epoch": 0.22714794421698276,
"grad_norm": 0.2066875696182251,
"learning_rate": 3.984991590115151e-05,
"loss": 1.0687838554382325,
"step": 1810
},
{
"epoch": 0.22840290523475615,
"grad_norm": 0.1611821949481964,
"learning_rate": 3.9785224479234056e-05,
"loss": 1.0882232666015625,
"step": 1820
},
{
"epoch": 0.22965786625252954,
"grad_norm": 0.21903353929519653,
"learning_rate": 3.97205330573166e-05,
"loss": 1.1616278648376466,
"step": 1830
},
{
"epoch": 0.2309128272703029,
"grad_norm": 0.1864372193813324,
"learning_rate": 3.965584163539915e-05,
"loss": 1.0734145164489746,
"step": 1840
},
{
"epoch": 0.2321677882880763,
"grad_norm": 0.18822331726551056,
"learning_rate": 3.959115021348169e-05,
"loss": 1.082595157623291,
"step": 1850
},
{
"epoch": 0.2334227493058497,
"grad_norm": 0.17215979099273682,
"learning_rate": 3.952645879156424e-05,
"loss": 1.0656033515930177,
"step": 1860
},
{
"epoch": 0.23467771032362308,
"grad_norm": 0.19856449961662292,
"learning_rate": 3.946176736964678e-05,
"loss": 1.0940080642700196,
"step": 1870
},
{
"epoch": 0.23593267134139645,
"grad_norm": 0.19286569952964783,
"learning_rate": 3.9397075947729336e-05,
"loss": 1.1294413566589356,
"step": 1880
},
{
"epoch": 0.23718763235916984,
"grad_norm": 0.21396370232105255,
"learning_rate": 3.9332384525811876e-05,
"loss": 1.1367197990417481,
"step": 1890
},
{
"epoch": 0.23844259337694323,
"grad_norm": 0.19455762207508087,
"learning_rate": 3.926769310389442e-05,
"loss": 1.117790412902832,
"step": 1900
},
{
"epoch": 0.23969755439471663,
"grad_norm": 0.19210918247699738,
"learning_rate": 3.920300168197697e-05,
"loss": 1.078984260559082,
"step": 1910
},
{
"epoch": 0.24095251541249,
"grad_norm": 0.17716605961322784,
"learning_rate": 3.9138310260059516e-05,
"loss": 1.1049820899963378,
"step": 1920
},
{
"epoch": 0.24220747643026339,
"grad_norm": 0.1894821971654892,
"learning_rate": 3.907361883814206e-05,
"loss": 1.1215306282043458,
"step": 1930
},
{
"epoch": 0.24346243744803678,
"grad_norm": 0.16090793907642365,
"learning_rate": 3.900892741622461e-05,
"loss": 1.0604351997375487,
"step": 1940
},
{
"epoch": 0.24471739846581014,
"grad_norm": 0.16751867532730103,
"learning_rate": 3.894423599430716e-05,
"loss": 1.1072792053222655,
"step": 1950
},
{
"epoch": 0.24597235948358354,
"grad_norm": 0.18883423507213593,
"learning_rate": 3.8879544572389703e-05,
"loss": 1.1744267463684082,
"step": 1960
},
{
"epoch": 0.24722732050135693,
"grad_norm": 0.17353016138076782,
"learning_rate": 3.881485315047225e-05,
"loss": 1.1146465301513673,
"step": 1970
},
{
"epoch": 0.24848228151913032,
"grad_norm": 0.17569345235824585,
"learning_rate": 3.87501617285548e-05,
"loss": 1.1410762786865234,
"step": 1980
},
{
"epoch": 0.2497372425369037,
"grad_norm": 0.1861223578453064,
"learning_rate": 3.868547030663734e-05,
"loss": 1.1510584831237793,
"step": 1990
},
{
"epoch": 0.2509922035546771,
"grad_norm": 0.1848827749490738,
"learning_rate": 3.862077888471989e-05,
"loss": 1.159224510192871,
"step": 2000
},
{
"epoch": 0.25224716457245044,
"grad_norm": 0.17827710509300232,
"learning_rate": 3.855608746280243e-05,
"loss": 1.0784708976745605,
"step": 2010
},
{
"epoch": 0.25350212559022384,
"grad_norm": 0.2000972479581833,
"learning_rate": 3.849139604088498e-05,
"loss": 1.0981005668640136,
"step": 2020
},
{
"epoch": 0.25475708660799723,
"grad_norm": 0.17901651561260223,
"learning_rate": 3.842670461896753e-05,
"loss": 1.110117530822754,
"step": 2030
},
{
"epoch": 0.2560120476257706,
"grad_norm": 0.16541555523872375,
"learning_rate": 3.836201319705007e-05,
"loss": 1.100083065032959,
"step": 2040
},
{
"epoch": 0.257267008643544,
"grad_norm": 0.1809515506029129,
"learning_rate": 3.8297321775132624e-05,
"loss": 1.0859627723693848,
"step": 2050
},
{
"epoch": 0.2585219696613174,
"grad_norm": 0.17363417148590088,
"learning_rate": 3.8232630353215164e-05,
"loss": 1.1074336051940918,
"step": 2060
},
{
"epoch": 0.2597769306790908,
"grad_norm": 0.19632141292095184,
"learning_rate": 3.816793893129771e-05,
"loss": 1.092854881286621,
"step": 2070
},
{
"epoch": 0.2610318916968642,
"grad_norm": 0.18531949818134308,
"learning_rate": 3.810324750938026e-05,
"loss": 1.1531734466552734,
"step": 2080
},
{
"epoch": 0.26228685271463753,
"grad_norm": 0.2005389928817749,
"learning_rate": 3.8038556087462804e-05,
"loss": 1.0838706016540527,
"step": 2090
},
{
"epoch": 0.2635418137324109,
"grad_norm": 0.2321166694164276,
"learning_rate": 3.797386466554535e-05,
"loss": 1.1546616554260254,
"step": 2100
},
{
"epoch": 0.2647967747501843,
"grad_norm": 0.1765083223581314,
"learning_rate": 3.79091732436279e-05,
"loss": 1.0811034202575684,
"step": 2110
},
{
"epoch": 0.2660517357679577,
"grad_norm": 0.18589410185813904,
"learning_rate": 3.7844481821710445e-05,
"loss": 1.0837715148925782,
"step": 2120
},
{
"epoch": 0.2673066967857311,
"grad_norm": 0.17447498440742493,
"learning_rate": 3.777979039979299e-05,
"loss": 1.124140453338623,
"step": 2130
},
{
"epoch": 0.2685616578035045,
"grad_norm": 0.18152177333831787,
"learning_rate": 3.771509897787554e-05,
"loss": 1.1175949096679687,
"step": 2140
},
{
"epoch": 0.2698166188212779,
"grad_norm": 0.18476802110671997,
"learning_rate": 3.7650407555958085e-05,
"loss": 1.109239387512207,
"step": 2150
},
{
"epoch": 0.2710715798390512,
"grad_norm": 0.1941436380147934,
"learning_rate": 3.7585716134040625e-05,
"loss": 1.121135139465332,
"step": 2160
},
{
"epoch": 0.2723265408568246,
"grad_norm": 0.21705880761146545,
"learning_rate": 3.752102471212318e-05,
"loss": 1.0808409690856933,
"step": 2170
},
{
"epoch": 0.273581501874598,
"grad_norm": 0.20270583033561707,
"learning_rate": 3.745633329020572e-05,
"loss": 1.0374409675598144,
"step": 2180
},
{
"epoch": 0.2748364628923714,
"grad_norm": 0.1888822466135025,
"learning_rate": 3.739164186828827e-05,
"loss": 1.1372817039489747,
"step": 2190
},
{
"epoch": 0.2760914239101448,
"grad_norm": 0.18205349147319794,
"learning_rate": 3.732695044637081e-05,
"loss": 1.063914966583252,
"step": 2200
},
{
"epoch": 0.2773463849279182,
"grad_norm": 0.18658319115638733,
"learning_rate": 3.726225902445336e-05,
"loss": 1.0600374221801758,
"step": 2210
},
{
"epoch": 0.2786013459456916,
"grad_norm": 0.17652806639671326,
"learning_rate": 3.7197567602535905e-05,
"loss": 1.175550937652588,
"step": 2220
},
{
"epoch": 0.279856306963465,
"grad_norm": 0.3145999312400818,
"learning_rate": 3.713287618061845e-05,
"loss": 1.1401835441589356,
"step": 2230
},
{
"epoch": 0.2811112679812383,
"grad_norm": 0.19413627684116364,
"learning_rate": 3.7068184758701e-05,
"loss": 1.0594032287597657,
"step": 2240
},
{
"epoch": 0.2823662289990117,
"grad_norm": 0.19201666116714478,
"learning_rate": 3.7003493336783546e-05,
"loss": 1.1342046737670899,
"step": 2250
},
{
"epoch": 0.2836211900167851,
"grad_norm": 0.15544365346431732,
"learning_rate": 3.693880191486609e-05,
"loss": 1.050437831878662,
"step": 2260
},
{
"epoch": 0.2848761510345585,
"grad_norm": 0.19418646395206451,
"learning_rate": 3.687411049294864e-05,
"loss": 1.1457019805908204,
"step": 2270
},
{
"epoch": 0.2861311120523319,
"grad_norm": 0.1728999763727188,
"learning_rate": 3.680941907103118e-05,
"loss": 1.0303121566772462,
"step": 2280
},
{
"epoch": 0.2873860730701053,
"grad_norm": 0.17530235648155212,
"learning_rate": 3.674472764911373e-05,
"loss": 1.056619358062744,
"step": 2290
},
{
"epoch": 0.28864103408787867,
"grad_norm": 0.17840541899204254,
"learning_rate": 3.668003622719627e-05,
"loss": 1.1103222846984864,
"step": 2300
},
{
"epoch": 0.289895995105652,
"grad_norm": 0.1811124086380005,
"learning_rate": 3.6615344805278826e-05,
"loss": 1.0357528686523438,
"step": 2310
},
{
"epoch": 0.2911509561234254,
"grad_norm": 0.18500889837741852,
"learning_rate": 3.6550653383361366e-05,
"loss": 1.0739376068115234,
"step": 2320
},
{
"epoch": 0.2924059171411988,
"grad_norm": 0.1825813204050064,
"learning_rate": 3.648596196144391e-05,
"loss": 1.0761238098144532,
"step": 2330
},
{
"epoch": 0.2936608781589722,
"grad_norm": 0.16683438420295715,
"learning_rate": 3.642127053952646e-05,
"loss": 1.1081546783447265,
"step": 2340
},
{
"epoch": 0.2949158391767456,
"grad_norm": 0.2046334445476532,
"learning_rate": 3.6356579117609006e-05,
"loss": 1.1215451240539551,
"step": 2350
},
{
"epoch": 0.29617080019451897,
"grad_norm": 0.16321095824241638,
"learning_rate": 3.629188769569155e-05,
"loss": 1.0684563636779785,
"step": 2360
},
{
"epoch": 0.29742576121229236,
"grad_norm": 0.17285047471523285,
"learning_rate": 3.62271962737741e-05,
"loss": 1.117433452606201,
"step": 2370
},
{
"epoch": 0.29868072223006575,
"grad_norm": 0.17733103036880493,
"learning_rate": 3.6162504851856646e-05,
"loss": 1.0875020980834962,
"step": 2380
},
{
"epoch": 0.2999356832478391,
"grad_norm": 0.20012889802455902,
"learning_rate": 3.609781342993919e-05,
"loss": 1.090438175201416,
"step": 2390
},
{
"epoch": 0.3011906442656125,
"grad_norm": 0.2170931100845337,
"learning_rate": 3.603312200802173e-05,
"loss": 1.1388078689575196,
"step": 2400
},
{
"epoch": 0.3024456052833859,
"grad_norm": 0.1782235950231552,
"learning_rate": 3.596843058610429e-05,
"loss": 1.096130657196045,
"step": 2410
},
{
"epoch": 0.30370056630115927,
"grad_norm": 0.1945246011018753,
"learning_rate": 3.590373916418683e-05,
"loss": 1.1477142333984376,
"step": 2420
},
{
"epoch": 0.30495552731893266,
"grad_norm": 0.18570466339588165,
"learning_rate": 3.583904774226938e-05,
"loss": 1.1268895149230957,
"step": 2430
},
{
"epoch": 0.30621048833670605,
"grad_norm": 0.18892186880111694,
"learning_rate": 3.577435632035192e-05,
"loss": 1.0669918060302734,
"step": 2440
},
{
"epoch": 0.30746544935447945,
"grad_norm": 0.1974097490310669,
"learning_rate": 3.570966489843447e-05,
"loss": 1.1709393501281737,
"step": 2450
},
{
"epoch": 0.3087204103722528,
"grad_norm": 0.24947650730609894,
"learning_rate": 3.5644973476517014e-05,
"loss": 1.141524314880371,
"step": 2460
},
{
"epoch": 0.3099753713900262,
"grad_norm": 0.20610560476779938,
"learning_rate": 3.558028205459956e-05,
"loss": 1.1172548294067384,
"step": 2470
},
{
"epoch": 0.31123033240779957,
"grad_norm": 0.1904863864183426,
"learning_rate": 3.551559063268211e-05,
"loss": 1.092203712463379,
"step": 2480
},
{
"epoch": 0.31248529342557296,
"grad_norm": 0.20900042355060577,
"learning_rate": 3.5450899210764654e-05,
"loss": 1.164710235595703,
"step": 2490
},
{
"epoch": 0.31374025444334636,
"grad_norm": 0.19653667509555817,
"learning_rate": 3.53862077888472e-05,
"loss": 1.1293628692626954,
"step": 2500
},
{
"epoch": 0.31499521546111975,
"grad_norm": 0.1921471506357193,
"learning_rate": 3.532151636692975e-05,
"loss": 1.0006118774414063,
"step": 2510
},
{
"epoch": 0.31625017647889314,
"grad_norm": 0.17006602883338928,
"learning_rate": 3.5256824945012294e-05,
"loss": 1.1141504287719726,
"step": 2520
},
{
"epoch": 0.31750513749666653,
"grad_norm": 0.19610007107257843,
"learning_rate": 3.519213352309484e-05,
"loss": 1.0875147819519042,
"step": 2530
},
{
"epoch": 0.31876009851443987,
"grad_norm": 0.18897046148777008,
"learning_rate": 3.512744210117739e-05,
"loss": 1.1723553657531738,
"step": 2540
},
{
"epoch": 0.32001505953221326,
"grad_norm": 0.2167101353406906,
"learning_rate": 3.5062750679259934e-05,
"loss": 1.1133469581604003,
"step": 2550
},
{
"epoch": 0.32127002054998666,
"grad_norm": 0.19870373606681824,
"learning_rate": 3.499805925734248e-05,
"loss": 1.0627870559692383,
"step": 2560
},
{
"epoch": 0.32252498156776005,
"grad_norm": 0.19916154444217682,
"learning_rate": 3.493336783542502e-05,
"loss": 1.1032466888427734,
"step": 2570
},
{
"epoch": 0.32377994258553344,
"grad_norm": 0.17430101335048676,
"learning_rate": 3.4868676413507575e-05,
"loss": 1.0216200828552247,
"step": 2580
},
{
"epoch": 0.32503490360330684,
"grad_norm": 0.17178688943386078,
"learning_rate": 3.4803984991590115e-05,
"loss": 1.072761631011963,
"step": 2590
},
{
"epoch": 0.32628986462108023,
"grad_norm": 0.18140675127506256,
"learning_rate": 3.473929356967267e-05,
"loss": 1.1414005279541015,
"step": 2600
},
{
"epoch": 0.3275448256388536,
"grad_norm": 0.19831927120685577,
"learning_rate": 3.467460214775521e-05,
"loss": 1.1272093772888183,
"step": 2610
},
{
"epoch": 0.32879978665662696,
"grad_norm": 0.1755538433790207,
"learning_rate": 3.4609910725837755e-05,
"loss": 1.102944564819336,
"step": 2620
},
{
"epoch": 0.33005474767440035,
"grad_norm": 0.22686068713665009,
"learning_rate": 3.45452193039203e-05,
"loss": 1.098013973236084,
"step": 2630
},
{
"epoch": 0.33130970869217374,
"grad_norm": 0.15959154069423676,
"learning_rate": 3.448052788200285e-05,
"loss": 1.0637240409851074,
"step": 2640
},
{
"epoch": 0.33256466970994714,
"grad_norm": 0.19935756921768188,
"learning_rate": 3.4415836460085395e-05,
"loss": 1.0774771690368652,
"step": 2650
},
{
"epoch": 0.33381963072772053,
"grad_norm": 0.16935963928699493,
"learning_rate": 3.435114503816794e-05,
"loss": 1.0928321838378907,
"step": 2660
},
{
"epoch": 0.3350745917454939,
"grad_norm": 0.1932937055826187,
"learning_rate": 3.428645361625049e-05,
"loss": 1.034721279144287,
"step": 2670
},
{
"epoch": 0.3363295527632673,
"grad_norm": 0.20397278666496277,
"learning_rate": 3.4221762194333035e-05,
"loss": 1.1345463752746583,
"step": 2680
},
{
"epoch": 0.33758451378104065,
"grad_norm": 0.1837303638458252,
"learning_rate": 3.4157070772415575e-05,
"loss": 1.1856120109558106,
"step": 2690
},
{
"epoch": 0.33883947479881404,
"grad_norm": 0.2031344324350357,
"learning_rate": 3.409237935049813e-05,
"loss": 1.0397148132324219,
"step": 2700
},
{
"epoch": 0.34009443581658744,
"grad_norm": 0.20356477797031403,
"learning_rate": 3.402768792858067e-05,
"loss": 1.0965538024902344,
"step": 2710
},
{
"epoch": 0.34134939683436083,
"grad_norm": 0.17668817937374115,
"learning_rate": 3.396299650666322e-05,
"loss": 1.0809722900390626,
"step": 2720
},
{
"epoch": 0.3426043578521342,
"grad_norm": 0.20212671160697937,
"learning_rate": 3.389830508474576e-05,
"loss": 1.0418371200561523,
"step": 2730
},
{
"epoch": 0.3438593188699076,
"grad_norm": 0.17411935329437256,
"learning_rate": 3.383361366282831e-05,
"loss": 1.0937856674194335,
"step": 2740
},
{
"epoch": 0.345114279887681,
"grad_norm": 0.20167027413845062,
"learning_rate": 3.3768922240910856e-05,
"loss": 1.0584357261657715,
"step": 2750
},
{
"epoch": 0.3463692409054544,
"grad_norm": 0.23438484966754913,
"learning_rate": 3.37042308189934e-05,
"loss": 1.1240981101989747,
"step": 2760
},
{
"epoch": 0.34762420192322774,
"grad_norm": 0.20869354903697968,
"learning_rate": 3.363953939707595e-05,
"loss": 1.0020330429077149,
"step": 2770
},
{
"epoch": 0.34887916294100113,
"grad_norm": 0.17483501136302948,
"learning_rate": 3.3574847975158496e-05,
"loss": 1.0768984794616698,
"step": 2780
},
{
"epoch": 0.3501341239587745,
"grad_norm": 0.19473516941070557,
"learning_rate": 3.351015655324104e-05,
"loss": 1.1058112144470216,
"step": 2790
},
{
"epoch": 0.3513890849765479,
"grad_norm": 0.18327480554580688,
"learning_rate": 3.344546513132359e-05,
"loss": 1.1330906867980957,
"step": 2800
},
{
"epoch": 0.3526440459943213,
"grad_norm": 0.19095930457115173,
"learning_rate": 3.338077370940613e-05,
"loss": 1.0920269012451171,
"step": 2810
},
{
"epoch": 0.3538990070120947,
"grad_norm": 0.20432385802268982,
"learning_rate": 3.331608228748868e-05,
"loss": 1.0859003067016602,
"step": 2820
},
{
"epoch": 0.3551539680298681,
"grad_norm": 0.17931047081947327,
"learning_rate": 3.325139086557122e-05,
"loss": 1.0956171989440917,
"step": 2830
},
{
"epoch": 0.35640892904764143,
"grad_norm": 0.19674962759017944,
"learning_rate": 3.3186699443653776e-05,
"loss": 1.0600525856018066,
"step": 2840
},
{
"epoch": 0.3576638900654148,
"grad_norm": 0.2171870470046997,
"learning_rate": 3.3122008021736316e-05,
"loss": 1.1198451042175293,
"step": 2850
},
{
"epoch": 0.3589188510831882,
"grad_norm": 0.17714793980121613,
"learning_rate": 3.305731659981886e-05,
"loss": 1.0424016952514648,
"step": 2860
},
{
"epoch": 0.3601738121009616,
"grad_norm": 0.17595025897026062,
"learning_rate": 3.299262517790141e-05,
"loss": 1.0837072372436523,
"step": 2870
},
{
"epoch": 0.361428773118735,
"grad_norm": 0.1887323260307312,
"learning_rate": 3.292793375598396e-05,
"loss": 1.0680608749389648,
"step": 2880
},
{
"epoch": 0.3626837341365084,
"grad_norm": 0.194399893283844,
"learning_rate": 3.2863242334066503e-05,
"loss": 0.9988933563232422,
"step": 2890
},
{
"epoch": 0.3639386951542818,
"grad_norm": 0.1708306223154068,
"learning_rate": 3.279855091214905e-05,
"loss": 1.1282541275024414,
"step": 2900
},
{
"epoch": 0.3651936561720552,
"grad_norm": 0.20244979858398438,
"learning_rate": 3.27338594902316e-05,
"loss": 1.1450789451599122,
"step": 2910
},
{
"epoch": 0.3664486171898285,
"grad_norm": 0.1962517648935318,
"learning_rate": 3.2669168068314144e-05,
"loss": 1.0602560997009278,
"step": 2920
},
{
"epoch": 0.3677035782076019,
"grad_norm": 0.20567139983177185,
"learning_rate": 3.2604476646396684e-05,
"loss": 1.1111217498779298,
"step": 2930
},
{
"epoch": 0.3689585392253753,
"grad_norm": 0.20776300132274628,
"learning_rate": 3.253978522447924e-05,
"loss": 1.0344942092895508,
"step": 2940
},
{
"epoch": 0.3702135002431487,
"grad_norm": 0.21871210634708405,
"learning_rate": 3.2475093802561784e-05,
"loss": 1.1111419677734375,
"step": 2950
},
{
"epoch": 0.3714684612609221,
"grad_norm": 0.185679093003273,
"learning_rate": 3.241040238064433e-05,
"loss": 1.1700579643249511,
"step": 2960
},
{
"epoch": 0.3727234222786955,
"grad_norm": 0.19544155895709991,
"learning_rate": 3.234571095872688e-05,
"loss": 1.0801254272460938,
"step": 2970
},
{
"epoch": 0.3739783832964689,
"grad_norm": 0.20506669580936432,
"learning_rate": 3.228101953680942e-05,
"loss": 1.1814807891845702,
"step": 2980
},
{
"epoch": 0.3752333443142422,
"grad_norm": 0.17181119322776794,
"learning_rate": 3.221632811489197e-05,
"loss": 1.0522710800170898,
"step": 2990
},
{
"epoch": 0.3764883053320156,
"grad_norm": 0.23374006152153015,
"learning_rate": 3.215163669297451e-05,
"loss": 1.0902445793151856,
"step": 3000
},
{
"epoch": 0.377743266349789,
"grad_norm": 0.21061059832572937,
"learning_rate": 3.2086945271057064e-05,
"loss": 1.1061354637145997,
"step": 3010
},
{
"epoch": 0.3789982273675624,
"grad_norm": 0.18865549564361572,
"learning_rate": 3.2022253849139604e-05,
"loss": 1.090491485595703,
"step": 3020
},
{
"epoch": 0.3802531883853358,
"grad_norm": 0.19853392243385315,
"learning_rate": 3.195756242722215e-05,
"loss": 1.1597092628479004,
"step": 3030
},
{
"epoch": 0.3815081494031092,
"grad_norm": 0.1734134405851364,
"learning_rate": 3.18928710053047e-05,
"loss": 1.1168682098388671,
"step": 3040
},
{
"epoch": 0.38276311042088257,
"grad_norm": 0.18935444951057434,
"learning_rate": 3.1828179583387245e-05,
"loss": 1.1608116149902343,
"step": 3050
},
{
"epoch": 0.38401807143865596,
"grad_norm": 0.18997101485729218,
"learning_rate": 3.176348816146979e-05,
"loss": 1.094447135925293,
"step": 3060
},
{
"epoch": 0.3852730324564293,
"grad_norm": 0.16133999824523926,
"learning_rate": 3.169879673955234e-05,
"loss": 1.1392805099487304,
"step": 3070
},
{
"epoch": 0.3865279934742027,
"grad_norm": 0.19918449223041534,
"learning_rate": 3.1634105317634885e-05,
"loss": 1.1354280471801759,
"step": 3080
},
{
"epoch": 0.3877829544919761,
"grad_norm": 0.17987719178199768,
"learning_rate": 3.156941389571743e-05,
"loss": 1.0349795341491699,
"step": 3090
},
{
"epoch": 0.3890379155097495,
"grad_norm": 0.21309016644954681,
"learning_rate": 3.150472247379997e-05,
"loss": 1.1042888641357422,
"step": 3100
},
{
"epoch": 0.39029287652752287,
"grad_norm": 0.17456930875778198,
"learning_rate": 3.1440031051882525e-05,
"loss": 1.1372068405151368,
"step": 3110
},
{
"epoch": 0.39154783754529626,
"grad_norm": 0.17959140241146088,
"learning_rate": 3.1375339629965065e-05,
"loss": 1.0295021057128906,
"step": 3120
},
{
"epoch": 0.39280279856306966,
"grad_norm": 0.21057923138141632,
"learning_rate": 3.131064820804762e-05,
"loss": 1.14412841796875,
"step": 3130
},
{
"epoch": 0.39405775958084305,
"grad_norm": 0.18713043630123138,
"learning_rate": 3.124595678613016e-05,
"loss": 1.0776933670043944,
"step": 3140
},
{
"epoch": 0.3953127205986164,
"grad_norm": 0.19994860887527466,
"learning_rate": 3.1181265364212705e-05,
"loss": 1.0793471336364746,
"step": 3150
},
{
"epoch": 0.3965676816163898,
"grad_norm": 0.18249864876270294,
"learning_rate": 3.111657394229525e-05,
"loss": 1.061374568939209,
"step": 3160
},
{
"epoch": 0.39782264263416317,
"grad_norm": 0.2102167159318924,
"learning_rate": 3.10518825203778e-05,
"loss": 1.0960093498229981,
"step": 3170
},
{
"epoch": 0.39907760365193656,
"grad_norm": 0.20832930505275726,
"learning_rate": 3.0987191098460346e-05,
"loss": 1.033592987060547,
"step": 3180
},
{
"epoch": 0.40033256466970996,
"grad_norm": 0.18993455171585083,
"learning_rate": 3.092249967654289e-05,
"loss": 1.1482149124145509,
"step": 3190
},
{
"epoch": 0.40158752568748335,
"grad_norm": 0.16434840857982635,
"learning_rate": 3.085780825462544e-05,
"loss": 1.1257658004760742,
"step": 3200
},
{
"epoch": 0.40284248670525674,
"grad_norm": 0.17940276861190796,
"learning_rate": 3.0793116832707986e-05,
"loss": 1.031651210784912,
"step": 3210
},
{
"epoch": 0.4040974477230301,
"grad_norm": 0.20808145403862,
"learning_rate": 3.0728425410790526e-05,
"loss": 1.0851681709289551,
"step": 3220
},
{
"epoch": 0.40535240874080347,
"grad_norm": 0.1987367421388626,
"learning_rate": 3.066373398887308e-05,
"loss": 1.1109742164611816,
"step": 3230
},
{
"epoch": 0.40660736975857686,
"grad_norm": 0.19617575407028198,
"learning_rate": 3.059904256695562e-05,
"loss": 1.0537912368774414,
"step": 3240
},
{
"epoch": 0.40786233077635026,
"grad_norm": 0.21639147400856018,
"learning_rate": 3.053435114503817e-05,
"loss": 1.125661563873291,
"step": 3250
},
{
"epoch": 0.40911729179412365,
"grad_norm": 0.1862943321466446,
"learning_rate": 3.0469659723120713e-05,
"loss": 1.1534263610839843,
"step": 3260
},
{
"epoch": 0.41037225281189704,
"grad_norm": 0.19426733255386353,
"learning_rate": 3.0404968301203263e-05,
"loss": 1.1007325172424316,
"step": 3270
},
{
"epoch": 0.41162721382967044,
"grad_norm": 0.19233031570911407,
"learning_rate": 3.0340276879285806e-05,
"loss": 1.101203441619873,
"step": 3280
},
{
"epoch": 0.41288217484744383,
"grad_norm": 0.1994284987449646,
"learning_rate": 3.0275585457368356e-05,
"loss": 1.1052302360534667,
"step": 3290
},
{
"epoch": 0.41413713586521717,
"grad_norm": 0.2279525250196457,
"learning_rate": 3.02108940354509e-05,
"loss": 1.061030387878418,
"step": 3300
},
{
"epoch": 0.41539209688299056,
"grad_norm": 0.17196883261203766,
"learning_rate": 3.0146202613533446e-05,
"loss": 0.9545375823974609,
"step": 3310
},
{
"epoch": 0.41664705790076395,
"grad_norm": 0.2149975597858429,
"learning_rate": 3.008151119161599e-05,
"loss": 1.048507022857666,
"step": 3320
},
{
"epoch": 0.41790201891853734,
"grad_norm": 0.16794604063034058,
"learning_rate": 3.001681976969854e-05,
"loss": 1.0982236862182617,
"step": 3330
},
{
"epoch": 0.41915697993631074,
"grad_norm": 0.177035853266716,
"learning_rate": 2.9952128347781083e-05,
"loss": 1.0892048835754395,
"step": 3340
},
{
"epoch": 0.42041194095408413,
"grad_norm": 0.19969920814037323,
"learning_rate": 2.9887436925863633e-05,
"loss": 1.0508790016174316,
"step": 3350
},
{
"epoch": 0.4216669019718575,
"grad_norm": 0.1925683468580246,
"learning_rate": 2.9822745503946177e-05,
"loss": 1.115567111968994,
"step": 3360
},
{
"epoch": 0.42292186298963086,
"grad_norm": 0.1786133497953415,
"learning_rate": 2.9758054082028724e-05,
"loss": 1.1230090141296387,
"step": 3370
},
{
"epoch": 0.42417682400740425,
"grad_norm": 0.18185003101825714,
"learning_rate": 2.9693362660111267e-05,
"loss": 1.0770362854003905,
"step": 3380
},
{
"epoch": 0.42543178502517764,
"grad_norm": 0.18131586909294128,
"learning_rate": 2.9628671238193817e-05,
"loss": 1.0566128730773925,
"step": 3390
},
{
"epoch": 0.42668674604295104,
"grad_norm": 0.1889081597328186,
"learning_rate": 2.9563979816276367e-05,
"loss": 1.1312256813049317,
"step": 3400
},
{
"epoch": 0.42794170706072443,
"grad_norm": 0.19797080755233765,
"learning_rate": 2.949928839435891e-05,
"loss": 1.097049617767334,
"step": 3410
},
{
"epoch": 0.4291966680784978,
"grad_norm": 0.21253708004951477,
"learning_rate": 2.9434596972441457e-05,
"loss": 1.063642692565918,
"step": 3420
},
{
"epoch": 0.4304516290962712,
"grad_norm": 0.18668265640735626,
"learning_rate": 2.9369905550524e-05,
"loss": 1.11463623046875,
"step": 3430
},
{
"epoch": 0.4317065901140446,
"grad_norm": 0.1904267519712448,
"learning_rate": 2.930521412860655e-05,
"loss": 1.0638154029846192,
"step": 3440
},
{
"epoch": 0.43296155113181795,
"grad_norm": 0.15511858463287354,
"learning_rate": 2.9240522706689094e-05,
"loss": 1.1070695877075196,
"step": 3450
},
{
"epoch": 0.43421651214959134,
"grad_norm": 0.18738579750061035,
"learning_rate": 2.9175831284771644e-05,
"loss": 1.0907926559448242,
"step": 3460
},
{
"epoch": 0.43547147316736473,
"grad_norm": 0.20306473970413208,
"learning_rate": 2.9111139862854188e-05,
"loss": 1.0509754180908204,
"step": 3470
},
{
"epoch": 0.4367264341851381,
"grad_norm": 0.17330707609653473,
"learning_rate": 2.9046448440936734e-05,
"loss": 1.086292552947998,
"step": 3480
},
{
"epoch": 0.4379813952029115,
"grad_norm": 0.21493610739707947,
"learning_rate": 2.8981757019019278e-05,
"loss": 1.0941463470458985,
"step": 3490
},
{
"epoch": 0.4392363562206849,
"grad_norm": 0.20350311696529388,
"learning_rate": 2.8917065597101828e-05,
"loss": 1.1737398147583007,
"step": 3500
},
{
"epoch": 0.4404913172384583,
"grad_norm": 0.19272877275943756,
"learning_rate": 2.885237417518437e-05,
"loss": 1.0502948760986328,
"step": 3510
},
{
"epoch": 0.44174627825623164,
"grad_norm": 0.17556731402873993,
"learning_rate": 2.878768275326692e-05,
"loss": 1.103357982635498,
"step": 3520
},
{
"epoch": 0.44300123927400503,
"grad_norm": 0.22516275942325592,
"learning_rate": 2.8722991331349465e-05,
"loss": 1.1097342491149902,
"step": 3530
},
{
"epoch": 0.4442562002917784,
"grad_norm": 0.17185984551906586,
"learning_rate": 2.865829990943201e-05,
"loss": 1.0627781867980957,
"step": 3540
},
{
"epoch": 0.4455111613095518,
"grad_norm": 0.20243440568447113,
"learning_rate": 2.8593608487514555e-05,
"loss": 1.083481216430664,
"step": 3550
},
{
"epoch": 0.4467661223273252,
"grad_norm": 0.20377662777900696,
"learning_rate": 2.8528917065597105e-05,
"loss": 1.0517413139343261,
"step": 3560
},
{
"epoch": 0.4480210833450986,
"grad_norm": 0.19456355273723602,
"learning_rate": 2.846422564367965e-05,
"loss": 1.0656819343566895,
"step": 3570
},
{
"epoch": 0.449276044362872,
"grad_norm": 0.21682517230510712,
"learning_rate": 2.83995342217622e-05,
"loss": 1.0958752632141113,
"step": 3580
},
{
"epoch": 0.4505310053806454,
"grad_norm": 0.19284336268901825,
"learning_rate": 2.8334842799844742e-05,
"loss": 1.0725554466247558,
"step": 3590
},
{
"epoch": 0.4517859663984187,
"grad_norm": 0.1780819147825241,
"learning_rate": 2.827015137792729e-05,
"loss": 1.0786738395690918,
"step": 3600
},
{
"epoch": 0.4530409274161921,
"grad_norm": 0.19883157312870026,
"learning_rate": 2.8205459956009832e-05,
"loss": 1.066401481628418,
"step": 3610
},
{
"epoch": 0.4542958884339655,
"grad_norm": 0.1856444627046585,
"learning_rate": 2.8140768534092382e-05,
"loss": 1.1166013717651366,
"step": 3620
},
{
"epoch": 0.4555508494517389,
"grad_norm": 0.2152242809534073,
"learning_rate": 2.8076077112174925e-05,
"loss": 1.0979412078857422,
"step": 3630
},
{
"epoch": 0.4568058104695123,
"grad_norm": 0.19830797612667084,
"learning_rate": 2.8011385690257476e-05,
"loss": 1.1257426261901855,
"step": 3640
},
{
"epoch": 0.4580607714872857,
"grad_norm": 0.2238456904888153,
"learning_rate": 2.794669426834002e-05,
"loss": 1.1249502182006836,
"step": 3650
},
{
"epoch": 0.4593157325050591,
"grad_norm": 0.17598365247249603,
"learning_rate": 2.7882002846422566e-05,
"loss": 1.0131544113159179,
"step": 3660
},
{
"epoch": 0.4605706935228324,
"grad_norm": 0.2041376829147339,
"learning_rate": 2.781731142450511e-05,
"loss": 1.0872151374816894,
"step": 3670
},
{
"epoch": 0.4618256545406058,
"grad_norm": 0.20209568738937378,
"learning_rate": 2.775262000258766e-05,
"loss": 1.0570232391357421,
"step": 3680
},
{
"epoch": 0.4630806155583792,
"grad_norm": 0.1764563024044037,
"learning_rate": 2.7687928580670203e-05,
"loss": 1.0938175201416016,
"step": 3690
},
{
"epoch": 0.4643355765761526,
"grad_norm": 0.197507843375206,
"learning_rate": 2.7623237158752753e-05,
"loss": 1.0432971000671387,
"step": 3700
},
{
"epoch": 0.465590537593926,
"grad_norm": 0.1874912828207016,
"learning_rate": 2.7558545736835296e-05,
"loss": 1.0999769210815429,
"step": 3710
},
{
"epoch": 0.4668454986116994,
"grad_norm": 0.18974807858467102,
"learning_rate": 2.7493854314917843e-05,
"loss": 1.1539668083190917,
"step": 3720
},
{
"epoch": 0.4681004596294728,
"grad_norm": 0.20754633843898773,
"learning_rate": 2.7429162893000386e-05,
"loss": 1.0792550086975097,
"step": 3730
},
{
"epoch": 0.46935542064724617,
"grad_norm": 0.18311122059822083,
"learning_rate": 2.7364471471082936e-05,
"loss": 1.0288339614868165,
"step": 3740
},
{
"epoch": 0.4706103816650195,
"grad_norm": 0.21719680726528168,
"learning_rate": 2.729978004916548e-05,
"loss": 1.0714326858520509,
"step": 3750
},
{
"epoch": 0.4718653426827929,
"grad_norm": 0.2074572890996933,
"learning_rate": 2.723508862724803e-05,
"loss": 1.0084345817565918,
"step": 3760
},
{
"epoch": 0.4731203037005663,
"grad_norm": 0.1585981547832489,
"learning_rate": 2.7170397205330573e-05,
"loss": 1.065823745727539,
"step": 3770
},
{
"epoch": 0.4743752647183397,
"grad_norm": 0.1950136125087738,
"learning_rate": 2.710570578341312e-05,
"loss": 1.0472920417785645,
"step": 3780
},
{
"epoch": 0.4756302257361131,
"grad_norm": 0.20094197988510132,
"learning_rate": 2.7041014361495663e-05,
"loss": 1.0385844230651855,
"step": 3790
},
{
"epoch": 0.47688518675388647,
"grad_norm": 0.18639400601387024,
"learning_rate": 2.6976322939578213e-05,
"loss": 1.065108299255371,
"step": 3800
},
{
"epoch": 0.47814014777165986,
"grad_norm": 0.190430149435997,
"learning_rate": 2.6911631517660757e-05,
"loss": 1.0249187469482421,
"step": 3810
},
{
"epoch": 0.47939510878943326,
"grad_norm": 0.19305895268917084,
"learning_rate": 2.6846940095743307e-05,
"loss": 1.0661443710327148,
"step": 3820
},
{
"epoch": 0.4806500698072066,
"grad_norm": 0.1928487867116928,
"learning_rate": 2.678224867382585e-05,
"loss": 1.0664926528930665,
"step": 3830
},
{
"epoch": 0.48190503082498,
"grad_norm": 0.19009914994239807,
"learning_rate": 2.6717557251908397e-05,
"loss": 1.0212774276733398,
"step": 3840
},
{
"epoch": 0.4831599918427534,
"grad_norm": 0.18818970024585724,
"learning_rate": 2.665286582999094e-05,
"loss": 1.0675536155700684,
"step": 3850
},
{
"epoch": 0.48441495286052677,
"grad_norm": 0.19587545096874237,
"learning_rate": 2.658817440807349e-05,
"loss": 1.1134575843811034,
"step": 3860
},
{
"epoch": 0.48566991387830016,
"grad_norm": 0.1728365272283554,
"learning_rate": 2.652348298615604e-05,
"loss": 1.0674519538879395,
"step": 3870
},
{
"epoch": 0.48692487489607356,
"grad_norm": 0.18507780134677887,
"learning_rate": 2.6458791564238584e-05,
"loss": 1.0612947463989257,
"step": 3880
},
{
"epoch": 0.48817983591384695,
"grad_norm": 0.17732012271881104,
"learning_rate": 2.639410014232113e-05,
"loss": 1.1351598739624023,
"step": 3890
},
{
"epoch": 0.4894347969316203,
"grad_norm": 0.1905921846628189,
"learning_rate": 2.6329408720403674e-05,
"loss": 1.105655574798584,
"step": 3900
},
{
"epoch": 0.4906897579493937,
"grad_norm": 0.1902066469192505,
"learning_rate": 2.6264717298486224e-05,
"loss": 1.1006397247314452,
"step": 3910
},
{
"epoch": 0.49194471896716707,
"grad_norm": 0.16983291506767273,
"learning_rate": 2.6200025876568768e-05,
"loss": 1.0307989120483398,
"step": 3920
},
{
"epoch": 0.49319967998494046,
"grad_norm": 0.16205719113349915,
"learning_rate": 2.6135334454651318e-05,
"loss": 1.1160860061645508,
"step": 3930
},
{
"epoch": 0.49445464100271386,
"grad_norm": 0.16868488490581512,
"learning_rate": 2.607064303273386e-05,
"loss": 1.1177806854248047,
"step": 3940
},
{
"epoch": 0.49570960202048725,
"grad_norm": 0.19134441018104553,
"learning_rate": 2.6005951610816408e-05,
"loss": 1.0095656394958497,
"step": 3950
},
{
"epoch": 0.49696456303826064,
"grad_norm": 0.17027780413627625,
"learning_rate": 2.594126018889895e-05,
"loss": 1.1302468299865722,
"step": 3960
},
{
"epoch": 0.49821952405603404,
"grad_norm": 0.16149652004241943,
"learning_rate": 2.58765687669815e-05,
"loss": 1.0545706748962402,
"step": 3970
},
{
"epoch": 0.4994744850738074,
"grad_norm": 0.1791815310716629,
"learning_rate": 2.5811877345064045e-05,
"loss": 1.0691394805908203,
"step": 3980
},
{
"epoch": 0.5007294460915808,
"grad_norm": 0.19383347034454346,
"learning_rate": 2.5747185923146595e-05,
"loss": 1.071311855316162,
"step": 3990
},
{
"epoch": 0.5019844071093542,
"grad_norm": 0.20299892127513885,
"learning_rate": 2.5682494501229138e-05,
"loss": 1.0849023818969727,
"step": 4000
},
{
"epoch": 0.5032393681271276,
"grad_norm": 0.18995410203933716,
"learning_rate": 2.5617803079311685e-05,
"loss": 1.0594166755676269,
"step": 4010
},
{
"epoch": 0.5044943291449009,
"grad_norm": 0.22304005920886993,
"learning_rate": 2.5553111657394228e-05,
"loss": 1.0542009353637696,
"step": 4020
},
{
"epoch": 0.5057492901626743,
"grad_norm": 0.22307968139648438,
"learning_rate": 2.548842023547678e-05,
"loss": 1.107966136932373,
"step": 4030
},
{
"epoch": 0.5070042511804477,
"grad_norm": 0.1840265691280365,
"learning_rate": 2.5423728813559322e-05,
"loss": 1.1096989631652832,
"step": 4040
},
{
"epoch": 0.5082592121982211,
"grad_norm": 0.21252040565013885,
"learning_rate": 2.5359037391641872e-05,
"loss": 1.0967977523803711,
"step": 4050
},
{
"epoch": 0.5095141732159945,
"grad_norm": 0.18273372948169708,
"learning_rate": 2.5294345969724415e-05,
"loss": 1.018519401550293,
"step": 4060
},
{
"epoch": 0.5107691342337679,
"grad_norm": 0.18331177532672882,
"learning_rate": 2.5229654547806962e-05,
"loss": 1.049039077758789,
"step": 4070
},
{
"epoch": 0.5120240952515412,
"grad_norm": 0.19110405445098877,
"learning_rate": 2.5164963125889505e-05,
"loss": 1.0827875137329102,
"step": 4080
},
{
"epoch": 0.5132790562693146,
"grad_norm": 0.16529154777526855,
"learning_rate": 2.5100271703972055e-05,
"loss": 1.0904165267944337,
"step": 4090
},
{
"epoch": 0.514534017287088,
"grad_norm": 0.17822785675525665,
"learning_rate": 2.50355802820546e-05,
"loss": 1.0597058296203614,
"step": 4100
},
{
"epoch": 0.5157889783048614,
"grad_norm": 0.1769823282957077,
"learning_rate": 2.497088886013715e-05,
"loss": 1.0827682495117188,
"step": 4110
},
{
"epoch": 0.5170439393226348,
"grad_norm": 0.19942662119865417,
"learning_rate": 2.4906197438219696e-05,
"loss": 1.0987505912780762,
"step": 4120
},
{
"epoch": 0.5182989003404082,
"grad_norm": 0.19398993253707886,
"learning_rate": 2.484150601630224e-05,
"loss": 1.0218748092651366,
"step": 4130
},
{
"epoch": 0.5195538613581816,
"grad_norm": 0.21403302252292633,
"learning_rate": 2.4776814594384786e-05,
"loss": 1.1508358001708985,
"step": 4140
},
{
"epoch": 0.5208088223759549,
"grad_norm": 0.1951906532049179,
"learning_rate": 2.4712123172467333e-05,
"loss": 1.1334738731384277,
"step": 4150
},
{
"epoch": 0.5220637833937284,
"grad_norm": 0.18993432819843292,
"learning_rate": 2.464743175054988e-05,
"loss": 1.0698814392089844,
"step": 4160
},
{
"epoch": 0.5233187444115017,
"grad_norm": 0.2009001225233078,
"learning_rate": 2.4582740328632426e-05,
"loss": 1.1019264221191407,
"step": 4170
},
{
"epoch": 0.5245737054292751,
"grad_norm": 0.1592164784669876,
"learning_rate": 2.4518048906714973e-05,
"loss": 1.0502429008483887,
"step": 4180
},
{
"epoch": 0.5258286664470485,
"grad_norm": 0.20943738520145416,
"learning_rate": 2.4453357484797516e-05,
"loss": 1.1068140029907227,
"step": 4190
},
{
"epoch": 0.5270836274648218,
"grad_norm": 0.1841343641281128,
"learning_rate": 2.4388666062880063e-05,
"loss": 1.0863500595092774,
"step": 4200
},
{
"epoch": 0.5283385884825953,
"grad_norm": 0.21408788859844208,
"learning_rate": 2.432397464096261e-05,
"loss": 1.0539629936218262,
"step": 4210
},
{
"epoch": 0.5295935495003686,
"grad_norm": 0.2031819224357605,
"learning_rate": 2.4259283219045156e-05,
"loss": 1.0763005256652831,
"step": 4220
},
{
"epoch": 0.5308485105181421,
"grad_norm": 0.1948988288640976,
"learning_rate": 2.4194591797127703e-05,
"loss": 1.1096826553344727,
"step": 4230
},
{
"epoch": 0.5321034715359154,
"grad_norm": 0.19453151524066925,
"learning_rate": 2.412990037521025e-05,
"loss": 1.0530555725097657,
"step": 4240
},
{
"epoch": 0.5333584325536888,
"grad_norm": 0.22539453208446503,
"learning_rate": 2.4065208953292793e-05,
"loss": 1.0634571075439454,
"step": 4250
},
{
"epoch": 0.5346133935714622,
"grad_norm": 0.2108486145734787,
"learning_rate": 2.400051753137534e-05,
"loss": 1.0783962249755858,
"step": 4260
},
{
"epoch": 0.5358683545892355,
"grad_norm": 0.22550557553768158,
"learning_rate": 2.3935826109457887e-05,
"loss": 1.0793485641479492,
"step": 4270
},
{
"epoch": 0.537123315607009,
"grad_norm": 0.18710452318191528,
"learning_rate": 2.3871134687540433e-05,
"loss": 1.0253171920776367,
"step": 4280
},
{
"epoch": 0.5383782766247823,
"grad_norm": 0.18738947808742523,
"learning_rate": 2.380644326562298e-05,
"loss": 1.0552565574645996,
"step": 4290
},
{
"epoch": 0.5396332376425558,
"grad_norm": 0.19568362832069397,
"learning_rate": 2.3741751843705527e-05,
"loss": 1.0678536415100097,
"step": 4300
},
{
"epoch": 0.5408881986603291,
"grad_norm": 0.16987361013889313,
"learning_rate": 2.3677060421788074e-05,
"loss": 1.1124211311340333,
"step": 4310
},
{
"epoch": 0.5421431596781024,
"grad_norm": 0.2189544439315796,
"learning_rate": 2.3612368999870617e-05,
"loss": 1.0600645065307617,
"step": 4320
},
{
"epoch": 0.5433981206958759,
"grad_norm": 0.17936541140079498,
"learning_rate": 2.3547677577953164e-05,
"loss": 1.0915284156799316,
"step": 4330
},
{
"epoch": 0.5446530817136492,
"grad_norm": 0.19402658939361572,
"learning_rate": 2.348298615603571e-05,
"loss": 1.0507197380065918,
"step": 4340
},
{
"epoch": 0.5459080427314227,
"grad_norm": 0.23639065027236938,
"learning_rate": 2.3418294734118257e-05,
"loss": 1.1000999450683593,
"step": 4350
},
{
"epoch": 0.547163003749196,
"grad_norm": 0.1931799352169037,
"learning_rate": 2.3353603312200804e-05,
"loss": 1.0721006393432617,
"step": 4360
},
{
"epoch": 0.5484179647669695,
"grad_norm": 0.20396549999713898,
"learning_rate": 2.328891189028335e-05,
"loss": 1.1172670364379882,
"step": 4370
},
{
"epoch": 0.5496729257847428,
"grad_norm": 0.2059352695941925,
"learning_rate": 2.3224220468365894e-05,
"loss": 1.070617389678955,
"step": 4380
},
{
"epoch": 0.5509278868025161,
"grad_norm": 0.2139720320701599,
"learning_rate": 2.315952904644844e-05,
"loss": 1.0570523262023925,
"step": 4390
},
{
"epoch": 0.5521828478202896,
"grad_norm": 0.16804735362529755,
"learning_rate": 2.3094837624530988e-05,
"loss": 1.0962253570556642,
"step": 4400
},
{
"epoch": 0.5534378088380629,
"grad_norm": 0.19438262283802032,
"learning_rate": 2.3030146202613534e-05,
"loss": 1.052304172515869,
"step": 4410
},
{
"epoch": 0.5546927698558364,
"grad_norm": 0.1669342964887619,
"learning_rate": 2.296545478069608e-05,
"loss": 0.9899564743041992,
"step": 4420
},
{
"epoch": 0.5559477308736097,
"grad_norm": 0.19921936094760895,
"learning_rate": 2.2900763358778628e-05,
"loss": 1.143895435333252,
"step": 4430
},
{
"epoch": 0.5572026918913832,
"grad_norm": 0.18776285648345947,
"learning_rate": 2.283607193686117e-05,
"loss": 1.1010767936706543,
"step": 4440
},
{
"epoch": 0.5584576529091565,
"grad_norm": 0.19864770770072937,
"learning_rate": 2.2771380514943718e-05,
"loss": 1.1463205337524414,
"step": 4450
},
{
"epoch": 0.55971261392693,
"grad_norm": 0.251132607460022,
"learning_rate": 2.2706689093026265e-05,
"loss": 1.1078590393066405,
"step": 4460
},
{
"epoch": 0.5609675749447033,
"grad_norm": 0.18442977964878082,
"learning_rate": 2.264199767110881e-05,
"loss": 1.0865904808044433,
"step": 4470
},
{
"epoch": 0.5622225359624766,
"grad_norm": 0.19264988601207733,
"learning_rate": 2.2577306249191358e-05,
"loss": 1.098098087310791,
"step": 4480
},
{
"epoch": 0.5634774969802501,
"grad_norm": 0.22054506838321686,
"learning_rate": 2.2512614827273905e-05,
"loss": 1.1162951469421387,
"step": 4490
},
{
"epoch": 0.5647324579980234,
"grad_norm": 0.18666113913059235,
"learning_rate": 2.244792340535645e-05,
"loss": 0.9729397773742676,
"step": 4500
},
{
"epoch": 0.5659874190157969,
"grad_norm": 0.19067735970020294,
"learning_rate": 2.2383231983438995e-05,
"loss": 1.0903347969055175,
"step": 4510
},
{
"epoch": 0.5672423800335702,
"grad_norm": 0.18109118938446045,
"learning_rate": 2.2318540561521542e-05,
"loss": 1.063498592376709,
"step": 4520
},
{
"epoch": 0.5684973410513436,
"grad_norm": 0.18894648551940918,
"learning_rate": 2.225384913960409e-05,
"loss": 1.0704896926879883,
"step": 4530
},
{
"epoch": 0.569752302069117,
"grad_norm": 0.1963120698928833,
"learning_rate": 2.218915771768664e-05,
"loss": 1.0621774673461915,
"step": 4540
},
{
"epoch": 0.5710072630868903,
"grad_norm": 0.22396929562091827,
"learning_rate": 2.2124466295769182e-05,
"loss": 1.084723663330078,
"step": 4550
},
{
"epoch": 0.5722622241046638,
"grad_norm": 0.1798727661371231,
"learning_rate": 2.205977487385173e-05,
"loss": 1.05871639251709,
"step": 4560
},
{
"epoch": 0.5735171851224371,
"grad_norm": 0.1864812821149826,
"learning_rate": 2.1995083451934276e-05,
"loss": 1.0498836517333985,
"step": 4570
},
{
"epoch": 0.5747721461402105,
"grad_norm": 0.19118115305900574,
"learning_rate": 2.1930392030016822e-05,
"loss": 1.099202823638916,
"step": 4580
},
{
"epoch": 0.5760271071579839,
"grad_norm": 0.21500501036643982,
"learning_rate": 2.186570060809937e-05,
"loss": 1.1167634010314942,
"step": 4590
},
{
"epoch": 0.5772820681757573,
"grad_norm": 0.22220079600811005,
"learning_rate": 2.1801009186181916e-05,
"loss": 1.1403026580810547,
"step": 4600
},
{
"epoch": 0.5785370291935307,
"grad_norm": 0.1848171502351761,
"learning_rate": 2.173631776426446e-05,
"loss": 1.0590831756591796,
"step": 4610
},
{
"epoch": 0.579791990211304,
"grad_norm": 0.20435841381549835,
"learning_rate": 2.1671626342347006e-05,
"loss": 1.090967559814453,
"step": 4620
},
{
"epoch": 0.5810469512290775,
"grad_norm": 0.1693217009305954,
"learning_rate": 2.1606934920429553e-05,
"loss": 1.066522216796875,
"step": 4630
},
{
"epoch": 0.5823019122468508,
"grad_norm": 0.18368832767009735,
"learning_rate": 2.15422434985121e-05,
"loss": 1.0151689529418946,
"step": 4640
},
{
"epoch": 0.5835568732646242,
"grad_norm": 0.18117396533489227,
"learning_rate": 2.1477552076594646e-05,
"loss": 1.1592654228210448,
"step": 4650
},
{
"epoch": 0.5848118342823976,
"grad_norm": 0.17642000317573547,
"learning_rate": 2.1412860654677193e-05,
"loss": 1.0778505325317382,
"step": 4660
},
{
"epoch": 0.586066795300171,
"grad_norm": 0.1876673847436905,
"learning_rate": 2.1348169232759736e-05,
"loss": 1.077680492401123,
"step": 4670
},
{
"epoch": 0.5873217563179444,
"grad_norm": 0.20584918558597565,
"learning_rate": 2.1283477810842283e-05,
"loss": 1.0664028167724608,
"step": 4680
},
{
"epoch": 0.5885767173357178,
"grad_norm": 0.18888245522975922,
"learning_rate": 2.121878638892483e-05,
"loss": 1.044777774810791,
"step": 4690
},
{
"epoch": 0.5898316783534912,
"grad_norm": 0.18159054219722748,
"learning_rate": 2.1154094967007376e-05,
"loss": 1.140822982788086,
"step": 4700
},
{
"epoch": 0.5910866393712645,
"grad_norm": 0.209550142288208,
"learning_rate": 2.1089403545089923e-05,
"loss": 1.1214600563049317,
"step": 4710
},
{
"epoch": 0.5923416003890379,
"grad_norm": 0.19829699397087097,
"learning_rate": 2.102471212317247e-05,
"loss": 1.0791502952575684,
"step": 4720
},
{
"epoch": 0.5935965614068113,
"grad_norm": 0.18692730367183685,
"learning_rate": 2.0960020701255013e-05,
"loss": 1.083406352996826,
"step": 4730
},
{
"epoch": 0.5948515224245847,
"grad_norm": 0.203223317861557,
"learning_rate": 2.089532927933756e-05,
"loss": 1.1177435874938966,
"step": 4740
},
{
"epoch": 0.5961064834423581,
"grad_norm": 0.18921977281570435,
"learning_rate": 2.0830637857420107e-05,
"loss": 1.1003907203674317,
"step": 4750
},
{
"epoch": 0.5973614444601315,
"grad_norm": 0.21286743879318237,
"learning_rate": 2.0765946435502654e-05,
"loss": 1.07603759765625,
"step": 4760
},
{
"epoch": 0.5986164054779048,
"grad_norm": 0.20326635241508484,
"learning_rate": 2.07012550135852e-05,
"loss": 1.0025395393371581,
"step": 4770
},
{
"epoch": 0.5998713664956782,
"grad_norm": 0.19325220584869385,
"learning_rate": 2.0636563591667747e-05,
"loss": 1.119535255432129,
"step": 4780
},
{
"epoch": 0.6011263275134516,
"grad_norm": 0.19274058938026428,
"learning_rate": 2.057187216975029e-05,
"loss": 1.073643970489502,
"step": 4790
},
{
"epoch": 0.602381288531225,
"grad_norm": 0.1856803596019745,
"learning_rate": 2.0507180747832837e-05,
"loss": 1.0840465545654296,
"step": 4800
},
{
"epoch": 0.6036362495489984,
"grad_norm": 0.17938411235809326,
"learning_rate": 2.0442489325915384e-05,
"loss": 0.9779937744140625,
"step": 4810
},
{
"epoch": 0.6048912105667718,
"grad_norm": 0.22091540694236755,
"learning_rate": 2.037779790399793e-05,
"loss": 1.023126220703125,
"step": 4820
},
{
"epoch": 0.6061461715845452,
"grad_norm": 0.16854438185691833,
"learning_rate": 2.0313106482080477e-05,
"loss": 1.0393505096435547,
"step": 4830
},
{
"epoch": 0.6074011326023185,
"grad_norm": 0.18858060240745544,
"learning_rate": 2.0248415060163024e-05,
"loss": 1.0123619079589843,
"step": 4840
},
{
"epoch": 0.6086560936200919,
"grad_norm": 0.1751975268125534,
"learning_rate": 2.0183723638245568e-05,
"loss": 1.1000499725341797,
"step": 4850
},
{
"epoch": 0.6099110546378653,
"grad_norm": 0.21609099209308624,
"learning_rate": 2.0119032216328114e-05,
"loss": 1.0882283210754395,
"step": 4860
},
{
"epoch": 0.6111660156556387,
"grad_norm": 0.18892847001552582,
"learning_rate": 2.005434079441066e-05,
"loss": 1.0862364768981934,
"step": 4870
},
{
"epoch": 0.6124209766734121,
"grad_norm": 0.16767629981040955,
"learning_rate": 1.9989649372493208e-05,
"loss": 1.104989719390869,
"step": 4880
},
{
"epoch": 0.6136759376911854,
"grad_norm": 0.16860723495483398,
"learning_rate": 1.9924957950575755e-05,
"loss": 1.1031203269958496,
"step": 4890
},
{
"epoch": 0.6149308987089589,
"grad_norm": 0.22763581573963165,
"learning_rate": 1.98602665286583e-05,
"loss": 1.0784507751464845,
"step": 4900
},
{
"epoch": 0.6161858597267322,
"grad_norm": 0.1832362413406372,
"learning_rate": 1.9795575106740845e-05,
"loss": 1.139822292327881,
"step": 4910
},
{
"epoch": 0.6174408207445056,
"grad_norm": 0.18726925551891327,
"learning_rate": 1.973088368482339e-05,
"loss": 1.0816995620727539,
"step": 4920
},
{
"epoch": 0.618695781762279,
"grad_norm": 0.21192297339439392,
"learning_rate": 1.9666192262905938e-05,
"loss": 1.0300673484802245,
"step": 4930
},
{
"epoch": 0.6199507427800524,
"grad_norm": 0.18673470616340637,
"learning_rate": 1.9601500840988485e-05,
"loss": 1.0630935668945312,
"step": 4940
},
{
"epoch": 0.6212057037978258,
"grad_norm": 0.19416026771068573,
"learning_rate": 1.953680941907103e-05,
"loss": 1.0864192962646484,
"step": 4950
},
{
"epoch": 0.6224606648155991,
"grad_norm": 0.17337632179260254,
"learning_rate": 1.947211799715358e-05,
"loss": 1.0580739974975586,
"step": 4960
},
{
"epoch": 0.6237156258333726,
"grad_norm": 0.1994207203388214,
"learning_rate": 1.9407426575236125e-05,
"loss": 1.0788381576538086,
"step": 4970
},
{
"epoch": 0.6249705868511459,
"grad_norm": 0.1950082629919052,
"learning_rate": 1.934273515331867e-05,
"loss": 1.0657068252563477,
"step": 4980
},
{
"epoch": 0.6262255478689194,
"grad_norm": 0.19410517811775208,
"learning_rate": 1.9278043731401215e-05,
"loss": 0.9977058410644531,
"step": 4990
},
{
"epoch": 0.6274805088866927,
"grad_norm": 0.18865294754505157,
"learning_rate": 1.9213352309483765e-05,
"loss": 1.0384069442749024,
"step": 5000
},
{
"epoch": 0.628735469904466,
"grad_norm": 0.20763295888900757,
"learning_rate": 1.9148660887566312e-05,
"loss": 1.036677646636963,
"step": 5010
},
{
"epoch": 0.6299904309222395,
"grad_norm": 0.20186074078083038,
"learning_rate": 1.9083969465648855e-05,
"loss": 1.0434916496276856,
"step": 5020
},
{
"epoch": 0.6312453919400128,
"grad_norm": 0.20196175575256348,
"learning_rate": 1.9019278043731402e-05,
"loss": 1.0678818702697754,
"step": 5030
},
{
"epoch": 0.6325003529577863,
"grad_norm": 0.18434342741966248,
"learning_rate": 1.895458662181395e-05,
"loss": 0.9929670333862305,
"step": 5040
},
{
"epoch": 0.6337553139755596,
"grad_norm": 0.17742949724197388,
"learning_rate": 1.8889895199896496e-05,
"loss": 1.0131173133850098,
"step": 5050
},
{
"epoch": 0.6350102749933331,
"grad_norm": 0.1894538849592209,
"learning_rate": 1.8825203777979042e-05,
"loss": 0.9884425163269043,
"step": 5060
},
{
"epoch": 0.6362652360111064,
"grad_norm": 0.19760024547576904,
"learning_rate": 1.876051235606159e-05,
"loss": 1.1242119789123535,
"step": 5070
},
{
"epoch": 0.6375201970288797,
"grad_norm": 0.21608242392539978,
"learning_rate": 1.8695820934144136e-05,
"loss": 1.0201186180114745,
"step": 5080
},
{
"epoch": 0.6387751580466532,
"grad_norm": 0.19353747367858887,
"learning_rate": 1.863112951222668e-05,
"loss": 1.0742148399353026,
"step": 5090
},
{
"epoch": 0.6400301190644265,
"grad_norm": 0.1773729920387268,
"learning_rate": 1.8566438090309226e-05,
"loss": 1.0668268203735352,
"step": 5100
},
{
"epoch": 0.6412850800822,
"grad_norm": 0.16842322051525116,
"learning_rate": 1.8501746668391773e-05,
"loss": 1.111803913116455,
"step": 5110
},
{
"epoch": 0.6425400410999733,
"grad_norm": 0.17250721156597137,
"learning_rate": 1.843705524647432e-05,
"loss": 1.0860756874084472,
"step": 5120
},
{
"epoch": 0.6437950021177468,
"grad_norm": 0.20766721665859222,
"learning_rate": 1.8372363824556866e-05,
"loss": 1.0899797439575196,
"step": 5130
},
{
"epoch": 0.6450499631355201,
"grad_norm": 0.16800430417060852,
"learning_rate": 1.8307672402639413e-05,
"loss": 1.0710078239440919,
"step": 5140
},
{
"epoch": 0.6463049241532934,
"grad_norm": 0.19757574796676636,
"learning_rate": 1.8242980980721956e-05,
"loss": 1.1292811393737794,
"step": 5150
},
{
"epoch": 0.6475598851710669,
"grad_norm": 0.20521418750286102,
"learning_rate": 1.8178289558804503e-05,
"loss": 1.0243899345397949,
"step": 5160
},
{
"epoch": 0.6488148461888402,
"grad_norm": 0.16537714004516602,
"learning_rate": 1.811359813688705e-05,
"loss": 1.057924747467041,
"step": 5170
},
{
"epoch": 0.6500698072066137,
"grad_norm": 0.18862581253051758,
"learning_rate": 1.8048906714969597e-05,
"loss": 0.996666145324707,
"step": 5180
},
{
"epoch": 0.651324768224387,
"grad_norm": 0.17373378574848175,
"learning_rate": 1.7984215293052143e-05,
"loss": 1.0661702156066895,
"step": 5190
},
{
"epoch": 0.6525797292421605,
"grad_norm": 0.20793020725250244,
"learning_rate": 1.791952387113469e-05,
"loss": 1.081492805480957,
"step": 5200
},
{
"epoch": 0.6538346902599338,
"grad_norm": 0.1953442394733429,
"learning_rate": 1.7854832449217233e-05,
"loss": 1.0730542182922362,
"step": 5210
},
{
"epoch": 0.6550896512777072,
"grad_norm": 0.22261761128902435,
"learning_rate": 1.779014102729978e-05,
"loss": 1.0947938919067384,
"step": 5220
},
{
"epoch": 0.6563446122954806,
"grad_norm": 0.18047408759593964,
"learning_rate": 1.7725449605382327e-05,
"loss": 1.1488112449645995,
"step": 5230
},
{
"epoch": 0.6575995733132539,
"grad_norm": 0.21739941835403442,
"learning_rate": 1.7660758183464874e-05,
"loss": 1.0836384773254395,
"step": 5240
},
{
"epoch": 0.6588545343310274,
"grad_norm": 0.16749468445777893,
"learning_rate": 1.759606676154742e-05,
"loss": 1.0533074378967284,
"step": 5250
},
{
"epoch": 0.6601094953488007,
"grad_norm": 0.19696615636348724,
"learning_rate": 1.7531375339629967e-05,
"loss": 1.0351654052734376,
"step": 5260
},
{
"epoch": 0.6613644563665741,
"grad_norm": 0.206893190741539,
"learning_rate": 1.746668391771251e-05,
"loss": 1.03566312789917,
"step": 5270
},
{
"epoch": 0.6626194173843475,
"grad_norm": 0.21252737939357758,
"learning_rate": 1.7401992495795057e-05,
"loss": 1.098302173614502,
"step": 5280
},
{
"epoch": 0.6638743784021209,
"grad_norm": 0.18752658367156982,
"learning_rate": 1.7337301073877604e-05,
"loss": 1.1029739379882812,
"step": 5290
},
{
"epoch": 0.6651293394198943,
"grad_norm": 0.1916881799697876,
"learning_rate": 1.727260965196015e-05,
"loss": 1.0457392692565919,
"step": 5300
},
{
"epoch": 0.6663843004376676,
"grad_norm": 0.16342513263225555,
"learning_rate": 1.7207918230042698e-05,
"loss": 1.0347809791564941,
"step": 5310
},
{
"epoch": 0.6676392614554411,
"grad_norm": 0.1860560029745102,
"learning_rate": 1.7143226808125244e-05,
"loss": 1.0950869560241698,
"step": 5320
},
{
"epoch": 0.6688942224732144,
"grad_norm": 0.19112198054790497,
"learning_rate": 1.7078535386207788e-05,
"loss": 1.1099298477172852,
"step": 5330
},
{
"epoch": 0.6701491834909878,
"grad_norm": 0.18603506684303284,
"learning_rate": 1.7013843964290334e-05,
"loss": 1.0756458282470702,
"step": 5340
},
{
"epoch": 0.6714041445087612,
"grad_norm": 0.1958581954240799,
"learning_rate": 1.694915254237288e-05,
"loss": 1.1287766456604005,
"step": 5350
},
{
"epoch": 0.6726591055265346,
"grad_norm": 0.1939549297094345,
"learning_rate": 1.6884461120455428e-05,
"loss": 1.1160104751586915,
"step": 5360
},
{
"epoch": 0.673914066544308,
"grad_norm": 0.17285564541816711,
"learning_rate": 1.6819769698537975e-05,
"loss": 1.0963159561157227,
"step": 5370
},
{
"epoch": 0.6751690275620813,
"grad_norm": 0.1727699637413025,
"learning_rate": 1.675507827662052e-05,
"loss": 1.139615821838379,
"step": 5380
},
{
"epoch": 0.6764239885798548,
"grad_norm": 0.19430099427700043,
"learning_rate": 1.6690386854703065e-05,
"loss": 1.0105334281921388,
"step": 5390
},
{
"epoch": 0.6776789495976281,
"grad_norm": 0.1681535243988037,
"learning_rate": 1.662569543278561e-05,
"loss": 1.0578609466552735,
"step": 5400
},
{
"epoch": 0.6789339106154015,
"grad_norm": 0.18775980174541473,
"learning_rate": 1.6561004010868158e-05,
"loss": 1.0771096229553223,
"step": 5410
},
{
"epoch": 0.6801888716331749,
"grad_norm": 0.24977093935012817,
"learning_rate": 1.6496312588950705e-05,
"loss": 1.0842226028442383,
"step": 5420
},
{
"epoch": 0.6814438326509483,
"grad_norm": 0.21266375482082367,
"learning_rate": 1.6431621167033252e-05,
"loss": 1.01865816116333,
"step": 5430
},
{
"epoch": 0.6826987936687217,
"grad_norm": 0.20400455594062805,
"learning_rate": 1.63669297451158e-05,
"loss": 1.0554264068603516,
"step": 5440
},
{
"epoch": 0.683953754686495,
"grad_norm": 0.19339953362941742,
"learning_rate": 1.6302238323198342e-05,
"loss": 1.0870820045471192,
"step": 5450
},
{
"epoch": 0.6852087157042684,
"grad_norm": 0.20825320482254028,
"learning_rate": 1.6237546901280892e-05,
"loss": 1.1114639282226562,
"step": 5460
},
{
"epoch": 0.6864636767220418,
"grad_norm": 0.184986412525177,
"learning_rate": 1.617285547936344e-05,
"loss": 1.1310782432556152,
"step": 5470
},
{
"epoch": 0.6877186377398152,
"grad_norm": 0.1939050555229187,
"learning_rate": 1.6108164057445985e-05,
"loss": 1.1249576568603517,
"step": 5480
},
{
"epoch": 0.6889735987575886,
"grad_norm": 0.18793314695358276,
"learning_rate": 1.6043472635528532e-05,
"loss": 1.0266096115112304,
"step": 5490
},
{
"epoch": 0.690228559775362,
"grad_norm": 0.18402604758739471,
"learning_rate": 1.5978781213611076e-05,
"loss": 1.020193386077881,
"step": 5500
},
{
"epoch": 0.6914835207931354,
"grad_norm": 0.1962927132844925,
"learning_rate": 1.5914089791693622e-05,
"loss": 1.12153902053833,
"step": 5510
},
{
"epoch": 0.6927384818109088,
"grad_norm": 0.21442580223083496,
"learning_rate": 1.584939836977617e-05,
"loss": 1.076151180267334,
"step": 5520
},
{
"epoch": 0.6939934428286821,
"grad_norm": 0.18974730372428894,
"learning_rate": 1.5784706947858716e-05,
"loss": 1.0637462615966797,
"step": 5530
},
{
"epoch": 0.6952484038464555,
"grad_norm": 0.19226610660552979,
"learning_rate": 1.5720015525941263e-05,
"loss": 1.0863216400146485,
"step": 5540
},
{
"epoch": 0.6965033648642289,
"grad_norm": 0.18693000078201294,
"learning_rate": 1.565532410402381e-05,
"loss": 1.050516128540039,
"step": 5550
},
{
"epoch": 0.6977583258820023,
"grad_norm": 0.2240767627954483,
"learning_rate": 1.5590632682106353e-05,
"loss": 1.1396910667419433,
"step": 5560
},
{
"epoch": 0.6990132868997757,
"grad_norm": 0.24488072097301483,
"learning_rate": 1.55259412601889e-05,
"loss": 1.029798984527588,
"step": 5570
},
{
"epoch": 0.700268247917549,
"grad_norm": 0.1701984703540802,
"learning_rate": 1.5461249838271446e-05,
"loss": 1.0401766777038575,
"step": 5580
},
{
"epoch": 0.7015232089353225,
"grad_norm": 0.17540504038333893,
"learning_rate": 1.5396558416353993e-05,
"loss": 1.0871668815612794,
"step": 5590
},
{
"epoch": 0.7027781699530958,
"grad_norm": 0.212332084774971,
"learning_rate": 1.533186699443654e-05,
"loss": 1.0655070304870606,
"step": 5600
},
{
"epoch": 0.7040331309708692,
"grad_norm": 0.2142452895641327,
"learning_rate": 1.5267175572519086e-05,
"loss": 1.0310181617736816,
"step": 5610
},
{
"epoch": 0.7052880919886426,
"grad_norm": 0.2183569073677063,
"learning_rate": 1.5202484150601631e-05,
"loss": 1.0585816383361817,
"step": 5620
},
{
"epoch": 0.706543053006416,
"grad_norm": 0.2261342704296112,
"learning_rate": 1.5137792728684178e-05,
"loss": 1.0550565719604492,
"step": 5630
},
{
"epoch": 0.7077980140241894,
"grad_norm": 0.20529380440711975,
"learning_rate": 1.5073101306766723e-05,
"loss": 1.0414663314819337,
"step": 5640
},
{
"epoch": 0.7090529750419627,
"grad_norm": 0.19481569528579712,
"learning_rate": 1.500840988484927e-05,
"loss": 1.0689180374145508,
"step": 5650
},
{
"epoch": 0.7103079360597362,
"grad_norm": 0.18099959194660187,
"learning_rate": 1.4943718462931817e-05,
"loss": 1.0106795310974122,
"step": 5660
},
{
"epoch": 0.7115628970775095,
"grad_norm": 0.1786813735961914,
"learning_rate": 1.4879027041014362e-05,
"loss": 1.071911334991455,
"step": 5670
},
{
"epoch": 0.7128178580952829,
"grad_norm": 0.17721523344516754,
"learning_rate": 1.4814335619096909e-05,
"loss": 1.0632891654968262,
"step": 5680
},
{
"epoch": 0.7140728191130563,
"grad_norm": 0.2053815722465515,
"learning_rate": 1.4749644197179455e-05,
"loss": 1.0814258575439453,
"step": 5690
},
{
"epoch": 0.7153277801308296,
"grad_norm": 0.20590366423130035,
"learning_rate": 1.4684952775262e-05,
"loss": 1.0101026535034179,
"step": 5700
},
{
"epoch": 0.7165827411486031,
"grad_norm": 0.18634529411792755,
"learning_rate": 1.4620261353344547e-05,
"loss": 1.0727294921875,
"step": 5710
},
{
"epoch": 0.7178377021663764,
"grad_norm": 0.21531324088573456,
"learning_rate": 1.4555569931427094e-05,
"loss": 1.0686802864074707,
"step": 5720
},
{
"epoch": 0.7190926631841499,
"grad_norm": 0.18119627237319946,
"learning_rate": 1.4490878509509639e-05,
"loss": 1.001711654663086,
"step": 5730
},
{
"epoch": 0.7203476242019232,
"grad_norm": 0.17979778349399567,
"learning_rate": 1.4426187087592186e-05,
"loss": 1.0803590774536134,
"step": 5740
},
{
"epoch": 0.7216025852196967,
"grad_norm": 0.20080508291721344,
"learning_rate": 1.4361495665674732e-05,
"loss": 1.10612154006958,
"step": 5750
},
{
"epoch": 0.72285754623747,
"grad_norm": 0.2107263058423996,
"learning_rate": 1.4296804243757277e-05,
"loss": 1.1027125358581542,
"step": 5760
},
{
"epoch": 0.7241125072552433,
"grad_norm": 0.23398469388484955,
"learning_rate": 1.4232112821839824e-05,
"loss": 1.1061213493347168,
"step": 5770
},
{
"epoch": 0.7253674682730168,
"grad_norm": 0.17343750596046448,
"learning_rate": 1.4167421399922371e-05,
"loss": 1.0411705017089843,
"step": 5780
},
{
"epoch": 0.7266224292907901,
"grad_norm": 0.2107427567243576,
"learning_rate": 1.4102729978004916e-05,
"loss": 1.0695199966430664,
"step": 5790
},
{
"epoch": 0.7278773903085636,
"grad_norm": 0.2061903476715088,
"learning_rate": 1.4038038556087463e-05,
"loss": 1.0993520736694335,
"step": 5800
},
{
"epoch": 0.7291323513263369,
"grad_norm": 0.2150152623653412,
"learning_rate": 1.397334713417001e-05,
"loss": 1.0545531272888184,
"step": 5810
},
{
"epoch": 0.7303873123441104,
"grad_norm": 0.17038771510124207,
"learning_rate": 1.3908655712252555e-05,
"loss": 1.0232912063598634,
"step": 5820
},
{
"epoch": 0.7316422733618837,
"grad_norm": 0.19553017616271973,
"learning_rate": 1.3843964290335101e-05,
"loss": 1.051710033416748,
"step": 5830
},
{
"epoch": 0.732897234379657,
"grad_norm": 0.21933381259441376,
"learning_rate": 1.3779272868417648e-05,
"loss": 1.0953669548034668,
"step": 5840
},
{
"epoch": 0.7341521953974305,
"grad_norm": 0.1644992232322693,
"learning_rate": 1.3714581446500193e-05,
"loss": 1.0111507415771483,
"step": 5850
},
{
"epoch": 0.7354071564152038,
"grad_norm": 0.1784752458333969,
"learning_rate": 1.364989002458274e-05,
"loss": 1.042880153656006,
"step": 5860
},
{
"epoch": 0.7366621174329773,
"grad_norm": 0.20959775149822235,
"learning_rate": 1.3585198602665287e-05,
"loss": 1.022576904296875,
"step": 5870
},
{
"epoch": 0.7379170784507506,
"grad_norm": 0.2106805443763733,
"learning_rate": 1.3520507180747832e-05,
"loss": 1.046870803833008,
"step": 5880
},
{
"epoch": 0.7391720394685241,
"grad_norm": 0.21699438989162445,
"learning_rate": 1.3455815758830378e-05,
"loss": 1.1192692756652831,
"step": 5890
},
{
"epoch": 0.7404270004862974,
"grad_norm": 0.19661927223205566,
"learning_rate": 1.3391124336912925e-05,
"loss": 1.0794744491577148,
"step": 5900
},
{
"epoch": 0.7416819615040707,
"grad_norm": 0.19780300557613373,
"learning_rate": 1.332643291499547e-05,
"loss": 1.0534735679626466,
"step": 5910
},
{
"epoch": 0.7429369225218442,
"grad_norm": 0.17464447021484375,
"learning_rate": 1.326174149307802e-05,
"loss": 1.0962336540222168,
"step": 5920
},
{
"epoch": 0.7441918835396175,
"grad_norm": 0.2023210972547531,
"learning_rate": 1.3197050071160565e-05,
"loss": 1.1575682640075684,
"step": 5930
},
{
"epoch": 0.745446844557391,
"grad_norm": 0.21466964483261108,
"learning_rate": 1.3132358649243112e-05,
"loss": 1.123062515258789,
"step": 5940
},
{
"epoch": 0.7467018055751643,
"grad_norm": 0.22124673426151276,
"learning_rate": 1.3067667227325659e-05,
"loss": 1.0591267585754394,
"step": 5950
},
{
"epoch": 0.7479567665929377,
"grad_norm": 0.2065040022134781,
"learning_rate": 1.3002975805408204e-05,
"loss": 1.0695542335510253,
"step": 5960
},
{
"epoch": 0.7492117276107111,
"grad_norm": 0.1949198693037033,
"learning_rate": 1.293828438349075e-05,
"loss": 1.1152711868286134,
"step": 5970
},
{
"epoch": 0.7504666886284844,
"grad_norm": 0.20681394636631012,
"learning_rate": 1.2873592961573297e-05,
"loss": 1.0414956092834473,
"step": 5980
},
{
"epoch": 0.7517216496462579,
"grad_norm": 0.2022944539785385,
"learning_rate": 1.2808901539655842e-05,
"loss": 1.0200919151306151,
"step": 5990
},
{
"epoch": 0.7529766106640312,
"grad_norm": 0.24508307874202728,
"learning_rate": 1.274421011773839e-05,
"loss": 1.0655020713806151,
"step": 6000
},
{
"epoch": 0.7542315716818047,
"grad_norm": 0.1903439313173294,
"learning_rate": 1.2679518695820936e-05,
"loss": 1.0706681251525878,
"step": 6010
},
{
"epoch": 0.755486532699578,
"grad_norm": 0.19773785769939423,
"learning_rate": 1.2614827273903481e-05,
"loss": 1.086234188079834,
"step": 6020
},
{
"epoch": 0.7567414937173514,
"grad_norm": 0.21088463068008423,
"learning_rate": 1.2550135851986028e-05,
"loss": 1.069676399230957,
"step": 6030
},
{
"epoch": 0.7579964547351248,
"grad_norm": 0.20454522967338562,
"learning_rate": 1.2485444430068574e-05,
"loss": 1.0646330833435058,
"step": 6040
},
{
"epoch": 0.7592514157528982,
"grad_norm": 0.19824111461639404,
"learning_rate": 1.242075300815112e-05,
"loss": 1.0749752044677734,
"step": 6050
},
{
"epoch": 0.7605063767706716,
"grad_norm": 0.19081714749336243,
"learning_rate": 1.2356061586233666e-05,
"loss": 1.0570926666259766,
"step": 6060
},
{
"epoch": 0.7617613377884449,
"grad_norm": 0.20483826100826263,
"learning_rate": 1.2291370164316213e-05,
"loss": 1.0441864013671875,
"step": 6070
},
{
"epoch": 0.7630162988062184,
"grad_norm": 0.2218000292778015,
"learning_rate": 1.2226678742398758e-05,
"loss": 1.0976847648620605,
"step": 6080
},
{
"epoch": 0.7642712598239917,
"grad_norm": 0.19163018465042114,
"learning_rate": 1.2161987320481305e-05,
"loss": 1.0738938331604004,
"step": 6090
},
{
"epoch": 0.7655262208417651,
"grad_norm": 0.22696638107299805,
"learning_rate": 1.2097295898563852e-05,
"loss": 1.09117431640625,
"step": 6100
},
{
"epoch": 0.7667811818595385,
"grad_norm": 0.21094925701618195,
"learning_rate": 1.2032604476646397e-05,
"loss": 1.1293853759765624,
"step": 6110
},
{
"epoch": 0.7680361428773119,
"grad_norm": 0.19011946022510529,
"learning_rate": 1.1967913054728943e-05,
"loss": 1.0315247535705567,
"step": 6120
},
{
"epoch": 0.7692911038950853,
"grad_norm": 0.22294586896896362,
"learning_rate": 1.190322163281149e-05,
"loss": 1.0366369247436524,
"step": 6130
},
{
"epoch": 0.7705460649128586,
"grad_norm": 0.19210191071033478,
"learning_rate": 1.1838530210894037e-05,
"loss": 1.041628646850586,
"step": 6140
},
{
"epoch": 0.771801025930632,
"grad_norm": 0.17535246908664703,
"learning_rate": 1.1773838788976582e-05,
"loss": 1.0377203941345214,
"step": 6150
},
{
"epoch": 0.7730559869484054,
"grad_norm": 0.1910182386636734,
"learning_rate": 1.1709147367059129e-05,
"loss": 1.0049347877502441,
"step": 6160
},
{
"epoch": 0.7743109479661788,
"grad_norm": 0.18827295303344727,
"learning_rate": 1.1644455945141675e-05,
"loss": 1.1181403160095216,
"step": 6170
},
{
"epoch": 0.7755659089839522,
"grad_norm": 0.19850969314575195,
"learning_rate": 1.157976452322422e-05,
"loss": 1.1215065002441407,
"step": 6180
},
{
"epoch": 0.7768208700017256,
"grad_norm": 0.21341505646705627,
"learning_rate": 1.1515073101306767e-05,
"loss": 1.0259138107299806,
"step": 6190
},
{
"epoch": 0.778075831019499,
"grad_norm": 0.1731249988079071,
"learning_rate": 1.1450381679389314e-05,
"loss": 1.0396163940429688,
"step": 6200
},
{
"epoch": 0.7793307920372723,
"grad_norm": 0.20062355697155,
"learning_rate": 1.1385690257471859e-05,
"loss": 1.0933070182800293,
"step": 6210
},
{
"epoch": 0.7805857530550457,
"grad_norm": 0.214784637093544,
"learning_rate": 1.1320998835554406e-05,
"loss": 1.038003921508789,
"step": 6220
},
{
"epoch": 0.7818407140728191,
"grad_norm": 0.17985184490680695,
"learning_rate": 1.1256307413636952e-05,
"loss": 1.0742274284362794,
"step": 6230
},
{
"epoch": 0.7830956750905925,
"grad_norm": 0.19000248610973358,
"learning_rate": 1.1191615991719498e-05,
"loss": 1.051091480255127,
"step": 6240
},
{
"epoch": 0.7843506361083659,
"grad_norm": 0.21635884046554565,
"learning_rate": 1.1126924569802044e-05,
"loss": 1.0835393905639648,
"step": 6250
},
{
"epoch": 0.7856055971261393,
"grad_norm": 0.22533932328224182,
"learning_rate": 1.1062233147884591e-05,
"loss": 1.046684741973877,
"step": 6260
},
{
"epoch": 0.7868605581439126,
"grad_norm": 0.1827181726694107,
"learning_rate": 1.0997541725967138e-05,
"loss": 1.0681885719299316,
"step": 6270
},
{
"epoch": 0.7881155191616861,
"grad_norm": 0.22982963919639587,
"learning_rate": 1.0932850304049685e-05,
"loss": 1.0472677230834961,
"step": 6280
},
{
"epoch": 0.7893704801794594,
"grad_norm": 0.19450107216835022,
"learning_rate": 1.086815888213223e-05,
"loss": 1.0826946258544923,
"step": 6290
},
{
"epoch": 0.7906254411972328,
"grad_norm": 0.19893890619277954,
"learning_rate": 1.0803467460214776e-05,
"loss": 1.0179737091064454,
"step": 6300
},
{
"epoch": 0.7918804022150062,
"grad_norm": 0.17373962700366974,
"learning_rate": 1.0738776038297323e-05,
"loss": 1.0917057037353515,
"step": 6310
},
{
"epoch": 0.7931353632327796,
"grad_norm": 0.2234533429145813,
"learning_rate": 1.0674084616379868e-05,
"loss": 1.0998245239257813,
"step": 6320
},
{
"epoch": 0.794390324250553,
"grad_norm": 0.2080935388803482,
"learning_rate": 1.0609393194462415e-05,
"loss": 1.021653938293457,
"step": 6330
},
{
"epoch": 0.7956452852683263,
"grad_norm": 0.2437697947025299,
"learning_rate": 1.0544701772544962e-05,
"loss": 1.083084487915039,
"step": 6340
},
{
"epoch": 0.7969002462860998,
"grad_norm": 0.20368912816047668,
"learning_rate": 1.0480010350627507e-05,
"loss": 1.0707550048828125,
"step": 6350
},
{
"epoch": 0.7981552073038731,
"grad_norm": 0.16665543615818024,
"learning_rate": 1.0415318928710053e-05,
"loss": 1.136690044403076,
"step": 6360
},
{
"epoch": 0.7994101683216465,
"grad_norm": 0.17936968803405762,
"learning_rate": 1.03506275067926e-05,
"loss": 1.0300110816955566,
"step": 6370
},
{
"epoch": 0.8006651293394199,
"grad_norm": 0.17155979573726654,
"learning_rate": 1.0285936084875145e-05,
"loss": 1.2345178604125977,
"step": 6380
},
{
"epoch": 0.8019200903571932,
"grad_norm": 0.19316871464252472,
"learning_rate": 1.0221244662957692e-05,
"loss": 1.1063278198242188,
"step": 6390
},
{
"epoch": 0.8031750513749667,
"grad_norm": 0.19231335818767548,
"learning_rate": 1.0156553241040239e-05,
"loss": 1.1099343299865723,
"step": 6400
},
{
"epoch": 0.80443001239274,
"grad_norm": 0.20124751329421997,
"learning_rate": 1.0091861819122784e-05,
"loss": 1.0818553924560548,
"step": 6410
},
{
"epoch": 0.8056849734105135,
"grad_norm": 0.23050783574581146,
"learning_rate": 1.002717039720533e-05,
"loss": 1.0924180030822754,
"step": 6420
},
{
"epoch": 0.8069399344282868,
"grad_norm": 0.19461168348789215,
"learning_rate": 9.962478975287877e-06,
"loss": 1.0269258499145508,
"step": 6430
},
{
"epoch": 0.8081948954460602,
"grad_norm": 0.1916336864233017,
"learning_rate": 9.897787553370422e-06,
"loss": 1.1376501083374024,
"step": 6440
},
{
"epoch": 0.8094498564638336,
"grad_norm": 0.18255390226840973,
"learning_rate": 9.833096131452969e-06,
"loss": 1.0916749000549317,
"step": 6450
},
{
"epoch": 0.8107048174816069,
"grad_norm": 0.1791938841342926,
"learning_rate": 9.768404709535516e-06,
"loss": 1.0888887405395509,
"step": 6460
},
{
"epoch": 0.8119597784993804,
"grad_norm": 0.1699710339307785,
"learning_rate": 9.703713287618063e-06,
"loss": 1.0881536483764649,
"step": 6470
},
{
"epoch": 0.8132147395171537,
"grad_norm": 0.19622161984443665,
"learning_rate": 9.639021865700608e-06,
"loss": 1.0142560958862306,
"step": 6480
},
{
"epoch": 0.8144697005349272,
"grad_norm": 0.18209852278232574,
"learning_rate": 9.574330443783156e-06,
"loss": 1.0698868751525878,
"step": 6490
},
{
"epoch": 0.8157246615527005,
"grad_norm": 0.16895531117916107,
"learning_rate": 9.509639021865701e-06,
"loss": 1.1377266883850097,
"step": 6500
},
{
"epoch": 0.8169796225704739,
"grad_norm": 0.1833142638206482,
"learning_rate": 9.444947599948248e-06,
"loss": 1.0924424171447753,
"step": 6510
},
{
"epoch": 0.8182345835882473,
"grad_norm": 0.2109987437725067,
"learning_rate": 9.380256178030795e-06,
"loss": 1.0673909187316895,
"step": 6520
},
{
"epoch": 0.8194895446060206,
"grad_norm": 0.17565134167671204,
"learning_rate": 9.31556475611334e-06,
"loss": 1.0398826599121094,
"step": 6530
},
{
"epoch": 0.8207445056237941,
"grad_norm": 0.22704172134399414,
"learning_rate": 9.250873334195886e-06,
"loss": 1.1169721603393554,
"step": 6540
},
{
"epoch": 0.8219994666415674,
"grad_norm": 0.21614673733711243,
"learning_rate": 9.186181912278433e-06,
"loss": 1.040649127960205,
"step": 6550
},
{
"epoch": 0.8232544276593409,
"grad_norm": 0.21649399399757385,
"learning_rate": 9.121490490360978e-06,
"loss": 1.1043192863464355,
"step": 6560
},
{
"epoch": 0.8245093886771142,
"grad_norm": 0.1912921667098999,
"learning_rate": 9.056799068443525e-06,
"loss": 1.127028465270996,
"step": 6570
},
{
"epoch": 0.8257643496948877,
"grad_norm": 0.2091410607099533,
"learning_rate": 8.992107646526072e-06,
"loss": 1.0905369758605956,
"step": 6580
},
{
"epoch": 0.827019310712661,
"grad_norm": 0.22103574872016907,
"learning_rate": 8.927416224608617e-06,
"loss": 0.9964225769042969,
"step": 6590
},
{
"epoch": 0.8282742717304343,
"grad_norm": 0.16947044432163239,
"learning_rate": 8.862724802691163e-06,
"loss": 1.1176776885986328,
"step": 6600
},
{
"epoch": 0.8295292327482078,
"grad_norm": 0.19035208225250244,
"learning_rate": 8.79803338077371e-06,
"loss": 1.1607711791992188,
"step": 6610
},
{
"epoch": 0.8307841937659811,
"grad_norm": 0.22612328827381134,
"learning_rate": 8.733341958856255e-06,
"loss": 1.0306846618652343,
"step": 6620
},
{
"epoch": 0.8320391547837546,
"grad_norm": 0.19116735458374023,
"learning_rate": 8.668650536938802e-06,
"loss": 1.0862977981567383,
"step": 6630
},
{
"epoch": 0.8332941158015279,
"grad_norm": 0.19095416367053986,
"learning_rate": 8.603959115021349e-06,
"loss": 1.0201908111572267,
"step": 6640
},
{
"epoch": 0.8345490768193013,
"grad_norm": 0.18990927934646606,
"learning_rate": 8.539267693103894e-06,
"loss": 1.0904932975769044,
"step": 6650
},
{
"epoch": 0.8358040378370747,
"grad_norm": 0.21695184707641602,
"learning_rate": 8.47457627118644e-06,
"loss": 1.0262943267822267,
"step": 6660
},
{
"epoch": 0.837058998854848,
"grad_norm": 0.18091970682144165,
"learning_rate": 8.409884849268987e-06,
"loss": 1.0900959014892577,
"step": 6670
},
{
"epoch": 0.8383139598726215,
"grad_norm": 0.21363726258277893,
"learning_rate": 8.345193427351532e-06,
"loss": 1.0485601425170898,
"step": 6680
},
{
"epoch": 0.8395689208903948,
"grad_norm": 0.19334258139133453,
"learning_rate": 8.280502005434079e-06,
"loss": 1.0345725059509276,
"step": 6690
},
{
"epoch": 0.8408238819081683,
"grad_norm": 0.20086072385311127,
"learning_rate": 8.215810583516626e-06,
"loss": 1.1097275733947753,
"step": 6700
},
{
"epoch": 0.8420788429259416,
"grad_norm": 0.1856435388326645,
"learning_rate": 8.151119161599171e-06,
"loss": 1.0502695083618163,
"step": 6710
},
{
"epoch": 0.843333803943715,
"grad_norm": 0.20465126633644104,
"learning_rate": 8.08642773968172e-06,
"loss": 1.0540699005126952,
"step": 6720
},
{
"epoch": 0.8445887649614884,
"grad_norm": 0.18902461230754852,
"learning_rate": 8.021736317764266e-06,
"loss": 1.1354016304016112,
"step": 6730
},
{
"epoch": 0.8458437259792617,
"grad_norm": 0.20140230655670166,
"learning_rate": 7.957044895846811e-06,
"loss": 0.9932634353637695,
"step": 6740
},
{
"epoch": 0.8470986869970352,
"grad_norm": 0.19942064583301544,
"learning_rate": 7.892353473929358e-06,
"loss": 1.1147568702697754,
"step": 6750
},
{
"epoch": 0.8483536480148085,
"grad_norm": 0.21955204010009766,
"learning_rate": 7.827662052011905e-06,
"loss": 0.9939888954162598,
"step": 6760
},
{
"epoch": 0.849608609032582,
"grad_norm": 0.2062138170003891,
"learning_rate": 7.76297063009445e-06,
"loss": 1.0063923835754394,
"step": 6770
},
{
"epoch": 0.8508635700503553,
"grad_norm": 0.23989014327526093,
"learning_rate": 7.698279208176996e-06,
"loss": 1.0760384559631349,
"step": 6780
},
{
"epoch": 0.8521185310681287,
"grad_norm": 0.20089785754680634,
"learning_rate": 7.633587786259543e-06,
"loss": 1.028395652770996,
"step": 6790
},
{
"epoch": 0.8533734920859021,
"grad_norm": 0.19919630885124207,
"learning_rate": 7.568896364342089e-06,
"loss": 1.02840576171875,
"step": 6800
},
{
"epoch": 0.8546284531036754,
"grad_norm": 0.2014761120080948,
"learning_rate": 7.504204942424635e-06,
"loss": 1.0546991348266601,
"step": 6810
},
{
"epoch": 0.8558834141214489,
"grad_norm": 0.19204354286193848,
"learning_rate": 7.439513520507181e-06,
"loss": 1.03892183303833,
"step": 6820
},
{
"epoch": 0.8571383751392222,
"grad_norm": 0.22647128999233246,
"learning_rate": 7.374822098589728e-06,
"loss": 1.0341573715209962,
"step": 6830
},
{
"epoch": 0.8583933361569956,
"grad_norm": 0.2208699733018875,
"learning_rate": 7.3101306766722735e-06,
"loss": 1.0098363876342773,
"step": 6840
},
{
"epoch": 0.859648297174769,
"grad_norm": 0.219742089509964,
"learning_rate": 7.2454392547548194e-06,
"loss": 1.0027207374572753,
"step": 6850
},
{
"epoch": 0.8609032581925424,
"grad_norm": 0.20601145923137665,
"learning_rate": 7.180747832837366e-06,
"loss": 1.0074657440185546,
"step": 6860
},
{
"epoch": 0.8621582192103158,
"grad_norm": 0.18209318816661835,
"learning_rate": 7.116056410919912e-06,
"loss": 1.0155368804931642,
"step": 6870
},
{
"epoch": 0.8634131802280892,
"grad_norm": 0.23554867506027222,
"learning_rate": 7.051364989002458e-06,
"loss": 1.047171401977539,
"step": 6880
},
{
"epoch": 0.8646681412458626,
"grad_norm": 0.17976488173007965,
"learning_rate": 6.986673567085005e-06,
"loss": 1.1108320236206055,
"step": 6890
},
{
"epoch": 0.8659231022636359,
"grad_norm": 0.17746324837207794,
"learning_rate": 6.921982145167551e-06,
"loss": 1.033961868286133,
"step": 6900
},
{
"epoch": 0.8671780632814093,
"grad_norm": 0.20548702776432037,
"learning_rate": 6.8572907232500965e-06,
"loss": 0.9573192596435547,
"step": 6910
},
{
"epoch": 0.8684330242991827,
"grad_norm": 0.24221506714820862,
"learning_rate": 6.792599301332643e-06,
"loss": 1.0891441345214843,
"step": 6920
},
{
"epoch": 0.8696879853169561,
"grad_norm": 0.1774619221687317,
"learning_rate": 6.727907879415189e-06,
"loss": 1.0774598121643066,
"step": 6930
},
{
"epoch": 0.8709429463347295,
"grad_norm": 0.1890013962984085,
"learning_rate": 6.663216457497735e-06,
"loss": 1.082331371307373,
"step": 6940
},
{
"epoch": 0.8721979073525029,
"grad_norm": 0.19728416204452515,
"learning_rate": 6.598525035580283e-06,
"loss": 1.0425737380981446,
"step": 6950
},
{
"epoch": 0.8734528683702762,
"grad_norm": 0.1938554048538208,
"learning_rate": 6.533833613662829e-06,
"loss": 0.9840812683105469,
"step": 6960
},
{
"epoch": 0.8747078293880496,
"grad_norm": 0.20868176221847534,
"learning_rate": 6.469142191745375e-06,
"loss": 1.107065486907959,
"step": 6970
},
{
"epoch": 0.875962790405823,
"grad_norm": 0.20235563814640045,
"learning_rate": 6.404450769827921e-06,
"loss": 1.035740566253662,
"step": 6980
},
{
"epoch": 0.8772177514235964,
"grad_norm": 0.19022290408611298,
"learning_rate": 6.339759347910468e-06,
"loss": 1.0365036010742188,
"step": 6990
},
{
"epoch": 0.8784727124413698,
"grad_norm": 0.19704179465770721,
"learning_rate": 6.275067925993014e-06,
"loss": 1.0721055030822755,
"step": 7000
},
{
"epoch": 0.8797276734591432,
"grad_norm": 0.21241937577724457,
"learning_rate": 6.21037650407556e-06,
"loss": 1.0706797599792481,
"step": 7010
},
{
"epoch": 0.8809826344769166,
"grad_norm": 0.2100059986114502,
"learning_rate": 6.1456850821581065e-06,
"loss": 1.0387989044189454,
"step": 7020
},
{
"epoch": 0.8822375954946899,
"grad_norm": 0.22000259160995483,
"learning_rate": 6.080993660240652e-06,
"loss": 1.0321297645568848,
"step": 7030
},
{
"epoch": 0.8834925565124633,
"grad_norm": 0.20955264568328857,
"learning_rate": 6.016302238323198e-06,
"loss": 1.0901686668395996,
"step": 7040
},
{
"epoch": 0.8847475175302367,
"grad_norm": 0.20897357165813446,
"learning_rate": 5.951610816405745e-06,
"loss": 1.1447209358215331,
"step": 7050
},
{
"epoch": 0.8860024785480101,
"grad_norm": 0.20097708702087402,
"learning_rate": 5.886919394488291e-06,
"loss": 0.9972308158874512,
"step": 7060
},
{
"epoch": 0.8872574395657835,
"grad_norm": 0.18822167813777924,
"learning_rate": 5.822227972570838e-06,
"loss": 1.1192616462707519,
"step": 7070
},
{
"epoch": 0.8885124005835568,
"grad_norm": 0.21870264410972595,
"learning_rate": 5.757536550653384e-06,
"loss": 1.0673290252685548,
"step": 7080
},
{
"epoch": 0.8897673616013303,
"grad_norm": 0.18924005329608917,
"learning_rate": 5.6928451287359295e-06,
"loss": 1.0575652122497559,
"step": 7090
},
{
"epoch": 0.8910223226191036,
"grad_norm": 0.1820686012506485,
"learning_rate": 5.628153706818476e-06,
"loss": 1.0578106880187987,
"step": 7100
},
{
"epoch": 0.8922772836368771,
"grad_norm": 0.21963050961494446,
"learning_rate": 5.563462284901022e-06,
"loss": 1.036845874786377,
"step": 7110
},
{
"epoch": 0.8935322446546504,
"grad_norm": 0.2212342619895935,
"learning_rate": 5.498770862983569e-06,
"loss": 1.0462807655334472,
"step": 7120
},
{
"epoch": 0.8947872056724238,
"grad_norm": 0.19352254271507263,
"learning_rate": 5.434079441066115e-06,
"loss": 1.0695903778076172,
"step": 7130
},
{
"epoch": 0.8960421666901972,
"grad_norm": 0.21432216465473175,
"learning_rate": 5.3693880191486615e-06,
"loss": 1.0361966133117675,
"step": 7140
},
{
"epoch": 0.8972971277079705,
"grad_norm": 0.193306565284729,
"learning_rate": 5.3046965972312074e-06,
"loss": 1.119828987121582,
"step": 7150
},
{
"epoch": 0.898552088725744,
"grad_norm": 0.18354584276676178,
"learning_rate": 5.240005175313753e-06,
"loss": 1.0150964736938477,
"step": 7160
},
{
"epoch": 0.8998070497435173,
"grad_norm": 0.2255086451768875,
"learning_rate": 5.1753137533963e-06,
"loss": 1.0812021255493165,
"step": 7170
},
{
"epoch": 0.9010620107612908,
"grad_norm": 0.1983460932970047,
"learning_rate": 5.110622331478846e-06,
"loss": 1.0526619911193849,
"step": 7180
},
{
"epoch": 0.9023169717790641,
"grad_norm": 0.2795880138874054,
"learning_rate": 5.045930909561392e-06,
"loss": 0.9882980346679687,
"step": 7190
},
{
"epoch": 0.9035719327968375,
"grad_norm": 0.2017148733139038,
"learning_rate": 4.981239487643939e-06,
"loss": 1.079946517944336,
"step": 7200
},
{
"epoch": 0.9048268938146109,
"grad_norm": 0.19753098487854004,
"learning_rate": 4.9165480657264845e-06,
"loss": 1.0410076141357423,
"step": 7210
},
{
"epoch": 0.9060818548323842,
"grad_norm": 0.21375982463359833,
"learning_rate": 4.851856643809031e-06,
"loss": 1.0886569023132324,
"step": 7220
},
{
"epoch": 0.9073368158501577,
"grad_norm": 0.18023167550563812,
"learning_rate": 4.787165221891578e-06,
"loss": 1.01919584274292,
"step": 7230
},
{
"epoch": 0.908591776867931,
"grad_norm": 0.21556456387043,
"learning_rate": 4.722473799974124e-06,
"loss": 1.1226733207702637,
"step": 7240
},
{
"epoch": 0.9098467378857045,
"grad_norm": 0.1888088434934616,
"learning_rate": 4.65778237805667e-06,
"loss": 1.0532227516174317,
"step": 7250
},
{
"epoch": 0.9111016989034778,
"grad_norm": 0.21888945996761322,
"learning_rate": 4.5930909561392166e-06,
"loss": 1.1242030143737793,
"step": 7260
},
{
"epoch": 0.9123566599212511,
"grad_norm": 0.20070919394493103,
"learning_rate": 4.5283995342217625e-06,
"loss": 1.0876989364624023,
"step": 7270
},
{
"epoch": 0.9136116209390246,
"grad_norm": 0.21463949978351593,
"learning_rate": 4.463708112304308e-06,
"loss": 1.0497437477111817,
"step": 7280
},
{
"epoch": 0.9148665819567979,
"grad_norm": 0.21296384930610657,
"learning_rate": 4.399016690386855e-06,
"loss": 1.0168041229248046,
"step": 7290
},
{
"epoch": 0.9161215429745714,
"grad_norm": 0.2151423692703247,
"learning_rate": 4.334325268469401e-06,
"loss": 1.0028361320495605,
"step": 7300
},
{
"epoch": 0.9173765039923447,
"grad_norm": 0.17901144921779633,
"learning_rate": 4.269633846551947e-06,
"loss": 1.0603734970092773,
"step": 7310
},
{
"epoch": 0.9186314650101182,
"grad_norm": 0.22024358808994293,
"learning_rate": 4.204942424634494e-06,
"loss": 1.133981418609619,
"step": 7320
},
{
"epoch": 0.9198864260278915,
"grad_norm": 0.18600453436374664,
"learning_rate": 4.1402510027170396e-06,
"loss": 1.043716812133789,
"step": 7330
},
{
"epoch": 0.9211413870456648,
"grad_norm": 0.1897258311510086,
"learning_rate": 4.0755595807995855e-06,
"loss": 1.0358457565307617,
"step": 7340
},
{
"epoch": 0.9223963480634383,
"grad_norm": 0.2092253863811493,
"learning_rate": 4.010868158882133e-06,
"loss": 1.0762650489807128,
"step": 7350
},
{
"epoch": 0.9236513090812116,
"grad_norm": 0.1960146427154541,
"learning_rate": 3.946176736964679e-06,
"loss": 1.0524661064147949,
"step": 7360
},
{
"epoch": 0.9249062700989851,
"grad_norm": 0.21514521539211273,
"learning_rate": 3.881485315047225e-06,
"loss": 1.0553058624267577,
"step": 7370
},
{
"epoch": 0.9261612311167584,
"grad_norm": 0.18721559643745422,
"learning_rate": 3.816793893129772e-06,
"loss": 1.120730495452881,
"step": 7380
},
{
"epoch": 0.9274161921345319,
"grad_norm": 0.20185963809490204,
"learning_rate": 3.7521024712123175e-06,
"loss": 1.0561814308166504,
"step": 7390
},
{
"epoch": 0.9286711531523052,
"grad_norm": 0.18291395902633667,
"learning_rate": 3.687411049294864e-06,
"loss": 1.065229320526123,
"step": 7400
},
{
"epoch": 0.9299261141700786,
"grad_norm": 0.20202836394309998,
"learning_rate": 3.6227196273774097e-06,
"loss": 1.092850112915039,
"step": 7410
},
{
"epoch": 0.931181075187852,
"grad_norm": 0.1978691816329956,
"learning_rate": 3.558028205459956e-06,
"loss": 1.068133544921875,
"step": 7420
},
{
"epoch": 0.9324360362056253,
"grad_norm": 0.20446452498435974,
"learning_rate": 3.4933367835425024e-06,
"loss": 1.037320613861084,
"step": 7430
},
{
"epoch": 0.9336909972233988,
"grad_norm": 0.17508593201637268,
"learning_rate": 3.4286453616250483e-06,
"loss": 1.141841220855713,
"step": 7440
},
{
"epoch": 0.9349459582411721,
"grad_norm": 0.1751583069562912,
"learning_rate": 3.3639539397075946e-06,
"loss": 1.0543017387390137,
"step": 7450
},
{
"epoch": 0.9362009192589456,
"grad_norm": 0.16998153924942017,
"learning_rate": 3.2992625177901413e-06,
"loss": 1.0321502685546875,
"step": 7460
},
{
"epoch": 0.9374558802767189,
"grad_norm": 0.17314793169498444,
"learning_rate": 3.2345710958726877e-06,
"loss": 1.0988115310668944,
"step": 7470
},
{
"epoch": 0.9387108412944923,
"grad_norm": 0.19269074499607086,
"learning_rate": 3.169879673955234e-06,
"loss": 1.070913314819336,
"step": 7480
},
{
"epoch": 0.9399658023122657,
"grad_norm": 0.23561343550682068,
"learning_rate": 3.10518825203778e-06,
"loss": 1.049428939819336,
"step": 7490
},
{
"epoch": 0.941220763330039,
"grad_norm": 0.2069864720106125,
"learning_rate": 3.040496830120326e-06,
"loss": 1.003286647796631,
"step": 7500
},
{
"epoch": 0.9424757243478125,
"grad_norm": 0.18430078029632568,
"learning_rate": 2.9758054082028725e-06,
"loss": 1.0501197814941405,
"step": 7510
},
{
"epoch": 0.9437306853655858,
"grad_norm": 0.20570716261863708,
"learning_rate": 2.911113986285419e-06,
"loss": 1.0311017990112306,
"step": 7520
},
{
"epoch": 0.9449856463833592,
"grad_norm": 0.23062951862812042,
"learning_rate": 2.8464225643679647e-06,
"loss": 1.0960111618041992,
"step": 7530
},
{
"epoch": 0.9462406074011326,
"grad_norm": 0.20773741602897644,
"learning_rate": 2.781731142450511e-06,
"loss": 1.0412386894226073,
"step": 7540
},
{
"epoch": 0.947495568418906,
"grad_norm": 0.24511989951133728,
"learning_rate": 2.7170397205330574e-06,
"loss": 1.1096014976501465,
"step": 7550
},
{
"epoch": 0.9487505294366794,
"grad_norm": 0.20896856486797333,
"learning_rate": 2.6523482986156037e-06,
"loss": 1.0784428596496582,
"step": 7560
},
{
"epoch": 0.9500054904544527,
"grad_norm": 0.19488734006881714,
"learning_rate": 2.58765687669815e-06,
"loss": 1.043881607055664,
"step": 7570
},
{
"epoch": 0.9512604514722262,
"grad_norm": 0.19251015782356262,
"learning_rate": 2.522965454780696e-06,
"loss": 1.0701610565185546,
"step": 7580
},
{
"epoch": 0.9525154124899995,
"grad_norm": 0.19297641515731812,
"learning_rate": 2.4582740328632423e-06,
"loss": 1.0647805213928223,
"step": 7590
},
{
"epoch": 0.9537703735077729,
"grad_norm": 0.20945972204208374,
"learning_rate": 2.393582610945789e-06,
"loss": 1.036386489868164,
"step": 7600
},
{
"epoch": 0.9550253345255463,
"grad_norm": 0.19576741755008698,
"learning_rate": 2.328891189028335e-06,
"loss": 1.0599888801574706,
"step": 7610
},
{
"epoch": 0.9562802955433197,
"grad_norm": 0.21709538996219635,
"learning_rate": 2.2641997671108812e-06,
"loss": 1.0713414192199706,
"step": 7620
},
{
"epoch": 0.9575352565610931,
"grad_norm": 0.2199336290359497,
"learning_rate": 2.1995083451934276e-06,
"loss": 1.071683406829834,
"step": 7630
},
{
"epoch": 0.9587902175788665,
"grad_norm": 0.24035602807998657,
"learning_rate": 2.1348169232759735e-06,
"loss": 1.074808692932129,
"step": 7640
},
{
"epoch": 0.9600451785966398,
"grad_norm": 0.22513709962368011,
"learning_rate": 2.0701255013585198e-06,
"loss": 1.080063247680664,
"step": 7650
},
{
"epoch": 0.9613001396144132,
"grad_norm": 0.20860207080841064,
"learning_rate": 2.0054340794410665e-06,
"loss": 1.0631601333618164,
"step": 7660
},
{
"epoch": 0.9625551006321866,
"grad_norm": 0.20075741410255432,
"learning_rate": 1.9407426575236124e-06,
"loss": 1.103189468383789,
"step": 7670
},
{
"epoch": 0.96381006164996,
"grad_norm": 0.21978308260440826,
"learning_rate": 1.8760512356061587e-06,
"loss": 1.0389853477478028,
"step": 7680
},
{
"epoch": 0.9650650226677334,
"grad_norm": 0.1870948076248169,
"learning_rate": 1.8113598136887049e-06,
"loss": 1.0636377334594727,
"step": 7690
},
{
"epoch": 0.9663199836855068,
"grad_norm": 0.18612495064735413,
"learning_rate": 1.7466683917712512e-06,
"loss": 1.054550838470459,
"step": 7700
},
{
"epoch": 0.9675749447032802,
"grad_norm": 0.1692449450492859,
"learning_rate": 1.6819769698537973e-06,
"loss": 1.0402187347412108,
"step": 7710
},
{
"epoch": 0.9688299057210535,
"grad_norm": 0.17843835055828094,
"learning_rate": 1.6172855479363438e-06,
"loss": 1.0575499534606934,
"step": 7720
},
{
"epoch": 0.9700848667388269,
"grad_norm": 0.22015692293643951,
"learning_rate": 1.55259412601889e-06,
"loss": 1.0864248275756836,
"step": 7730
},
{
"epoch": 0.9713398277566003,
"grad_norm": 0.20895177125930786,
"learning_rate": 1.4879027041014363e-06,
"loss": 1.0197461128234864,
"step": 7740
},
{
"epoch": 0.9725947887743737,
"grad_norm": 0.20695802569389343,
"learning_rate": 1.4232112821839824e-06,
"loss": 1.1009364128112793,
"step": 7750
},
{
"epoch": 0.9738497497921471,
"grad_norm": 0.16815753281116486,
"learning_rate": 1.3585198602665287e-06,
"loss": 0.9824440956115723,
"step": 7760
},
{
"epoch": 0.9751047108099204,
"grad_norm": 0.20957960188388824,
"learning_rate": 1.293828438349075e-06,
"loss": 1.1214850425720215,
"step": 7770
},
{
"epoch": 0.9763596718276939,
"grad_norm": 0.2056475281715393,
"learning_rate": 1.2291370164316211e-06,
"loss": 1.0337078094482421,
"step": 7780
},
{
"epoch": 0.9776146328454672,
"grad_norm": 0.20233125984668732,
"learning_rate": 1.1644455945141675e-06,
"loss": 1.1166194915771483,
"step": 7790
},
{
"epoch": 0.9788695938632406,
"grad_norm": 0.19711875915527344,
"learning_rate": 1.0997541725967138e-06,
"loss": 1.0613553047180175,
"step": 7800
},
{
"epoch": 0.980124554881014,
"grad_norm": 0.23265467584133148,
"learning_rate": 1.0350627506792599e-06,
"loss": 1.0709218978881836,
"step": 7810
},
{
"epoch": 0.9813795158987874,
"grad_norm": 0.1931038498878479,
"learning_rate": 9.703713287618062e-07,
"loss": 1.0655381202697753,
"step": 7820
},
{
"epoch": 0.9826344769165608,
"grad_norm": 0.19685102999210358,
"learning_rate": 9.056799068443524e-07,
"loss": 1.0683314323425293,
"step": 7830
},
{
"epoch": 0.9838894379343341,
"grad_norm": 0.18740233778953552,
"learning_rate": 8.409884849268986e-07,
"loss": 1.0544939041137695,
"step": 7840
},
{
"epoch": 0.9851443989521076,
"grad_norm": 0.2561696767807007,
"learning_rate": 7.76297063009445e-07,
"loss": 1.092136287689209,
"step": 7850
},
{
"epoch": 0.9863993599698809,
"grad_norm": 0.20890219509601593,
"learning_rate": 7.116056410919912e-07,
"loss": 1.0438971519470215,
"step": 7860
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.1991521716117859,
"learning_rate": 6.469142191745375e-07,
"loss": 1.0979823112487792,
"step": 7870
},
{
"epoch": 0.9889092820054277,
"grad_norm": 0.25123023986816406,
"learning_rate": 5.822227972570837e-07,
"loss": 1.0025765419006347,
"step": 7880
},
{
"epoch": 0.990164243023201,
"grad_norm": 0.20250628888607025,
"learning_rate": 5.175313753396299e-07,
"loss": 1.092966651916504,
"step": 7890
},
{
"epoch": 0.9914192040409745,
"grad_norm": 0.2285197377204895,
"learning_rate": 4.528399534221762e-07,
"loss": 1.0923130989074707,
"step": 7900
},
{
"epoch": 0.9926741650587478,
"grad_norm": 0.1967306137084961,
"learning_rate": 3.881485315047225e-07,
"loss": 1.0842831611633301,
"step": 7910
},
{
"epoch": 0.9939291260765213,
"grad_norm": 0.20673462748527527,
"learning_rate": 3.2345710958726876e-07,
"loss": 1.0047653198242188,
"step": 7920
},
{
"epoch": 0.9951840870942946,
"grad_norm": 0.19918572902679443,
"learning_rate": 2.5876568766981497e-07,
"loss": 1.0727598190307617,
"step": 7930
},
{
"epoch": 0.9964390481120681,
"grad_norm": 0.16394563019275665,
"learning_rate": 1.9407426575236124e-07,
"loss": 1.0194159507751466,
"step": 7940
},
{
"epoch": 0.9976940091298414,
"grad_norm": 0.22014449536800385,
"learning_rate": 1.2938284383490749e-07,
"loss": 1.0323500633239746,
"step": 7950
},
{
"epoch": 0.9989489701476147,
"grad_norm": 0.2236953228712082,
"learning_rate": 6.469142191745374e-08,
"loss": 1.030369472503662,
"step": 7960
},
{
"epoch": 1.0,
"eval_loss": 0.9335432648658752,
"eval_runtime": 895.0319,
"eval_samples_per_second": 14.994,
"eval_steps_per_second": 7.497,
"step": 7969
}
],
"logging_steps": 10,
"max_steps": 7969,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.797602843269349e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}