{ "best_global_step": 7969, "best_metric": 0.9335432648658752, "best_model_checkpoint": "./qwen3-1.7b-text2json-v3/checkpoint-7969", "epoch": 1.0, "eval_steps": 500, "global_step": 7969, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012549610177733854, "grad_norm": 2.7938790321350098, "learning_rate": 1.875e-06, "loss": 1.6903125762939453, "step": 10 }, { "epoch": 0.0025099220355467708, "grad_norm": 2.206455945968628, "learning_rate": 3.958333333333333e-06, "loss": 1.661090087890625, "step": 20 }, { "epoch": 0.003764883053320156, "grad_norm": 0.9010259509086609, "learning_rate": 6.041666666666667e-06, "loss": 1.5127840042114258, "step": 30 }, { "epoch": 0.0050198440710935415, "grad_norm": 0.4326864778995514, "learning_rate": 8.125000000000001e-06, "loss": 1.3494388580322265, "step": 40 }, { "epoch": 0.006274805088866927, "grad_norm": 0.41406598687171936, "learning_rate": 1.0208333333333334e-05, "loss": 1.3530027389526367, "step": 50 }, { "epoch": 0.007529766106640312, "grad_norm": 0.3024258613586426, "learning_rate": 1.2291666666666666e-05, "loss": 1.2346282958984376, "step": 60 }, { "epoch": 0.008784727124413697, "grad_norm": 0.25805506110191345, "learning_rate": 1.4374999999999999e-05, "loss": 1.2771096229553223, "step": 70 }, { "epoch": 0.010039688142187083, "grad_norm": 0.2437220960855484, "learning_rate": 1.6458333333333335e-05, "loss": 1.261610221862793, "step": 80 }, { "epoch": 0.011294649159960469, "grad_norm": 0.26054802536964417, "learning_rate": 1.854166666666667e-05, "loss": 1.173118495941162, "step": 90 }, { "epoch": 0.012549610177733855, "grad_norm": 0.28059178590774536, "learning_rate": 2.0625e-05, "loss": 1.1928260803222657, "step": 100 }, { "epoch": 0.013804571195507239, "grad_norm": 0.23667506873607635, "learning_rate": 2.2708333333333334e-05, "loss": 1.2138886451721191, "step": 110 }, { "epoch": 0.015059532213280625, "grad_norm": 0.24474164843559265, "learning_rate": 2.479166666666667e-05, "loss": 1.2723681449890136, "step": 120 }, { "epoch": 0.016314493231054012, "grad_norm": 0.33744102716445923, "learning_rate": 2.6875e-05, "loss": 1.267289924621582, "step": 130 }, { "epoch": 0.017569454248827394, "grad_norm": 0.26319584250450134, "learning_rate": 2.8958333333333337e-05, "loss": 1.190561866760254, "step": 140 }, { "epoch": 0.01882441526660078, "grad_norm": 0.2557962238788605, "learning_rate": 3.104166666666667e-05, "loss": 1.1946110725402832, "step": 150 }, { "epoch": 0.020079376284374166, "grad_norm": 0.27757933735847473, "learning_rate": 3.3125e-05, "loss": 1.1806714057922363, "step": 160 }, { "epoch": 0.021334337302147552, "grad_norm": 0.22700543701648712, "learning_rate": 3.520833333333334e-05, "loss": 1.2129878997802734, "step": 170 }, { "epoch": 0.022589298319920938, "grad_norm": 0.2856610417366028, "learning_rate": 3.729166666666667e-05, "loss": 1.1290763854980468, "step": 180 }, { "epoch": 0.023844259337694323, "grad_norm": 0.26019546389579773, "learning_rate": 3.9375e-05, "loss": 1.2425550460815429, "step": 190 }, { "epoch": 0.02509922035546771, "grad_norm": 0.23760564625263214, "learning_rate": 4.1458333333333336e-05, "loss": 1.2198989868164063, "step": 200 }, { "epoch": 0.026354181373241095, "grad_norm": 0.26171621680259705, "learning_rate": 4.354166666666667e-05, "loss": 1.228119468688965, "step": 210 }, { "epoch": 0.027609142391014477, "grad_norm": 0.23990799486637115, "learning_rate": 4.5625e-05, "loss": 1.1751232147216797, "step": 220 }, { "epoch": 0.028864103408787863, "grad_norm": 0.21109728515148163, "learning_rate": 4.770833333333334e-05, "loss": 1.18958740234375, "step": 230 }, { "epoch": 0.03011906442656125, "grad_norm": 0.22801515460014343, "learning_rate": 4.979166666666667e-05, "loss": 1.2031834602355957, "step": 240 }, { "epoch": 0.03137402544433464, "grad_norm": 0.2113051414489746, "learning_rate": 4.99417777202743e-05, "loss": 1.104047679901123, "step": 250 }, { "epoch": 0.032628986462108024, "grad_norm": 0.2567385733127594, "learning_rate": 4.987708629835684e-05, "loss": 1.124267292022705, "step": 260 }, { "epoch": 0.0338839474798814, "grad_norm": 0.25872015953063965, "learning_rate": 4.981239487643939e-05, "loss": 1.1195612907409669, "step": 270 }, { "epoch": 0.03513890849765479, "grad_norm": 0.22646528482437134, "learning_rate": 4.974770345452193e-05, "loss": 1.129570484161377, "step": 280 }, { "epoch": 0.036393869515428175, "grad_norm": 0.26482081413269043, "learning_rate": 4.968301203260448e-05, "loss": 1.1967822074890138, "step": 290 }, { "epoch": 0.03764883053320156, "grad_norm": 0.2646678686141968, "learning_rate": 4.9618320610687025e-05, "loss": 1.1607979774475097, "step": 300 }, { "epoch": 0.038903791550974946, "grad_norm": 0.22207561135292053, "learning_rate": 4.955362918876957e-05, "loss": 1.1674532890319824, "step": 310 }, { "epoch": 0.04015875256874833, "grad_norm": 0.24940666556358337, "learning_rate": 4.948893776685212e-05, "loss": 1.0974184036254884, "step": 320 }, { "epoch": 0.04141371358652172, "grad_norm": 0.2815345823764801, "learning_rate": 4.9424246344934665e-05, "loss": 1.1656072616577149, "step": 330 }, { "epoch": 0.042668674604295104, "grad_norm": 0.2820636034011841, "learning_rate": 4.935955492301721e-05, "loss": 1.1393600463867188, "step": 340 }, { "epoch": 0.04392363562206849, "grad_norm": 0.21207420527935028, "learning_rate": 4.929486350109976e-05, "loss": 1.1813325881958008, "step": 350 }, { "epoch": 0.045178596639841875, "grad_norm": 0.2317555546760559, "learning_rate": 4.92301720791823e-05, "loss": 1.2024839401245118, "step": 360 }, { "epoch": 0.04643355765761526, "grad_norm": 0.22415006160736084, "learning_rate": 4.916548065726485e-05, "loss": 1.1521187782287599, "step": 370 }, { "epoch": 0.04768851867538865, "grad_norm": 0.2794686257839203, "learning_rate": 4.910078923534739e-05, "loss": 1.1735503196716308, "step": 380 }, { "epoch": 0.04894347969316203, "grad_norm": 0.20266857743263245, "learning_rate": 4.9036097813429946e-05, "loss": 1.1360593795776368, "step": 390 }, { "epoch": 0.05019844071093542, "grad_norm": 0.24181729555130005, "learning_rate": 4.8971406391512486e-05, "loss": 1.1604348182678224, "step": 400 }, { "epoch": 0.051453401728708804, "grad_norm": 0.23283515870571136, "learning_rate": 4.890671496959503e-05, "loss": 1.1582366943359375, "step": 410 }, { "epoch": 0.05270836274648219, "grad_norm": 0.2232106328010559, "learning_rate": 4.884202354767758e-05, "loss": 1.1357643127441406, "step": 420 }, { "epoch": 0.053963323764255576, "grad_norm": 0.20864224433898926, "learning_rate": 4.8777332125760126e-05, "loss": 1.1960598945617675, "step": 430 }, { "epoch": 0.055218284782028955, "grad_norm": 0.21834968030452728, "learning_rate": 4.871264070384267e-05, "loss": 1.1870046615600587, "step": 440 }, { "epoch": 0.05647324579980234, "grad_norm": 0.22693583369255066, "learning_rate": 4.864794928192522e-05, "loss": 1.1584319114685058, "step": 450 }, { "epoch": 0.057728206817575727, "grad_norm": 0.22258850932121277, "learning_rate": 4.8583257860007766e-05, "loss": 1.1739194869995118, "step": 460 }, { "epoch": 0.05898316783534911, "grad_norm": 0.21987426280975342, "learning_rate": 4.851856643809031e-05, "loss": 1.2010994911193849, "step": 470 }, { "epoch": 0.0602381288531225, "grad_norm": 0.21596240997314453, "learning_rate": 4.845387501617285e-05, "loss": 1.0946701049804688, "step": 480 }, { "epoch": 0.061493089870895884, "grad_norm": 0.18262015283107758, "learning_rate": 4.8389183594255406e-05, "loss": 1.1652299880981445, "step": 490 }, { "epoch": 0.06274805088866928, "grad_norm": 0.22206667065620422, "learning_rate": 4.8324492172337946e-05, "loss": 1.1554561614990235, "step": 500 }, { "epoch": 0.06400301190644266, "grad_norm": 0.19047950208187103, "learning_rate": 4.82598007504205e-05, "loss": 1.1874130249023438, "step": 510 }, { "epoch": 0.06525797292421605, "grad_norm": 0.23143501579761505, "learning_rate": 4.819510932850304e-05, "loss": 1.1211513519287108, "step": 520 }, { "epoch": 0.06651293394198943, "grad_norm": 0.21261698007583618, "learning_rate": 4.8130417906585586e-05, "loss": 1.1845171928405762, "step": 530 }, { "epoch": 0.0677678949597628, "grad_norm": 0.22298000752925873, "learning_rate": 4.806572648466813e-05, "loss": 1.1463261604309083, "step": 540 }, { "epoch": 0.0690228559775362, "grad_norm": 0.20260395109653473, "learning_rate": 4.800103506275068e-05, "loss": 1.1507064819335937, "step": 550 }, { "epoch": 0.07027781699530958, "grad_norm": 0.22262270748615265, "learning_rate": 4.793634364083323e-05, "loss": 1.1626813888549805, "step": 560 }, { "epoch": 0.07153277801308297, "grad_norm": 0.18661239743232727, "learning_rate": 4.7871652218915773e-05, "loss": 1.1173880577087403, "step": 570 }, { "epoch": 0.07278773903085635, "grad_norm": 0.17793259024620056, "learning_rate": 4.780696079699832e-05, "loss": 1.182445240020752, "step": 580 }, { "epoch": 0.07404270004862974, "grad_norm": 0.22001393139362335, "learning_rate": 4.774226937508087e-05, "loss": 1.0757221221923827, "step": 590 }, { "epoch": 0.07529766106640312, "grad_norm": 0.16966955363750458, "learning_rate": 4.7677577953163414e-05, "loss": 1.1065872192382813, "step": 600 }, { "epoch": 0.07655262208417651, "grad_norm": 0.18004654347896576, "learning_rate": 4.761288653124596e-05, "loss": 1.1050881385803222, "step": 610 }, { "epoch": 0.07780758310194989, "grad_norm": 0.21302928030490875, "learning_rate": 4.75481951093285e-05, "loss": 1.1550076484680176, "step": 620 }, { "epoch": 0.07906254411972329, "grad_norm": 0.19082143902778625, "learning_rate": 4.7483503687411054e-05, "loss": 1.133737564086914, "step": 630 }, { "epoch": 0.08031750513749666, "grad_norm": 0.18166916072368622, "learning_rate": 4.7418812265493594e-05, "loss": 1.1011496543884278, "step": 640 }, { "epoch": 0.08157246615527006, "grad_norm": 0.2051081359386444, "learning_rate": 4.735412084357615e-05, "loss": 1.2461429595947267, "step": 650 }, { "epoch": 0.08282742717304344, "grad_norm": 0.1807452291250229, "learning_rate": 4.7289429421658694e-05, "loss": 1.160153579711914, "step": 660 }, { "epoch": 0.08408238819081683, "grad_norm": 0.2180749773979187, "learning_rate": 4.7224737999741234e-05, "loss": 1.1506930351257325, "step": 670 }, { "epoch": 0.08533734920859021, "grad_norm": 0.18058760464191437, "learning_rate": 4.716004657782379e-05, "loss": 1.1412607192993165, "step": 680 }, { "epoch": 0.0865923102263636, "grad_norm": 0.1865541785955429, "learning_rate": 4.709535515590633e-05, "loss": 1.0827465057373047, "step": 690 }, { "epoch": 0.08784727124413698, "grad_norm": 0.20097248256206512, "learning_rate": 4.703066373398888e-05, "loss": 1.1497159004211426, "step": 700 }, { "epoch": 0.08910223226191036, "grad_norm": 0.2066005915403366, "learning_rate": 4.696597231207142e-05, "loss": 1.1188447952270508, "step": 710 }, { "epoch": 0.09035719327968375, "grad_norm": 0.23538357019424438, "learning_rate": 4.690128089015397e-05, "loss": 1.1101722717285156, "step": 720 }, { "epoch": 0.09161215429745713, "grad_norm": 0.2259693592786789, "learning_rate": 4.6836589468236515e-05, "loss": 1.1163352966308593, "step": 730 }, { "epoch": 0.09286711531523052, "grad_norm": 0.1654050350189209, "learning_rate": 4.677189804631906e-05, "loss": 1.0785648345947265, "step": 740 }, { "epoch": 0.0941220763330039, "grad_norm": 0.1840600073337555, "learning_rate": 4.670720662440161e-05, "loss": 1.114925003051758, "step": 750 }, { "epoch": 0.0953770373507773, "grad_norm": 0.22095882892608643, "learning_rate": 4.6642515202484155e-05, "loss": 1.1339808464050294, "step": 760 }, { "epoch": 0.09663199836855067, "grad_norm": 0.18420755863189697, "learning_rate": 4.65778237805667e-05, "loss": 1.1255131721496583, "step": 770 }, { "epoch": 0.09788695938632407, "grad_norm": 0.20250706374645233, "learning_rate": 4.651313235864925e-05, "loss": 1.0625088691711426, "step": 780 }, { "epoch": 0.09914192040409744, "grad_norm": 0.20630766451358795, "learning_rate": 4.644844093673179e-05, "loss": 1.0966971397399903, "step": 790 }, { "epoch": 0.10039688142187084, "grad_norm": 0.19232341647148132, "learning_rate": 4.638374951481434e-05, "loss": 1.1137746810913085, "step": 800 }, { "epoch": 0.10165184243964422, "grad_norm": 0.22812579572200775, "learning_rate": 4.631905809289688e-05, "loss": 1.157925796508789, "step": 810 }, { "epoch": 0.10290680345741761, "grad_norm": 0.17667317390441895, "learning_rate": 4.6254366670979435e-05, "loss": 1.1550764083862304, "step": 820 }, { "epoch": 0.10416176447519099, "grad_norm": 0.18233029544353485, "learning_rate": 4.6189675249061975e-05, "loss": 1.1150010108947754, "step": 830 }, { "epoch": 0.10541672549296438, "grad_norm": 0.20302756130695343, "learning_rate": 4.612498382714452e-05, "loss": 1.0721470832824707, "step": 840 }, { "epoch": 0.10667168651073776, "grad_norm": 0.2043253630399704, "learning_rate": 4.606029240522707e-05, "loss": 1.1333942413330078, "step": 850 }, { "epoch": 0.10792664752851115, "grad_norm": 0.19231152534484863, "learning_rate": 4.5995600983309616e-05, "loss": 1.1027427673339845, "step": 860 }, { "epoch": 0.10918160854628453, "grad_norm": 0.20718224346637726, "learning_rate": 4.593090956139216e-05, "loss": 1.1255599975585937, "step": 870 }, { "epoch": 0.11043656956405791, "grad_norm": 0.1926298588514328, "learning_rate": 4.586621813947471e-05, "loss": 1.1083927154541016, "step": 880 }, { "epoch": 0.1116915305818313, "grad_norm": 0.2107614278793335, "learning_rate": 4.5801526717557256e-05, "loss": 1.0679313659667968, "step": 890 }, { "epoch": 0.11294649159960468, "grad_norm": 0.1800786405801773, "learning_rate": 4.57368352956398e-05, "loss": 1.2032075881958009, "step": 900 }, { "epoch": 0.11420145261737807, "grad_norm": 0.18883837759494781, "learning_rate": 4.567214387372234e-05, "loss": 1.1553642272949218, "step": 910 }, { "epoch": 0.11545641363515145, "grad_norm": 0.17952467501163483, "learning_rate": 4.5607452451804896e-05, "loss": 1.151566505432129, "step": 920 }, { "epoch": 0.11671137465292485, "grad_norm": 0.18356452882289886, "learning_rate": 4.5542761029887436e-05, "loss": 1.1210906028747558, "step": 930 }, { "epoch": 0.11796633567069822, "grad_norm": 0.19210585951805115, "learning_rate": 4.547806960796999e-05, "loss": 1.1327272415161134, "step": 940 }, { "epoch": 0.11922129668847162, "grad_norm": 0.2031843215227127, "learning_rate": 4.541337818605253e-05, "loss": 1.1083773612976073, "step": 950 }, { "epoch": 0.120476257706245, "grad_norm": 0.16328449547290802, "learning_rate": 4.5348686764135076e-05, "loss": 1.163971519470215, "step": 960 }, { "epoch": 0.12173121872401839, "grad_norm": 0.17702428996562958, "learning_rate": 4.528399534221762e-05, "loss": 1.1527894973754882, "step": 970 }, { "epoch": 0.12298617974179177, "grad_norm": 0.1729395091533661, "learning_rate": 4.521930392030017e-05, "loss": 1.1400185585021974, "step": 980 }, { "epoch": 0.12424114075956516, "grad_norm": 0.17520761489868164, "learning_rate": 4.5154612498382716e-05, "loss": 1.0990144729614257, "step": 990 }, { "epoch": 0.12549610177733855, "grad_norm": 0.18720504641532898, "learning_rate": 4.508992107646526e-05, "loss": 1.1631937980651856, "step": 1000 }, { "epoch": 0.12675106279511192, "grad_norm": 0.19207048416137695, "learning_rate": 4.502522965454781e-05, "loss": 1.1027806282043457, "step": 1010 }, { "epoch": 0.1280060238128853, "grad_norm": 0.1836109161376953, "learning_rate": 4.496053823263036e-05, "loss": 1.1294532775878907, "step": 1020 }, { "epoch": 0.1292609848306587, "grad_norm": 0.17419582605361938, "learning_rate": 4.48958468107129e-05, "loss": 1.1544736862182616, "step": 1030 }, { "epoch": 0.1305159458484321, "grad_norm": 0.20314334332942963, "learning_rate": 4.483115538879545e-05, "loss": 1.204758644104004, "step": 1040 }, { "epoch": 0.13177090686620546, "grad_norm": 0.17511610686779022, "learning_rate": 4.476646396687799e-05, "loss": 1.107564353942871, "step": 1050 }, { "epoch": 0.13302586788397885, "grad_norm": 0.215122252702713, "learning_rate": 4.4701772544960544e-05, "loss": 1.2063298225402832, "step": 1060 }, { "epoch": 0.13428082890175225, "grad_norm": 0.17670315504074097, "learning_rate": 4.4637081123043084e-05, "loss": 1.1757978439331054, "step": 1070 }, { "epoch": 0.1355357899195256, "grad_norm": 0.18765057623386383, "learning_rate": 4.457238970112563e-05, "loss": 1.1447077751159669, "step": 1080 }, { "epoch": 0.136790750937299, "grad_norm": 0.5525104403495789, "learning_rate": 4.450769827920818e-05, "loss": 1.1048051834106445, "step": 1090 }, { "epoch": 0.1380457119550724, "grad_norm": 0.19476006925106049, "learning_rate": 4.4443006857290724e-05, "loss": 1.1642623901367188, "step": 1100 }, { "epoch": 0.1393006729728458, "grad_norm": 0.17488926649093628, "learning_rate": 4.437831543537328e-05, "loss": 1.0796038627624511, "step": 1110 }, { "epoch": 0.14055563399061916, "grad_norm": 0.20637401938438416, "learning_rate": 4.431362401345582e-05, "loss": 1.1843393325805665, "step": 1120 }, { "epoch": 0.14181059500839255, "grad_norm": 0.22406329214572906, "learning_rate": 4.4248932591538364e-05, "loss": 1.1924137115478515, "step": 1130 }, { "epoch": 0.14306555602616594, "grad_norm": 0.20387622714042664, "learning_rate": 4.418424116962091e-05, "loss": 1.1867655754089355, "step": 1140 }, { "epoch": 0.14432051704393933, "grad_norm": 0.1762738674879074, "learning_rate": 4.411954974770346e-05, "loss": 1.0975682258605957, "step": 1150 }, { "epoch": 0.1455754780617127, "grad_norm": 0.21380206942558289, "learning_rate": 4.4054858325786004e-05, "loss": 1.1449914932250977, "step": 1160 }, { "epoch": 0.1468304390794861, "grad_norm": 0.19425804913043976, "learning_rate": 4.399016690386855e-05, "loss": 1.109062099456787, "step": 1170 }, { "epoch": 0.14808540009725948, "grad_norm": 0.18465885519981384, "learning_rate": 4.39254754819511e-05, "loss": 1.0944741249084473, "step": 1180 }, { "epoch": 0.14934036111503288, "grad_norm": 0.18625572323799133, "learning_rate": 4.3860784060033645e-05, "loss": 1.100302505493164, "step": 1190 }, { "epoch": 0.15059532213280624, "grad_norm": 0.19639429450035095, "learning_rate": 4.3796092638116185e-05, "loss": 1.1336475372314454, "step": 1200 }, { "epoch": 0.15185028315057963, "grad_norm": 0.17792847752571106, "learning_rate": 4.373140121619874e-05, "loss": 1.1655400276184082, "step": 1210 }, { "epoch": 0.15310524416835303, "grad_norm": 0.19248999655246735, "learning_rate": 4.366670979428128e-05, "loss": 1.0734170913696288, "step": 1220 }, { "epoch": 0.1543602051861264, "grad_norm": 0.16676078736782074, "learning_rate": 4.360201837236383e-05, "loss": 1.1042096138000488, "step": 1230 }, { "epoch": 0.15561516620389979, "grad_norm": 0.2144252061843872, "learning_rate": 4.353732695044637e-05, "loss": 1.086879348754883, "step": 1240 }, { "epoch": 0.15687012722167318, "grad_norm": 0.191383957862854, "learning_rate": 4.347263552852892e-05, "loss": 1.1877761840820313, "step": 1250 }, { "epoch": 0.15812508823944657, "grad_norm": 0.1510523408651352, "learning_rate": 4.3407944106611465e-05, "loss": 1.0977872848510741, "step": 1260 }, { "epoch": 0.15938004925721994, "grad_norm": 0.21486221253871918, "learning_rate": 4.334325268469401e-05, "loss": 1.1684078216552733, "step": 1270 }, { "epoch": 0.16063501027499333, "grad_norm": 0.20981676876544952, "learning_rate": 4.327856126277656e-05, "loss": 1.1553811073303222, "step": 1280 }, { "epoch": 0.16188997129276672, "grad_norm": 0.17923256754875183, "learning_rate": 4.3213869840859105e-05, "loss": 1.084774875640869, "step": 1290 }, { "epoch": 0.16314493231054011, "grad_norm": 0.16394007205963135, "learning_rate": 4.314917841894165e-05, "loss": 1.1058798789978028, "step": 1300 }, { "epoch": 0.16439989332831348, "grad_norm": 0.18293628096580505, "learning_rate": 4.30844869970242e-05, "loss": 1.1011856079101563, "step": 1310 }, { "epoch": 0.16565485434608687, "grad_norm": 0.15914376080036163, "learning_rate": 4.301979557510674e-05, "loss": 1.182911205291748, "step": 1320 }, { "epoch": 0.16690981536386026, "grad_norm": 0.19914117455482483, "learning_rate": 4.295510415318929e-05, "loss": 1.138012981414795, "step": 1330 }, { "epoch": 0.16816477638163366, "grad_norm": 0.177483931183815, "learning_rate": 4.289041273127183e-05, "loss": 1.1617022514343263, "step": 1340 }, { "epoch": 0.16941973739940702, "grad_norm": 0.18268829584121704, "learning_rate": 4.2825721309354386e-05, "loss": 1.1040393829345703, "step": 1350 }, { "epoch": 0.17067469841718041, "grad_norm": 0.1875396966934204, "learning_rate": 4.2761029887436926e-05, "loss": 1.1711323738098145, "step": 1360 }, { "epoch": 0.1719296594349538, "grad_norm": 0.20280130207538605, "learning_rate": 4.269633846551947e-05, "loss": 1.0739377021789551, "step": 1370 }, { "epoch": 0.1731846204527272, "grad_norm": 0.17182214558124542, "learning_rate": 4.263164704360202e-05, "loss": 1.1563935279846191, "step": 1380 }, { "epoch": 0.17443958147050057, "grad_norm": 0.1958380937576294, "learning_rate": 4.2566955621684566e-05, "loss": 1.152635383605957, "step": 1390 }, { "epoch": 0.17569454248827396, "grad_norm": 0.17894886434078217, "learning_rate": 4.250226419976711e-05, "loss": 1.129916000366211, "step": 1400 }, { "epoch": 0.17694950350604735, "grad_norm": 0.1785098761320114, "learning_rate": 4.243757277784966e-05, "loss": 1.1258666038513183, "step": 1410 }, { "epoch": 0.17820446452382072, "grad_norm": 0.19660112261772156, "learning_rate": 4.2372881355932206e-05, "loss": 1.083286666870117, "step": 1420 }, { "epoch": 0.1794594255415941, "grad_norm": 0.1892261505126953, "learning_rate": 4.230818993401475e-05, "loss": 1.12530574798584, "step": 1430 }, { "epoch": 0.1807143865593675, "grad_norm": 0.22547674179077148, "learning_rate": 4.224349851209729e-05, "loss": 1.1078373908996582, "step": 1440 }, { "epoch": 0.1819693475771409, "grad_norm": 0.16683579981327057, "learning_rate": 4.2178807090179846e-05, "loss": 1.075201416015625, "step": 1450 }, { "epoch": 0.18322430859491426, "grad_norm": 0.19976581633090973, "learning_rate": 4.2114115668262386e-05, "loss": 1.0396580696105957, "step": 1460 }, { "epoch": 0.18447926961268765, "grad_norm": 0.18316680192947388, "learning_rate": 4.204942424634494e-05, "loss": 1.1359598159790039, "step": 1470 }, { "epoch": 0.18573423063046104, "grad_norm": 0.2085312008857727, "learning_rate": 4.198473282442748e-05, "loss": 1.1656570434570312, "step": 1480 }, { "epoch": 0.18698919164823444, "grad_norm": 0.20343416929244995, "learning_rate": 4.192004140251003e-05, "loss": 1.110813522338867, "step": 1490 }, { "epoch": 0.1882441526660078, "grad_norm": 0.19183684885501862, "learning_rate": 4.1855349980592573e-05, "loss": 1.0822998046875, "step": 1500 }, { "epoch": 0.1894991136837812, "grad_norm": 0.23563328385353088, "learning_rate": 4.179065855867512e-05, "loss": 1.0753483772277832, "step": 1510 }, { "epoch": 0.1907540747015546, "grad_norm": 0.1735006421804428, "learning_rate": 4.172596713675767e-05, "loss": 1.0641088485717773, "step": 1520 }, { "epoch": 0.19200903571932798, "grad_norm": 0.1672070324420929, "learning_rate": 4.1661275714840214e-05, "loss": 1.202446174621582, "step": 1530 }, { "epoch": 0.19326399673710135, "grad_norm": 0.19555087387561798, "learning_rate": 4.159658429292276e-05, "loss": 1.0974900245666503, "step": 1540 }, { "epoch": 0.19451895775487474, "grad_norm": 0.1727800965309143, "learning_rate": 4.153189287100531e-05, "loss": 1.0884978294372558, "step": 1550 }, { "epoch": 0.19577391877264813, "grad_norm": 0.1973794847726822, "learning_rate": 4.146720144908785e-05, "loss": 1.195762825012207, "step": 1560 }, { "epoch": 0.19702887979042152, "grad_norm": 0.20883244276046753, "learning_rate": 4.14025100271704e-05, "loss": 1.1073166847229003, "step": 1570 }, { "epoch": 0.1982838408081949, "grad_norm": 0.1726527363061905, "learning_rate": 4.133781860525295e-05, "loss": 1.088674545288086, "step": 1580 }, { "epoch": 0.19953880182596828, "grad_norm": 0.15648233890533447, "learning_rate": 4.1273127183335494e-05, "loss": 1.1345646858215332, "step": 1590 }, { "epoch": 0.20079376284374167, "grad_norm": 0.1888463795185089, "learning_rate": 4.120843576141804e-05, "loss": 1.09979190826416, "step": 1600 }, { "epoch": 0.20204872386151504, "grad_norm": 0.19210949540138245, "learning_rate": 4.114374433950058e-05, "loss": 1.1058323860168457, "step": 1610 }, { "epoch": 0.20330368487928843, "grad_norm": 0.19619908928871155, "learning_rate": 4.1079052917583134e-05, "loss": 1.1319670677185059, "step": 1620 }, { "epoch": 0.20455864589706182, "grad_norm": 0.1664625108242035, "learning_rate": 4.1014361495665674e-05, "loss": 1.0515789985656738, "step": 1630 }, { "epoch": 0.20581360691483522, "grad_norm": 0.19872358441352844, "learning_rate": 4.094967007374823e-05, "loss": 1.1266546249389648, "step": 1640 }, { "epoch": 0.20706856793260858, "grad_norm": 0.1681402623653412, "learning_rate": 4.088497865183077e-05, "loss": 1.1066089630126954, "step": 1650 }, { "epoch": 0.20832352895038198, "grad_norm": 0.1954943835735321, "learning_rate": 4.0820287229913315e-05, "loss": 1.1911808013916017, "step": 1660 }, { "epoch": 0.20957848996815537, "grad_norm": 0.17476460337638855, "learning_rate": 4.075559580799586e-05, "loss": 1.038119125366211, "step": 1670 }, { "epoch": 0.21083345098592876, "grad_norm": 0.18508611619472504, "learning_rate": 4.069090438607841e-05, "loss": 1.0409876823425293, "step": 1680 }, { "epoch": 0.21208841200370213, "grad_norm": 0.20123597979545593, "learning_rate": 4.0626212964160955e-05, "loss": 1.0861782073974608, "step": 1690 }, { "epoch": 0.21334337302147552, "grad_norm": 0.2034509927034378, "learning_rate": 4.05615215422435e-05, "loss": 1.1509716033935546, "step": 1700 }, { "epoch": 0.2145983340392489, "grad_norm": 0.16378086805343628, "learning_rate": 4.049683012032605e-05, "loss": 1.1095640182495117, "step": 1710 }, { "epoch": 0.2158532950570223, "grad_norm": 0.2010604441165924, "learning_rate": 4.0432138698408595e-05, "loss": 1.0584364891052247, "step": 1720 }, { "epoch": 0.21710825607479567, "grad_norm": 0.1633748859167099, "learning_rate": 4.0367447276491135e-05, "loss": 1.0981843948364258, "step": 1730 }, { "epoch": 0.21836321709256906, "grad_norm": 0.1888664811849594, "learning_rate": 4.030275585457369e-05, "loss": 1.0630824089050293, "step": 1740 }, { "epoch": 0.21961817811034245, "grad_norm": 0.18354853987693787, "learning_rate": 4.023806443265623e-05, "loss": 1.1649972915649414, "step": 1750 }, { "epoch": 0.22087313912811582, "grad_norm": 0.18206484615802765, "learning_rate": 4.017337301073878e-05, "loss": 1.128775691986084, "step": 1760 }, { "epoch": 0.2221281001458892, "grad_norm": 0.20967255532741547, "learning_rate": 4.010868158882132e-05, "loss": 1.0991705894470214, "step": 1770 }, { "epoch": 0.2233830611636626, "grad_norm": 0.173291876912117, "learning_rate": 4.004399016690387e-05, "loss": 1.133495807647705, "step": 1780 }, { "epoch": 0.224638022181436, "grad_norm": 0.18495796620845795, "learning_rate": 3.9979298744986416e-05, "loss": 1.1338909149169922, "step": 1790 }, { "epoch": 0.22589298319920936, "grad_norm": 0.15466730296611786, "learning_rate": 3.991460732306896e-05, "loss": 1.103053092956543, "step": 1800 }, { "epoch": 0.22714794421698276, "grad_norm": 0.2066875696182251, "learning_rate": 3.984991590115151e-05, "loss": 1.0687838554382325, "step": 1810 }, { "epoch": 0.22840290523475615, "grad_norm": 0.1611821949481964, "learning_rate": 3.9785224479234056e-05, "loss": 1.0882232666015625, "step": 1820 }, { "epoch": 0.22965786625252954, "grad_norm": 0.21903353929519653, "learning_rate": 3.97205330573166e-05, "loss": 1.1616278648376466, "step": 1830 }, { "epoch": 0.2309128272703029, "grad_norm": 0.1864372193813324, "learning_rate": 3.965584163539915e-05, "loss": 1.0734145164489746, "step": 1840 }, { "epoch": 0.2321677882880763, "grad_norm": 0.18822331726551056, "learning_rate": 3.959115021348169e-05, "loss": 1.082595157623291, "step": 1850 }, { "epoch": 0.2334227493058497, "grad_norm": 0.17215979099273682, "learning_rate": 3.952645879156424e-05, "loss": 1.0656033515930177, "step": 1860 }, { "epoch": 0.23467771032362308, "grad_norm": 0.19856449961662292, "learning_rate": 3.946176736964678e-05, "loss": 1.0940080642700196, "step": 1870 }, { "epoch": 0.23593267134139645, "grad_norm": 0.19286569952964783, "learning_rate": 3.9397075947729336e-05, "loss": 1.1294413566589356, "step": 1880 }, { "epoch": 0.23718763235916984, "grad_norm": 0.21396370232105255, "learning_rate": 3.9332384525811876e-05, "loss": 1.1367197990417481, "step": 1890 }, { "epoch": 0.23844259337694323, "grad_norm": 0.19455762207508087, "learning_rate": 3.926769310389442e-05, "loss": 1.117790412902832, "step": 1900 }, { "epoch": 0.23969755439471663, "grad_norm": 0.19210918247699738, "learning_rate": 3.920300168197697e-05, "loss": 1.078984260559082, "step": 1910 }, { "epoch": 0.24095251541249, "grad_norm": 0.17716605961322784, "learning_rate": 3.9138310260059516e-05, "loss": 1.1049820899963378, "step": 1920 }, { "epoch": 0.24220747643026339, "grad_norm": 0.1894821971654892, "learning_rate": 3.907361883814206e-05, "loss": 1.1215306282043458, "step": 1930 }, { "epoch": 0.24346243744803678, "grad_norm": 0.16090793907642365, "learning_rate": 3.900892741622461e-05, "loss": 1.0604351997375487, "step": 1940 }, { "epoch": 0.24471739846581014, "grad_norm": 0.16751867532730103, "learning_rate": 3.894423599430716e-05, "loss": 1.1072792053222655, "step": 1950 }, { "epoch": 0.24597235948358354, "grad_norm": 0.18883423507213593, "learning_rate": 3.8879544572389703e-05, "loss": 1.1744267463684082, "step": 1960 }, { "epoch": 0.24722732050135693, "grad_norm": 0.17353016138076782, "learning_rate": 3.881485315047225e-05, "loss": 1.1146465301513673, "step": 1970 }, { "epoch": 0.24848228151913032, "grad_norm": 0.17569345235824585, "learning_rate": 3.87501617285548e-05, "loss": 1.1410762786865234, "step": 1980 }, { "epoch": 0.2497372425369037, "grad_norm": 0.1861223578453064, "learning_rate": 3.868547030663734e-05, "loss": 1.1510584831237793, "step": 1990 }, { "epoch": 0.2509922035546771, "grad_norm": 0.1848827749490738, "learning_rate": 3.862077888471989e-05, "loss": 1.159224510192871, "step": 2000 }, { "epoch": 0.25224716457245044, "grad_norm": 0.17827710509300232, "learning_rate": 3.855608746280243e-05, "loss": 1.0784708976745605, "step": 2010 }, { "epoch": 0.25350212559022384, "grad_norm": 0.2000972479581833, "learning_rate": 3.849139604088498e-05, "loss": 1.0981005668640136, "step": 2020 }, { "epoch": 0.25475708660799723, "grad_norm": 0.17901651561260223, "learning_rate": 3.842670461896753e-05, "loss": 1.110117530822754, "step": 2030 }, { "epoch": 0.2560120476257706, "grad_norm": 0.16541555523872375, "learning_rate": 3.836201319705007e-05, "loss": 1.100083065032959, "step": 2040 }, { "epoch": 0.257267008643544, "grad_norm": 0.1809515506029129, "learning_rate": 3.8297321775132624e-05, "loss": 1.0859627723693848, "step": 2050 }, { "epoch": 0.2585219696613174, "grad_norm": 0.17363417148590088, "learning_rate": 3.8232630353215164e-05, "loss": 1.1074336051940918, "step": 2060 }, { "epoch": 0.2597769306790908, "grad_norm": 0.19632141292095184, "learning_rate": 3.816793893129771e-05, "loss": 1.092854881286621, "step": 2070 }, { "epoch": 0.2610318916968642, "grad_norm": 0.18531949818134308, "learning_rate": 3.810324750938026e-05, "loss": 1.1531734466552734, "step": 2080 }, { "epoch": 0.26228685271463753, "grad_norm": 0.2005389928817749, "learning_rate": 3.8038556087462804e-05, "loss": 1.0838706016540527, "step": 2090 }, { "epoch": 0.2635418137324109, "grad_norm": 0.2321166694164276, "learning_rate": 3.797386466554535e-05, "loss": 1.1546616554260254, "step": 2100 }, { "epoch": 0.2647967747501843, "grad_norm": 0.1765083223581314, "learning_rate": 3.79091732436279e-05, "loss": 1.0811034202575684, "step": 2110 }, { "epoch": 0.2660517357679577, "grad_norm": 0.18589410185813904, "learning_rate": 3.7844481821710445e-05, "loss": 1.0837715148925782, "step": 2120 }, { "epoch": 0.2673066967857311, "grad_norm": 0.17447498440742493, "learning_rate": 3.777979039979299e-05, "loss": 1.124140453338623, "step": 2130 }, { "epoch": 0.2685616578035045, "grad_norm": 0.18152177333831787, "learning_rate": 3.771509897787554e-05, "loss": 1.1175949096679687, "step": 2140 }, { "epoch": 0.2698166188212779, "grad_norm": 0.18476802110671997, "learning_rate": 3.7650407555958085e-05, "loss": 1.109239387512207, "step": 2150 }, { "epoch": 0.2710715798390512, "grad_norm": 0.1941436380147934, "learning_rate": 3.7585716134040625e-05, "loss": 1.121135139465332, "step": 2160 }, { "epoch": 0.2723265408568246, "grad_norm": 0.21705880761146545, "learning_rate": 3.752102471212318e-05, "loss": 1.0808409690856933, "step": 2170 }, { "epoch": 0.273581501874598, "grad_norm": 0.20270583033561707, "learning_rate": 3.745633329020572e-05, "loss": 1.0374409675598144, "step": 2180 }, { "epoch": 0.2748364628923714, "grad_norm": 0.1888822466135025, "learning_rate": 3.739164186828827e-05, "loss": 1.1372817039489747, "step": 2190 }, { "epoch": 0.2760914239101448, "grad_norm": 0.18205349147319794, "learning_rate": 3.732695044637081e-05, "loss": 1.063914966583252, "step": 2200 }, { "epoch": 0.2773463849279182, "grad_norm": 0.18658319115638733, "learning_rate": 3.726225902445336e-05, "loss": 1.0600374221801758, "step": 2210 }, { "epoch": 0.2786013459456916, "grad_norm": 0.17652806639671326, "learning_rate": 3.7197567602535905e-05, "loss": 1.175550937652588, "step": 2220 }, { "epoch": 0.279856306963465, "grad_norm": 0.3145999312400818, "learning_rate": 3.713287618061845e-05, "loss": 1.1401835441589356, "step": 2230 }, { "epoch": 0.2811112679812383, "grad_norm": 0.19413627684116364, "learning_rate": 3.7068184758701e-05, "loss": 1.0594032287597657, "step": 2240 }, { "epoch": 0.2823662289990117, "grad_norm": 0.19201666116714478, "learning_rate": 3.7003493336783546e-05, "loss": 1.1342046737670899, "step": 2250 }, { "epoch": 0.2836211900167851, "grad_norm": 0.15544365346431732, "learning_rate": 3.693880191486609e-05, "loss": 1.050437831878662, "step": 2260 }, { "epoch": 0.2848761510345585, "grad_norm": 0.19418646395206451, "learning_rate": 3.687411049294864e-05, "loss": 1.1457019805908204, "step": 2270 }, { "epoch": 0.2861311120523319, "grad_norm": 0.1728999763727188, "learning_rate": 3.680941907103118e-05, "loss": 1.0303121566772462, "step": 2280 }, { "epoch": 0.2873860730701053, "grad_norm": 0.17530235648155212, "learning_rate": 3.674472764911373e-05, "loss": 1.056619358062744, "step": 2290 }, { "epoch": 0.28864103408787867, "grad_norm": 0.17840541899204254, "learning_rate": 3.668003622719627e-05, "loss": 1.1103222846984864, "step": 2300 }, { "epoch": 0.289895995105652, "grad_norm": 0.1811124086380005, "learning_rate": 3.6615344805278826e-05, "loss": 1.0357528686523438, "step": 2310 }, { "epoch": 0.2911509561234254, "grad_norm": 0.18500889837741852, "learning_rate": 3.6550653383361366e-05, "loss": 1.0739376068115234, "step": 2320 }, { "epoch": 0.2924059171411988, "grad_norm": 0.1825813204050064, "learning_rate": 3.648596196144391e-05, "loss": 1.0761238098144532, "step": 2330 }, { "epoch": 0.2936608781589722, "grad_norm": 0.16683438420295715, "learning_rate": 3.642127053952646e-05, "loss": 1.1081546783447265, "step": 2340 }, { "epoch": 0.2949158391767456, "grad_norm": 0.2046334445476532, "learning_rate": 3.6356579117609006e-05, "loss": 1.1215451240539551, "step": 2350 }, { "epoch": 0.29617080019451897, "grad_norm": 0.16321095824241638, "learning_rate": 3.629188769569155e-05, "loss": 1.0684563636779785, "step": 2360 }, { "epoch": 0.29742576121229236, "grad_norm": 0.17285047471523285, "learning_rate": 3.62271962737741e-05, "loss": 1.117433452606201, "step": 2370 }, { "epoch": 0.29868072223006575, "grad_norm": 0.17733103036880493, "learning_rate": 3.6162504851856646e-05, "loss": 1.0875020980834962, "step": 2380 }, { "epoch": 0.2999356832478391, "grad_norm": 0.20012889802455902, "learning_rate": 3.609781342993919e-05, "loss": 1.090438175201416, "step": 2390 }, { "epoch": 0.3011906442656125, "grad_norm": 0.2170931100845337, "learning_rate": 3.603312200802173e-05, "loss": 1.1388078689575196, "step": 2400 }, { "epoch": 0.3024456052833859, "grad_norm": 0.1782235950231552, "learning_rate": 3.596843058610429e-05, "loss": 1.096130657196045, "step": 2410 }, { "epoch": 0.30370056630115927, "grad_norm": 0.1945246011018753, "learning_rate": 3.590373916418683e-05, "loss": 1.1477142333984376, "step": 2420 }, { "epoch": 0.30495552731893266, "grad_norm": 0.18570466339588165, "learning_rate": 3.583904774226938e-05, "loss": 1.1268895149230957, "step": 2430 }, { "epoch": 0.30621048833670605, "grad_norm": 0.18892186880111694, "learning_rate": 3.577435632035192e-05, "loss": 1.0669918060302734, "step": 2440 }, { "epoch": 0.30746544935447945, "grad_norm": 0.1974097490310669, "learning_rate": 3.570966489843447e-05, "loss": 1.1709393501281737, "step": 2450 }, { "epoch": 0.3087204103722528, "grad_norm": 0.24947650730609894, "learning_rate": 3.5644973476517014e-05, "loss": 1.141524314880371, "step": 2460 }, { "epoch": 0.3099753713900262, "grad_norm": 0.20610560476779938, "learning_rate": 3.558028205459956e-05, "loss": 1.1172548294067384, "step": 2470 }, { "epoch": 0.31123033240779957, "grad_norm": 0.1904863864183426, "learning_rate": 3.551559063268211e-05, "loss": 1.092203712463379, "step": 2480 }, { "epoch": 0.31248529342557296, "grad_norm": 0.20900042355060577, "learning_rate": 3.5450899210764654e-05, "loss": 1.164710235595703, "step": 2490 }, { "epoch": 0.31374025444334636, "grad_norm": 0.19653667509555817, "learning_rate": 3.53862077888472e-05, "loss": 1.1293628692626954, "step": 2500 }, { "epoch": 0.31499521546111975, "grad_norm": 0.1921471506357193, "learning_rate": 3.532151636692975e-05, "loss": 1.0006118774414063, "step": 2510 }, { "epoch": 0.31625017647889314, "grad_norm": 0.17006602883338928, "learning_rate": 3.5256824945012294e-05, "loss": 1.1141504287719726, "step": 2520 }, { "epoch": 0.31750513749666653, "grad_norm": 0.19610007107257843, "learning_rate": 3.519213352309484e-05, "loss": 1.0875147819519042, "step": 2530 }, { "epoch": 0.31876009851443987, "grad_norm": 0.18897046148777008, "learning_rate": 3.512744210117739e-05, "loss": 1.1723553657531738, "step": 2540 }, { "epoch": 0.32001505953221326, "grad_norm": 0.2167101353406906, "learning_rate": 3.5062750679259934e-05, "loss": 1.1133469581604003, "step": 2550 }, { "epoch": 0.32127002054998666, "grad_norm": 0.19870373606681824, "learning_rate": 3.499805925734248e-05, "loss": 1.0627870559692383, "step": 2560 }, { "epoch": 0.32252498156776005, "grad_norm": 0.19916154444217682, "learning_rate": 3.493336783542502e-05, "loss": 1.1032466888427734, "step": 2570 }, { "epoch": 0.32377994258553344, "grad_norm": 0.17430101335048676, "learning_rate": 3.4868676413507575e-05, "loss": 1.0216200828552247, "step": 2580 }, { "epoch": 0.32503490360330684, "grad_norm": 0.17178688943386078, "learning_rate": 3.4803984991590115e-05, "loss": 1.072761631011963, "step": 2590 }, { "epoch": 0.32628986462108023, "grad_norm": 0.18140675127506256, "learning_rate": 3.473929356967267e-05, "loss": 1.1414005279541015, "step": 2600 }, { "epoch": 0.3275448256388536, "grad_norm": 0.19831927120685577, "learning_rate": 3.467460214775521e-05, "loss": 1.1272093772888183, "step": 2610 }, { "epoch": 0.32879978665662696, "grad_norm": 0.1755538433790207, "learning_rate": 3.4609910725837755e-05, "loss": 1.102944564819336, "step": 2620 }, { "epoch": 0.33005474767440035, "grad_norm": 0.22686068713665009, "learning_rate": 3.45452193039203e-05, "loss": 1.098013973236084, "step": 2630 }, { "epoch": 0.33130970869217374, "grad_norm": 0.15959154069423676, "learning_rate": 3.448052788200285e-05, "loss": 1.0637240409851074, "step": 2640 }, { "epoch": 0.33256466970994714, "grad_norm": 0.19935756921768188, "learning_rate": 3.4415836460085395e-05, "loss": 1.0774771690368652, "step": 2650 }, { "epoch": 0.33381963072772053, "grad_norm": 0.16935963928699493, "learning_rate": 3.435114503816794e-05, "loss": 1.0928321838378907, "step": 2660 }, { "epoch": 0.3350745917454939, "grad_norm": 0.1932937055826187, "learning_rate": 3.428645361625049e-05, "loss": 1.034721279144287, "step": 2670 }, { "epoch": 0.3363295527632673, "grad_norm": 0.20397278666496277, "learning_rate": 3.4221762194333035e-05, "loss": 1.1345463752746583, "step": 2680 }, { "epoch": 0.33758451378104065, "grad_norm": 0.1837303638458252, "learning_rate": 3.4157070772415575e-05, "loss": 1.1856120109558106, "step": 2690 }, { "epoch": 0.33883947479881404, "grad_norm": 0.2031344324350357, "learning_rate": 3.409237935049813e-05, "loss": 1.0397148132324219, "step": 2700 }, { "epoch": 0.34009443581658744, "grad_norm": 0.20356477797031403, "learning_rate": 3.402768792858067e-05, "loss": 1.0965538024902344, "step": 2710 }, { "epoch": 0.34134939683436083, "grad_norm": 0.17668817937374115, "learning_rate": 3.396299650666322e-05, "loss": 1.0809722900390626, "step": 2720 }, { "epoch": 0.3426043578521342, "grad_norm": 0.20212671160697937, "learning_rate": 3.389830508474576e-05, "loss": 1.0418371200561523, "step": 2730 }, { "epoch": 0.3438593188699076, "grad_norm": 0.17411935329437256, "learning_rate": 3.383361366282831e-05, "loss": 1.0937856674194335, "step": 2740 }, { "epoch": 0.345114279887681, "grad_norm": 0.20167027413845062, "learning_rate": 3.3768922240910856e-05, "loss": 1.0584357261657715, "step": 2750 }, { "epoch": 0.3463692409054544, "grad_norm": 0.23438484966754913, "learning_rate": 3.37042308189934e-05, "loss": 1.1240981101989747, "step": 2760 }, { "epoch": 0.34762420192322774, "grad_norm": 0.20869354903697968, "learning_rate": 3.363953939707595e-05, "loss": 1.0020330429077149, "step": 2770 }, { "epoch": 0.34887916294100113, "grad_norm": 0.17483501136302948, "learning_rate": 3.3574847975158496e-05, "loss": 1.0768984794616698, "step": 2780 }, { "epoch": 0.3501341239587745, "grad_norm": 0.19473516941070557, "learning_rate": 3.351015655324104e-05, "loss": 1.1058112144470216, "step": 2790 }, { "epoch": 0.3513890849765479, "grad_norm": 0.18327480554580688, "learning_rate": 3.344546513132359e-05, "loss": 1.1330906867980957, "step": 2800 }, { "epoch": 0.3526440459943213, "grad_norm": 0.19095930457115173, "learning_rate": 3.338077370940613e-05, "loss": 1.0920269012451171, "step": 2810 }, { "epoch": 0.3538990070120947, "grad_norm": 0.20432385802268982, "learning_rate": 3.331608228748868e-05, "loss": 1.0859003067016602, "step": 2820 }, { "epoch": 0.3551539680298681, "grad_norm": 0.17931047081947327, "learning_rate": 3.325139086557122e-05, "loss": 1.0956171989440917, "step": 2830 }, { "epoch": 0.35640892904764143, "grad_norm": 0.19674962759017944, "learning_rate": 3.3186699443653776e-05, "loss": 1.0600525856018066, "step": 2840 }, { "epoch": 0.3576638900654148, "grad_norm": 0.2171870470046997, "learning_rate": 3.3122008021736316e-05, "loss": 1.1198451042175293, "step": 2850 }, { "epoch": 0.3589188510831882, "grad_norm": 0.17714793980121613, "learning_rate": 3.305731659981886e-05, "loss": 1.0424016952514648, "step": 2860 }, { "epoch": 0.3601738121009616, "grad_norm": 0.17595025897026062, "learning_rate": 3.299262517790141e-05, "loss": 1.0837072372436523, "step": 2870 }, { "epoch": 0.361428773118735, "grad_norm": 0.1887323260307312, "learning_rate": 3.292793375598396e-05, "loss": 1.0680608749389648, "step": 2880 }, { "epoch": 0.3626837341365084, "grad_norm": 0.194399893283844, "learning_rate": 3.2863242334066503e-05, "loss": 0.9988933563232422, "step": 2890 }, { "epoch": 0.3639386951542818, "grad_norm": 0.1708306223154068, "learning_rate": 3.279855091214905e-05, "loss": 1.1282541275024414, "step": 2900 }, { "epoch": 0.3651936561720552, "grad_norm": 0.20244979858398438, "learning_rate": 3.27338594902316e-05, "loss": 1.1450789451599122, "step": 2910 }, { "epoch": 0.3664486171898285, "grad_norm": 0.1962517648935318, "learning_rate": 3.2669168068314144e-05, "loss": 1.0602560997009278, "step": 2920 }, { "epoch": 0.3677035782076019, "grad_norm": 0.20567139983177185, "learning_rate": 3.2604476646396684e-05, "loss": 1.1111217498779298, "step": 2930 }, { "epoch": 0.3689585392253753, "grad_norm": 0.20776300132274628, "learning_rate": 3.253978522447924e-05, "loss": 1.0344942092895508, "step": 2940 }, { "epoch": 0.3702135002431487, "grad_norm": 0.21871210634708405, "learning_rate": 3.2475093802561784e-05, "loss": 1.1111419677734375, "step": 2950 }, { "epoch": 0.3714684612609221, "grad_norm": 0.185679093003273, "learning_rate": 3.241040238064433e-05, "loss": 1.1700579643249511, "step": 2960 }, { "epoch": 0.3727234222786955, "grad_norm": 0.19544155895709991, "learning_rate": 3.234571095872688e-05, "loss": 1.0801254272460938, "step": 2970 }, { "epoch": 0.3739783832964689, "grad_norm": 0.20506669580936432, "learning_rate": 3.228101953680942e-05, "loss": 1.1814807891845702, "step": 2980 }, { "epoch": 0.3752333443142422, "grad_norm": 0.17181119322776794, "learning_rate": 3.221632811489197e-05, "loss": 1.0522710800170898, "step": 2990 }, { "epoch": 0.3764883053320156, "grad_norm": 0.23374006152153015, "learning_rate": 3.215163669297451e-05, "loss": 1.0902445793151856, "step": 3000 }, { "epoch": 0.377743266349789, "grad_norm": 0.21061059832572937, "learning_rate": 3.2086945271057064e-05, "loss": 1.1061354637145997, "step": 3010 }, { "epoch": 0.3789982273675624, "grad_norm": 0.18865549564361572, "learning_rate": 3.2022253849139604e-05, "loss": 1.090491485595703, "step": 3020 }, { "epoch": 0.3802531883853358, "grad_norm": 0.19853392243385315, "learning_rate": 3.195756242722215e-05, "loss": 1.1597092628479004, "step": 3030 }, { "epoch": 0.3815081494031092, "grad_norm": 0.1734134405851364, "learning_rate": 3.18928710053047e-05, "loss": 1.1168682098388671, "step": 3040 }, { "epoch": 0.38276311042088257, "grad_norm": 0.18935444951057434, "learning_rate": 3.1828179583387245e-05, "loss": 1.1608116149902343, "step": 3050 }, { "epoch": 0.38401807143865596, "grad_norm": 0.18997101485729218, "learning_rate": 3.176348816146979e-05, "loss": 1.094447135925293, "step": 3060 }, { "epoch": 0.3852730324564293, "grad_norm": 0.16133999824523926, "learning_rate": 3.169879673955234e-05, "loss": 1.1392805099487304, "step": 3070 }, { "epoch": 0.3865279934742027, "grad_norm": 0.19918449223041534, "learning_rate": 3.1634105317634885e-05, "loss": 1.1354280471801759, "step": 3080 }, { "epoch": 0.3877829544919761, "grad_norm": 0.17987719178199768, "learning_rate": 3.156941389571743e-05, "loss": 1.0349795341491699, "step": 3090 }, { "epoch": 0.3890379155097495, "grad_norm": 0.21309016644954681, "learning_rate": 3.150472247379997e-05, "loss": 1.1042888641357422, "step": 3100 }, { "epoch": 0.39029287652752287, "grad_norm": 0.17456930875778198, "learning_rate": 3.1440031051882525e-05, "loss": 1.1372068405151368, "step": 3110 }, { "epoch": 0.39154783754529626, "grad_norm": 0.17959140241146088, "learning_rate": 3.1375339629965065e-05, "loss": 1.0295021057128906, "step": 3120 }, { "epoch": 0.39280279856306966, "grad_norm": 0.21057923138141632, "learning_rate": 3.131064820804762e-05, "loss": 1.14412841796875, "step": 3130 }, { "epoch": 0.39405775958084305, "grad_norm": 0.18713043630123138, "learning_rate": 3.124595678613016e-05, "loss": 1.0776933670043944, "step": 3140 }, { "epoch": 0.3953127205986164, "grad_norm": 0.19994860887527466, "learning_rate": 3.1181265364212705e-05, "loss": 1.0793471336364746, "step": 3150 }, { "epoch": 0.3965676816163898, "grad_norm": 0.18249864876270294, "learning_rate": 3.111657394229525e-05, "loss": 1.061374568939209, "step": 3160 }, { "epoch": 0.39782264263416317, "grad_norm": 0.2102167159318924, "learning_rate": 3.10518825203778e-05, "loss": 1.0960093498229981, "step": 3170 }, { "epoch": 0.39907760365193656, "grad_norm": 0.20832930505275726, "learning_rate": 3.0987191098460346e-05, "loss": 1.033592987060547, "step": 3180 }, { "epoch": 0.40033256466970996, "grad_norm": 0.18993455171585083, "learning_rate": 3.092249967654289e-05, "loss": 1.1482149124145509, "step": 3190 }, { "epoch": 0.40158752568748335, "grad_norm": 0.16434840857982635, "learning_rate": 3.085780825462544e-05, "loss": 1.1257658004760742, "step": 3200 }, { "epoch": 0.40284248670525674, "grad_norm": 0.17940276861190796, "learning_rate": 3.0793116832707986e-05, "loss": 1.031651210784912, "step": 3210 }, { "epoch": 0.4040974477230301, "grad_norm": 0.20808145403862, "learning_rate": 3.0728425410790526e-05, "loss": 1.0851681709289551, "step": 3220 }, { "epoch": 0.40535240874080347, "grad_norm": 0.1987367421388626, "learning_rate": 3.066373398887308e-05, "loss": 1.1109742164611816, "step": 3230 }, { "epoch": 0.40660736975857686, "grad_norm": 0.19617575407028198, "learning_rate": 3.059904256695562e-05, "loss": 1.0537912368774414, "step": 3240 }, { "epoch": 0.40786233077635026, "grad_norm": 0.21639147400856018, "learning_rate": 3.053435114503817e-05, "loss": 1.125661563873291, "step": 3250 }, { "epoch": 0.40911729179412365, "grad_norm": 0.1862943321466446, "learning_rate": 3.0469659723120713e-05, "loss": 1.1534263610839843, "step": 3260 }, { "epoch": 0.41037225281189704, "grad_norm": 0.19426733255386353, "learning_rate": 3.0404968301203263e-05, "loss": 1.1007325172424316, "step": 3270 }, { "epoch": 0.41162721382967044, "grad_norm": 0.19233031570911407, "learning_rate": 3.0340276879285806e-05, "loss": 1.101203441619873, "step": 3280 }, { "epoch": 0.41288217484744383, "grad_norm": 0.1994284987449646, "learning_rate": 3.0275585457368356e-05, "loss": 1.1052302360534667, "step": 3290 }, { "epoch": 0.41413713586521717, "grad_norm": 0.2279525250196457, "learning_rate": 3.02108940354509e-05, "loss": 1.061030387878418, "step": 3300 }, { "epoch": 0.41539209688299056, "grad_norm": 0.17196883261203766, "learning_rate": 3.0146202613533446e-05, "loss": 0.9545375823974609, "step": 3310 }, { "epoch": 0.41664705790076395, "grad_norm": 0.2149975597858429, "learning_rate": 3.008151119161599e-05, "loss": 1.048507022857666, "step": 3320 }, { "epoch": 0.41790201891853734, "grad_norm": 0.16794604063034058, "learning_rate": 3.001681976969854e-05, "loss": 1.0982236862182617, "step": 3330 }, { "epoch": 0.41915697993631074, "grad_norm": 0.177035853266716, "learning_rate": 2.9952128347781083e-05, "loss": 1.0892048835754395, "step": 3340 }, { "epoch": 0.42041194095408413, "grad_norm": 0.19969920814037323, "learning_rate": 2.9887436925863633e-05, "loss": 1.0508790016174316, "step": 3350 }, { "epoch": 0.4216669019718575, "grad_norm": 0.1925683468580246, "learning_rate": 2.9822745503946177e-05, "loss": 1.115567111968994, "step": 3360 }, { "epoch": 0.42292186298963086, "grad_norm": 0.1786133497953415, "learning_rate": 2.9758054082028724e-05, "loss": 1.1230090141296387, "step": 3370 }, { "epoch": 0.42417682400740425, "grad_norm": 0.18185003101825714, "learning_rate": 2.9693362660111267e-05, "loss": 1.0770362854003905, "step": 3380 }, { "epoch": 0.42543178502517764, "grad_norm": 0.18131586909294128, "learning_rate": 2.9628671238193817e-05, "loss": 1.0566128730773925, "step": 3390 }, { "epoch": 0.42668674604295104, "grad_norm": 0.1889081597328186, "learning_rate": 2.9563979816276367e-05, "loss": 1.1312256813049317, "step": 3400 }, { "epoch": 0.42794170706072443, "grad_norm": 0.19797080755233765, "learning_rate": 2.949928839435891e-05, "loss": 1.097049617767334, "step": 3410 }, { "epoch": 0.4291966680784978, "grad_norm": 0.21253708004951477, "learning_rate": 2.9434596972441457e-05, "loss": 1.063642692565918, "step": 3420 }, { "epoch": 0.4304516290962712, "grad_norm": 0.18668265640735626, "learning_rate": 2.9369905550524e-05, "loss": 1.11463623046875, "step": 3430 }, { "epoch": 0.4317065901140446, "grad_norm": 0.1904267519712448, "learning_rate": 2.930521412860655e-05, "loss": 1.0638154029846192, "step": 3440 }, { "epoch": 0.43296155113181795, "grad_norm": 0.15511858463287354, "learning_rate": 2.9240522706689094e-05, "loss": 1.1070695877075196, "step": 3450 }, { "epoch": 0.43421651214959134, "grad_norm": 0.18738579750061035, "learning_rate": 2.9175831284771644e-05, "loss": 1.0907926559448242, "step": 3460 }, { "epoch": 0.43547147316736473, "grad_norm": 0.20306473970413208, "learning_rate": 2.9111139862854188e-05, "loss": 1.0509754180908204, "step": 3470 }, { "epoch": 0.4367264341851381, "grad_norm": 0.17330707609653473, "learning_rate": 2.9046448440936734e-05, "loss": 1.086292552947998, "step": 3480 }, { "epoch": 0.4379813952029115, "grad_norm": 0.21493610739707947, "learning_rate": 2.8981757019019278e-05, "loss": 1.0941463470458985, "step": 3490 }, { "epoch": 0.4392363562206849, "grad_norm": 0.20350311696529388, "learning_rate": 2.8917065597101828e-05, "loss": 1.1737398147583007, "step": 3500 }, { "epoch": 0.4404913172384583, "grad_norm": 0.19272877275943756, "learning_rate": 2.885237417518437e-05, "loss": 1.0502948760986328, "step": 3510 }, { "epoch": 0.44174627825623164, "grad_norm": 0.17556731402873993, "learning_rate": 2.878768275326692e-05, "loss": 1.103357982635498, "step": 3520 }, { "epoch": 0.44300123927400503, "grad_norm": 0.22516275942325592, "learning_rate": 2.8722991331349465e-05, "loss": 1.1097342491149902, "step": 3530 }, { "epoch": 0.4442562002917784, "grad_norm": 0.17185984551906586, "learning_rate": 2.865829990943201e-05, "loss": 1.0627781867980957, "step": 3540 }, { "epoch": 0.4455111613095518, "grad_norm": 0.20243440568447113, "learning_rate": 2.8593608487514555e-05, "loss": 1.083481216430664, "step": 3550 }, { "epoch": 0.4467661223273252, "grad_norm": 0.20377662777900696, "learning_rate": 2.8528917065597105e-05, "loss": 1.0517413139343261, "step": 3560 }, { "epoch": 0.4480210833450986, "grad_norm": 0.19456355273723602, "learning_rate": 2.846422564367965e-05, "loss": 1.0656819343566895, "step": 3570 }, { "epoch": 0.449276044362872, "grad_norm": 0.21682517230510712, "learning_rate": 2.83995342217622e-05, "loss": 1.0958752632141113, "step": 3580 }, { "epoch": 0.4505310053806454, "grad_norm": 0.19284336268901825, "learning_rate": 2.8334842799844742e-05, "loss": 1.0725554466247558, "step": 3590 }, { "epoch": 0.4517859663984187, "grad_norm": 0.1780819147825241, "learning_rate": 2.827015137792729e-05, "loss": 1.0786738395690918, "step": 3600 }, { "epoch": 0.4530409274161921, "grad_norm": 0.19883157312870026, "learning_rate": 2.8205459956009832e-05, "loss": 1.066401481628418, "step": 3610 }, { "epoch": 0.4542958884339655, "grad_norm": 0.1856444627046585, "learning_rate": 2.8140768534092382e-05, "loss": 1.1166013717651366, "step": 3620 }, { "epoch": 0.4555508494517389, "grad_norm": 0.2152242809534073, "learning_rate": 2.8076077112174925e-05, "loss": 1.0979412078857422, "step": 3630 }, { "epoch": 0.4568058104695123, "grad_norm": 0.19830797612667084, "learning_rate": 2.8011385690257476e-05, "loss": 1.1257426261901855, "step": 3640 }, { "epoch": 0.4580607714872857, "grad_norm": 0.2238456904888153, "learning_rate": 2.794669426834002e-05, "loss": 1.1249502182006836, "step": 3650 }, { "epoch": 0.4593157325050591, "grad_norm": 0.17598365247249603, "learning_rate": 2.7882002846422566e-05, "loss": 1.0131544113159179, "step": 3660 }, { "epoch": 0.4605706935228324, "grad_norm": 0.2041376829147339, "learning_rate": 2.781731142450511e-05, "loss": 1.0872151374816894, "step": 3670 }, { "epoch": 0.4618256545406058, "grad_norm": 0.20209568738937378, "learning_rate": 2.775262000258766e-05, "loss": 1.0570232391357421, "step": 3680 }, { "epoch": 0.4630806155583792, "grad_norm": 0.1764563024044037, "learning_rate": 2.7687928580670203e-05, "loss": 1.0938175201416016, "step": 3690 }, { "epoch": 0.4643355765761526, "grad_norm": 0.197507843375206, "learning_rate": 2.7623237158752753e-05, "loss": 1.0432971000671387, "step": 3700 }, { "epoch": 0.465590537593926, "grad_norm": 0.1874912828207016, "learning_rate": 2.7558545736835296e-05, "loss": 1.0999769210815429, "step": 3710 }, { "epoch": 0.4668454986116994, "grad_norm": 0.18974807858467102, "learning_rate": 2.7493854314917843e-05, "loss": 1.1539668083190917, "step": 3720 }, { "epoch": 0.4681004596294728, "grad_norm": 0.20754633843898773, "learning_rate": 2.7429162893000386e-05, "loss": 1.0792550086975097, "step": 3730 }, { "epoch": 0.46935542064724617, "grad_norm": 0.18311122059822083, "learning_rate": 2.7364471471082936e-05, "loss": 1.0288339614868165, "step": 3740 }, { "epoch": 0.4706103816650195, "grad_norm": 0.21719680726528168, "learning_rate": 2.729978004916548e-05, "loss": 1.0714326858520509, "step": 3750 }, { "epoch": 0.4718653426827929, "grad_norm": 0.2074572890996933, "learning_rate": 2.723508862724803e-05, "loss": 1.0084345817565918, "step": 3760 }, { "epoch": 0.4731203037005663, "grad_norm": 0.1585981547832489, "learning_rate": 2.7170397205330573e-05, "loss": 1.065823745727539, "step": 3770 }, { "epoch": 0.4743752647183397, "grad_norm": 0.1950136125087738, "learning_rate": 2.710570578341312e-05, "loss": 1.0472920417785645, "step": 3780 }, { "epoch": 0.4756302257361131, "grad_norm": 0.20094197988510132, "learning_rate": 2.7041014361495663e-05, "loss": 1.0385844230651855, "step": 3790 }, { "epoch": 0.47688518675388647, "grad_norm": 0.18639400601387024, "learning_rate": 2.6976322939578213e-05, "loss": 1.065108299255371, "step": 3800 }, { "epoch": 0.47814014777165986, "grad_norm": 0.190430149435997, "learning_rate": 2.6911631517660757e-05, "loss": 1.0249187469482421, "step": 3810 }, { "epoch": 0.47939510878943326, "grad_norm": 0.19305895268917084, "learning_rate": 2.6846940095743307e-05, "loss": 1.0661443710327148, "step": 3820 }, { "epoch": 0.4806500698072066, "grad_norm": 0.1928487867116928, "learning_rate": 2.678224867382585e-05, "loss": 1.0664926528930665, "step": 3830 }, { "epoch": 0.48190503082498, "grad_norm": 0.19009914994239807, "learning_rate": 2.6717557251908397e-05, "loss": 1.0212774276733398, "step": 3840 }, { "epoch": 0.4831599918427534, "grad_norm": 0.18818970024585724, "learning_rate": 2.665286582999094e-05, "loss": 1.0675536155700684, "step": 3850 }, { "epoch": 0.48441495286052677, "grad_norm": 0.19587545096874237, "learning_rate": 2.658817440807349e-05, "loss": 1.1134575843811034, "step": 3860 }, { "epoch": 0.48566991387830016, "grad_norm": 0.1728365272283554, "learning_rate": 2.652348298615604e-05, "loss": 1.0674519538879395, "step": 3870 }, { "epoch": 0.48692487489607356, "grad_norm": 0.18507780134677887, "learning_rate": 2.6458791564238584e-05, "loss": 1.0612947463989257, "step": 3880 }, { "epoch": 0.48817983591384695, "grad_norm": 0.17732012271881104, "learning_rate": 2.639410014232113e-05, "loss": 1.1351598739624023, "step": 3890 }, { "epoch": 0.4894347969316203, "grad_norm": 0.1905921846628189, "learning_rate": 2.6329408720403674e-05, "loss": 1.105655574798584, "step": 3900 }, { "epoch": 0.4906897579493937, "grad_norm": 0.1902066469192505, "learning_rate": 2.6264717298486224e-05, "loss": 1.1006397247314452, "step": 3910 }, { "epoch": 0.49194471896716707, "grad_norm": 0.16983291506767273, "learning_rate": 2.6200025876568768e-05, "loss": 1.0307989120483398, "step": 3920 }, { "epoch": 0.49319967998494046, "grad_norm": 0.16205719113349915, "learning_rate": 2.6135334454651318e-05, "loss": 1.1160860061645508, "step": 3930 }, { "epoch": 0.49445464100271386, "grad_norm": 0.16868488490581512, "learning_rate": 2.607064303273386e-05, "loss": 1.1177806854248047, "step": 3940 }, { "epoch": 0.49570960202048725, "grad_norm": 0.19134441018104553, "learning_rate": 2.6005951610816408e-05, "loss": 1.0095656394958497, "step": 3950 }, { "epoch": 0.49696456303826064, "grad_norm": 0.17027780413627625, "learning_rate": 2.594126018889895e-05, "loss": 1.1302468299865722, "step": 3960 }, { "epoch": 0.49821952405603404, "grad_norm": 0.16149652004241943, "learning_rate": 2.58765687669815e-05, "loss": 1.0545706748962402, "step": 3970 }, { "epoch": 0.4994744850738074, "grad_norm": 0.1791815310716629, "learning_rate": 2.5811877345064045e-05, "loss": 1.0691394805908203, "step": 3980 }, { "epoch": 0.5007294460915808, "grad_norm": 0.19383347034454346, "learning_rate": 2.5747185923146595e-05, "loss": 1.071311855316162, "step": 3990 }, { "epoch": 0.5019844071093542, "grad_norm": 0.20299892127513885, "learning_rate": 2.5682494501229138e-05, "loss": 1.0849023818969727, "step": 4000 }, { "epoch": 0.5032393681271276, "grad_norm": 0.18995410203933716, "learning_rate": 2.5617803079311685e-05, "loss": 1.0594166755676269, "step": 4010 }, { "epoch": 0.5044943291449009, "grad_norm": 0.22304005920886993, "learning_rate": 2.5553111657394228e-05, "loss": 1.0542009353637696, "step": 4020 }, { "epoch": 0.5057492901626743, "grad_norm": 0.22307968139648438, "learning_rate": 2.548842023547678e-05, "loss": 1.107966136932373, "step": 4030 }, { "epoch": 0.5070042511804477, "grad_norm": 0.1840265691280365, "learning_rate": 2.5423728813559322e-05, "loss": 1.1096989631652832, "step": 4040 }, { "epoch": 0.5082592121982211, "grad_norm": 0.21252040565013885, "learning_rate": 2.5359037391641872e-05, "loss": 1.0967977523803711, "step": 4050 }, { "epoch": 0.5095141732159945, "grad_norm": 0.18273372948169708, "learning_rate": 2.5294345969724415e-05, "loss": 1.018519401550293, "step": 4060 }, { "epoch": 0.5107691342337679, "grad_norm": 0.18331177532672882, "learning_rate": 2.5229654547806962e-05, "loss": 1.049039077758789, "step": 4070 }, { "epoch": 0.5120240952515412, "grad_norm": 0.19110405445098877, "learning_rate": 2.5164963125889505e-05, "loss": 1.0827875137329102, "step": 4080 }, { "epoch": 0.5132790562693146, "grad_norm": 0.16529154777526855, "learning_rate": 2.5100271703972055e-05, "loss": 1.0904165267944337, "step": 4090 }, { "epoch": 0.514534017287088, "grad_norm": 0.17822785675525665, "learning_rate": 2.50355802820546e-05, "loss": 1.0597058296203614, "step": 4100 }, { "epoch": 0.5157889783048614, "grad_norm": 0.1769823282957077, "learning_rate": 2.497088886013715e-05, "loss": 1.0827682495117188, "step": 4110 }, { "epoch": 0.5170439393226348, "grad_norm": 0.19942662119865417, "learning_rate": 2.4906197438219696e-05, "loss": 1.0987505912780762, "step": 4120 }, { "epoch": 0.5182989003404082, "grad_norm": 0.19398993253707886, "learning_rate": 2.484150601630224e-05, "loss": 1.0218748092651366, "step": 4130 }, { "epoch": 0.5195538613581816, "grad_norm": 0.21403302252292633, "learning_rate": 2.4776814594384786e-05, "loss": 1.1508358001708985, "step": 4140 }, { "epoch": 0.5208088223759549, "grad_norm": 0.1951906532049179, "learning_rate": 2.4712123172467333e-05, "loss": 1.1334738731384277, "step": 4150 }, { "epoch": 0.5220637833937284, "grad_norm": 0.18993432819843292, "learning_rate": 2.464743175054988e-05, "loss": 1.0698814392089844, "step": 4160 }, { "epoch": 0.5233187444115017, "grad_norm": 0.2009001225233078, "learning_rate": 2.4582740328632426e-05, "loss": 1.1019264221191407, "step": 4170 }, { "epoch": 0.5245737054292751, "grad_norm": 0.1592164784669876, "learning_rate": 2.4518048906714973e-05, "loss": 1.0502429008483887, "step": 4180 }, { "epoch": 0.5258286664470485, "grad_norm": 0.20943738520145416, "learning_rate": 2.4453357484797516e-05, "loss": 1.1068140029907227, "step": 4190 }, { "epoch": 0.5270836274648218, "grad_norm": 0.1841343641281128, "learning_rate": 2.4388666062880063e-05, "loss": 1.0863500595092774, "step": 4200 }, { "epoch": 0.5283385884825953, "grad_norm": 0.21408788859844208, "learning_rate": 2.432397464096261e-05, "loss": 1.0539629936218262, "step": 4210 }, { "epoch": 0.5295935495003686, "grad_norm": 0.2031819224357605, "learning_rate": 2.4259283219045156e-05, "loss": 1.0763005256652831, "step": 4220 }, { "epoch": 0.5308485105181421, "grad_norm": 0.1948988288640976, "learning_rate": 2.4194591797127703e-05, "loss": 1.1096826553344727, "step": 4230 }, { "epoch": 0.5321034715359154, "grad_norm": 0.19453151524066925, "learning_rate": 2.412990037521025e-05, "loss": 1.0530555725097657, "step": 4240 }, { "epoch": 0.5333584325536888, "grad_norm": 0.22539453208446503, "learning_rate": 2.4065208953292793e-05, "loss": 1.0634571075439454, "step": 4250 }, { "epoch": 0.5346133935714622, "grad_norm": 0.2108486145734787, "learning_rate": 2.400051753137534e-05, "loss": 1.0783962249755858, "step": 4260 }, { "epoch": 0.5358683545892355, "grad_norm": 0.22550557553768158, "learning_rate": 2.3935826109457887e-05, "loss": 1.0793485641479492, "step": 4270 }, { "epoch": 0.537123315607009, "grad_norm": 0.18710452318191528, "learning_rate": 2.3871134687540433e-05, "loss": 1.0253171920776367, "step": 4280 }, { "epoch": 0.5383782766247823, "grad_norm": 0.18738947808742523, "learning_rate": 2.380644326562298e-05, "loss": 1.0552565574645996, "step": 4290 }, { "epoch": 0.5396332376425558, "grad_norm": 0.19568362832069397, "learning_rate": 2.3741751843705527e-05, "loss": 1.0678536415100097, "step": 4300 }, { "epoch": 0.5408881986603291, "grad_norm": 0.16987361013889313, "learning_rate": 2.3677060421788074e-05, "loss": 1.1124211311340333, "step": 4310 }, { "epoch": 0.5421431596781024, "grad_norm": 0.2189544439315796, "learning_rate": 2.3612368999870617e-05, "loss": 1.0600645065307617, "step": 4320 }, { "epoch": 0.5433981206958759, "grad_norm": 0.17936541140079498, "learning_rate": 2.3547677577953164e-05, "loss": 1.0915284156799316, "step": 4330 }, { "epoch": 0.5446530817136492, "grad_norm": 0.19402658939361572, "learning_rate": 2.348298615603571e-05, "loss": 1.0507197380065918, "step": 4340 }, { "epoch": 0.5459080427314227, "grad_norm": 0.23639065027236938, "learning_rate": 2.3418294734118257e-05, "loss": 1.1000999450683593, "step": 4350 }, { "epoch": 0.547163003749196, "grad_norm": 0.1931799352169037, "learning_rate": 2.3353603312200804e-05, "loss": 1.0721006393432617, "step": 4360 }, { "epoch": 0.5484179647669695, "grad_norm": 0.20396549999713898, "learning_rate": 2.328891189028335e-05, "loss": 1.1172670364379882, "step": 4370 }, { "epoch": 0.5496729257847428, "grad_norm": 0.2059352695941925, "learning_rate": 2.3224220468365894e-05, "loss": 1.070617389678955, "step": 4380 }, { "epoch": 0.5509278868025161, "grad_norm": 0.2139720320701599, "learning_rate": 2.315952904644844e-05, "loss": 1.0570523262023925, "step": 4390 }, { "epoch": 0.5521828478202896, "grad_norm": 0.16804735362529755, "learning_rate": 2.3094837624530988e-05, "loss": 1.0962253570556642, "step": 4400 }, { "epoch": 0.5534378088380629, "grad_norm": 0.19438262283802032, "learning_rate": 2.3030146202613534e-05, "loss": 1.052304172515869, "step": 4410 }, { "epoch": 0.5546927698558364, "grad_norm": 0.1669342964887619, "learning_rate": 2.296545478069608e-05, "loss": 0.9899564743041992, "step": 4420 }, { "epoch": 0.5559477308736097, "grad_norm": 0.19921936094760895, "learning_rate": 2.2900763358778628e-05, "loss": 1.143895435333252, "step": 4430 }, { "epoch": 0.5572026918913832, "grad_norm": 0.18776285648345947, "learning_rate": 2.283607193686117e-05, "loss": 1.1010767936706543, "step": 4440 }, { "epoch": 0.5584576529091565, "grad_norm": 0.19864770770072937, "learning_rate": 2.2771380514943718e-05, "loss": 1.1463205337524414, "step": 4450 }, { "epoch": 0.55971261392693, "grad_norm": 0.251132607460022, "learning_rate": 2.2706689093026265e-05, "loss": 1.1078590393066405, "step": 4460 }, { "epoch": 0.5609675749447033, "grad_norm": 0.18442977964878082, "learning_rate": 2.264199767110881e-05, "loss": 1.0865904808044433, "step": 4470 }, { "epoch": 0.5622225359624766, "grad_norm": 0.19264988601207733, "learning_rate": 2.2577306249191358e-05, "loss": 1.098098087310791, "step": 4480 }, { "epoch": 0.5634774969802501, "grad_norm": 0.22054506838321686, "learning_rate": 2.2512614827273905e-05, "loss": 1.1162951469421387, "step": 4490 }, { "epoch": 0.5647324579980234, "grad_norm": 0.18666113913059235, "learning_rate": 2.244792340535645e-05, "loss": 0.9729397773742676, "step": 4500 }, { "epoch": 0.5659874190157969, "grad_norm": 0.19067735970020294, "learning_rate": 2.2383231983438995e-05, "loss": 1.0903347969055175, "step": 4510 }, { "epoch": 0.5672423800335702, "grad_norm": 0.18109118938446045, "learning_rate": 2.2318540561521542e-05, "loss": 1.063498592376709, "step": 4520 }, { "epoch": 0.5684973410513436, "grad_norm": 0.18894648551940918, "learning_rate": 2.225384913960409e-05, "loss": 1.0704896926879883, "step": 4530 }, { "epoch": 0.569752302069117, "grad_norm": 0.1963120698928833, "learning_rate": 2.218915771768664e-05, "loss": 1.0621774673461915, "step": 4540 }, { "epoch": 0.5710072630868903, "grad_norm": 0.22396929562091827, "learning_rate": 2.2124466295769182e-05, "loss": 1.084723663330078, "step": 4550 }, { "epoch": 0.5722622241046638, "grad_norm": 0.1798727661371231, "learning_rate": 2.205977487385173e-05, "loss": 1.05871639251709, "step": 4560 }, { "epoch": 0.5735171851224371, "grad_norm": 0.1864812821149826, "learning_rate": 2.1995083451934276e-05, "loss": 1.0498836517333985, "step": 4570 }, { "epoch": 0.5747721461402105, "grad_norm": 0.19118115305900574, "learning_rate": 2.1930392030016822e-05, "loss": 1.099202823638916, "step": 4580 }, { "epoch": 0.5760271071579839, "grad_norm": 0.21500501036643982, "learning_rate": 2.186570060809937e-05, "loss": 1.1167634010314942, "step": 4590 }, { "epoch": 0.5772820681757573, "grad_norm": 0.22220079600811005, "learning_rate": 2.1801009186181916e-05, "loss": 1.1403026580810547, "step": 4600 }, { "epoch": 0.5785370291935307, "grad_norm": 0.1848171502351761, "learning_rate": 2.173631776426446e-05, "loss": 1.0590831756591796, "step": 4610 }, { "epoch": 0.579791990211304, "grad_norm": 0.20435841381549835, "learning_rate": 2.1671626342347006e-05, "loss": 1.090967559814453, "step": 4620 }, { "epoch": 0.5810469512290775, "grad_norm": 0.1693217009305954, "learning_rate": 2.1606934920429553e-05, "loss": 1.066522216796875, "step": 4630 }, { "epoch": 0.5823019122468508, "grad_norm": 0.18368832767009735, "learning_rate": 2.15422434985121e-05, "loss": 1.0151689529418946, "step": 4640 }, { "epoch": 0.5835568732646242, "grad_norm": 0.18117396533489227, "learning_rate": 2.1477552076594646e-05, "loss": 1.1592654228210448, "step": 4650 }, { "epoch": 0.5848118342823976, "grad_norm": 0.17642000317573547, "learning_rate": 2.1412860654677193e-05, "loss": 1.0778505325317382, "step": 4660 }, { "epoch": 0.586066795300171, "grad_norm": 0.1876673847436905, "learning_rate": 2.1348169232759736e-05, "loss": 1.077680492401123, "step": 4670 }, { "epoch": 0.5873217563179444, "grad_norm": 0.20584918558597565, "learning_rate": 2.1283477810842283e-05, "loss": 1.0664028167724608, "step": 4680 }, { "epoch": 0.5885767173357178, "grad_norm": 0.18888245522975922, "learning_rate": 2.121878638892483e-05, "loss": 1.044777774810791, "step": 4690 }, { "epoch": 0.5898316783534912, "grad_norm": 0.18159054219722748, "learning_rate": 2.1154094967007376e-05, "loss": 1.140822982788086, "step": 4700 }, { "epoch": 0.5910866393712645, "grad_norm": 0.209550142288208, "learning_rate": 2.1089403545089923e-05, "loss": 1.1214600563049317, "step": 4710 }, { "epoch": 0.5923416003890379, "grad_norm": 0.19829699397087097, "learning_rate": 2.102471212317247e-05, "loss": 1.0791502952575684, "step": 4720 }, { "epoch": 0.5935965614068113, "grad_norm": 0.18692730367183685, "learning_rate": 2.0960020701255013e-05, "loss": 1.083406352996826, "step": 4730 }, { "epoch": 0.5948515224245847, "grad_norm": 0.203223317861557, "learning_rate": 2.089532927933756e-05, "loss": 1.1177435874938966, "step": 4740 }, { "epoch": 0.5961064834423581, "grad_norm": 0.18921977281570435, "learning_rate": 2.0830637857420107e-05, "loss": 1.1003907203674317, "step": 4750 }, { "epoch": 0.5973614444601315, "grad_norm": 0.21286743879318237, "learning_rate": 2.0765946435502654e-05, "loss": 1.07603759765625, "step": 4760 }, { "epoch": 0.5986164054779048, "grad_norm": 0.20326635241508484, "learning_rate": 2.07012550135852e-05, "loss": 1.0025395393371581, "step": 4770 }, { "epoch": 0.5998713664956782, "grad_norm": 0.19325220584869385, "learning_rate": 2.0636563591667747e-05, "loss": 1.119535255432129, "step": 4780 }, { "epoch": 0.6011263275134516, "grad_norm": 0.19274058938026428, "learning_rate": 2.057187216975029e-05, "loss": 1.073643970489502, "step": 4790 }, { "epoch": 0.602381288531225, "grad_norm": 0.1856803596019745, "learning_rate": 2.0507180747832837e-05, "loss": 1.0840465545654296, "step": 4800 }, { "epoch": 0.6036362495489984, "grad_norm": 0.17938411235809326, "learning_rate": 2.0442489325915384e-05, "loss": 0.9779937744140625, "step": 4810 }, { "epoch": 0.6048912105667718, "grad_norm": 0.22091540694236755, "learning_rate": 2.037779790399793e-05, "loss": 1.023126220703125, "step": 4820 }, { "epoch": 0.6061461715845452, "grad_norm": 0.16854438185691833, "learning_rate": 2.0313106482080477e-05, "loss": 1.0393505096435547, "step": 4830 }, { "epoch": 0.6074011326023185, "grad_norm": 0.18858060240745544, "learning_rate": 2.0248415060163024e-05, "loss": 1.0123619079589843, "step": 4840 }, { "epoch": 0.6086560936200919, "grad_norm": 0.1751975268125534, "learning_rate": 2.0183723638245568e-05, "loss": 1.1000499725341797, "step": 4850 }, { "epoch": 0.6099110546378653, "grad_norm": 0.21609099209308624, "learning_rate": 2.0119032216328114e-05, "loss": 1.0882283210754395, "step": 4860 }, { "epoch": 0.6111660156556387, "grad_norm": 0.18892847001552582, "learning_rate": 2.005434079441066e-05, "loss": 1.0862364768981934, "step": 4870 }, { "epoch": 0.6124209766734121, "grad_norm": 0.16767629981040955, "learning_rate": 1.9989649372493208e-05, "loss": 1.104989719390869, "step": 4880 }, { "epoch": 0.6136759376911854, "grad_norm": 0.16860723495483398, "learning_rate": 1.9924957950575755e-05, "loss": 1.1031203269958496, "step": 4890 }, { "epoch": 0.6149308987089589, "grad_norm": 0.22763581573963165, "learning_rate": 1.98602665286583e-05, "loss": 1.0784507751464845, "step": 4900 }, { "epoch": 0.6161858597267322, "grad_norm": 0.1832362413406372, "learning_rate": 1.9795575106740845e-05, "loss": 1.139822292327881, "step": 4910 }, { "epoch": 0.6174408207445056, "grad_norm": 0.18726925551891327, "learning_rate": 1.973088368482339e-05, "loss": 1.0816995620727539, "step": 4920 }, { "epoch": 0.618695781762279, "grad_norm": 0.21192297339439392, "learning_rate": 1.9666192262905938e-05, "loss": 1.0300673484802245, "step": 4930 }, { "epoch": 0.6199507427800524, "grad_norm": 0.18673470616340637, "learning_rate": 1.9601500840988485e-05, "loss": 1.0630935668945312, "step": 4940 }, { "epoch": 0.6212057037978258, "grad_norm": 0.19416026771068573, "learning_rate": 1.953680941907103e-05, "loss": 1.0864192962646484, "step": 4950 }, { "epoch": 0.6224606648155991, "grad_norm": 0.17337632179260254, "learning_rate": 1.947211799715358e-05, "loss": 1.0580739974975586, "step": 4960 }, { "epoch": 0.6237156258333726, "grad_norm": 0.1994207203388214, "learning_rate": 1.9407426575236125e-05, "loss": 1.0788381576538086, "step": 4970 }, { "epoch": 0.6249705868511459, "grad_norm": 0.1950082629919052, "learning_rate": 1.934273515331867e-05, "loss": 1.0657068252563477, "step": 4980 }, { "epoch": 0.6262255478689194, "grad_norm": 0.19410517811775208, "learning_rate": 1.9278043731401215e-05, "loss": 0.9977058410644531, "step": 4990 }, { "epoch": 0.6274805088866927, "grad_norm": 0.18865294754505157, "learning_rate": 1.9213352309483765e-05, "loss": 1.0384069442749024, "step": 5000 }, { "epoch": 0.628735469904466, "grad_norm": 0.20763295888900757, "learning_rate": 1.9148660887566312e-05, "loss": 1.036677646636963, "step": 5010 }, { "epoch": 0.6299904309222395, "grad_norm": 0.20186074078083038, "learning_rate": 1.9083969465648855e-05, "loss": 1.0434916496276856, "step": 5020 }, { "epoch": 0.6312453919400128, "grad_norm": 0.20196175575256348, "learning_rate": 1.9019278043731402e-05, "loss": 1.0678818702697754, "step": 5030 }, { "epoch": 0.6325003529577863, "grad_norm": 0.18434342741966248, "learning_rate": 1.895458662181395e-05, "loss": 0.9929670333862305, "step": 5040 }, { "epoch": 0.6337553139755596, "grad_norm": 0.17742949724197388, "learning_rate": 1.8889895199896496e-05, "loss": 1.0131173133850098, "step": 5050 }, { "epoch": 0.6350102749933331, "grad_norm": 0.1894538849592209, "learning_rate": 1.8825203777979042e-05, "loss": 0.9884425163269043, "step": 5060 }, { "epoch": 0.6362652360111064, "grad_norm": 0.19760024547576904, "learning_rate": 1.876051235606159e-05, "loss": 1.1242119789123535, "step": 5070 }, { "epoch": 0.6375201970288797, "grad_norm": 0.21608242392539978, "learning_rate": 1.8695820934144136e-05, "loss": 1.0201186180114745, "step": 5080 }, { "epoch": 0.6387751580466532, "grad_norm": 0.19353747367858887, "learning_rate": 1.863112951222668e-05, "loss": 1.0742148399353026, "step": 5090 }, { "epoch": 0.6400301190644265, "grad_norm": 0.1773729920387268, "learning_rate": 1.8566438090309226e-05, "loss": 1.0668268203735352, "step": 5100 }, { "epoch": 0.6412850800822, "grad_norm": 0.16842322051525116, "learning_rate": 1.8501746668391773e-05, "loss": 1.111803913116455, "step": 5110 }, { "epoch": 0.6425400410999733, "grad_norm": 0.17250721156597137, "learning_rate": 1.843705524647432e-05, "loss": 1.0860756874084472, "step": 5120 }, { "epoch": 0.6437950021177468, "grad_norm": 0.20766721665859222, "learning_rate": 1.8372363824556866e-05, "loss": 1.0899797439575196, "step": 5130 }, { "epoch": 0.6450499631355201, "grad_norm": 0.16800430417060852, "learning_rate": 1.8307672402639413e-05, "loss": 1.0710078239440919, "step": 5140 }, { "epoch": 0.6463049241532934, "grad_norm": 0.19757574796676636, "learning_rate": 1.8242980980721956e-05, "loss": 1.1292811393737794, "step": 5150 }, { "epoch": 0.6475598851710669, "grad_norm": 0.20521418750286102, "learning_rate": 1.8178289558804503e-05, "loss": 1.0243899345397949, "step": 5160 }, { "epoch": 0.6488148461888402, "grad_norm": 0.16537714004516602, "learning_rate": 1.811359813688705e-05, "loss": 1.057924747467041, "step": 5170 }, { "epoch": 0.6500698072066137, "grad_norm": 0.18862581253051758, "learning_rate": 1.8048906714969597e-05, "loss": 0.996666145324707, "step": 5180 }, { "epoch": 0.651324768224387, "grad_norm": 0.17373378574848175, "learning_rate": 1.7984215293052143e-05, "loss": 1.0661702156066895, "step": 5190 }, { "epoch": 0.6525797292421605, "grad_norm": 0.20793020725250244, "learning_rate": 1.791952387113469e-05, "loss": 1.081492805480957, "step": 5200 }, { "epoch": 0.6538346902599338, "grad_norm": 0.1953442394733429, "learning_rate": 1.7854832449217233e-05, "loss": 1.0730542182922362, "step": 5210 }, { "epoch": 0.6550896512777072, "grad_norm": 0.22261761128902435, "learning_rate": 1.779014102729978e-05, "loss": 1.0947938919067384, "step": 5220 }, { "epoch": 0.6563446122954806, "grad_norm": 0.18047408759593964, "learning_rate": 1.7725449605382327e-05, "loss": 1.1488112449645995, "step": 5230 }, { "epoch": 0.6575995733132539, "grad_norm": 0.21739941835403442, "learning_rate": 1.7660758183464874e-05, "loss": 1.0836384773254395, "step": 5240 }, { "epoch": 0.6588545343310274, "grad_norm": 0.16749468445777893, "learning_rate": 1.759606676154742e-05, "loss": 1.0533074378967284, "step": 5250 }, { "epoch": 0.6601094953488007, "grad_norm": 0.19696615636348724, "learning_rate": 1.7531375339629967e-05, "loss": 1.0351654052734376, "step": 5260 }, { "epoch": 0.6613644563665741, "grad_norm": 0.206893190741539, "learning_rate": 1.746668391771251e-05, "loss": 1.03566312789917, "step": 5270 }, { "epoch": 0.6626194173843475, "grad_norm": 0.21252737939357758, "learning_rate": 1.7401992495795057e-05, "loss": 1.098302173614502, "step": 5280 }, { "epoch": 0.6638743784021209, "grad_norm": 0.18752658367156982, "learning_rate": 1.7337301073877604e-05, "loss": 1.1029739379882812, "step": 5290 }, { "epoch": 0.6651293394198943, "grad_norm": 0.1916881799697876, "learning_rate": 1.727260965196015e-05, "loss": 1.0457392692565919, "step": 5300 }, { "epoch": 0.6663843004376676, "grad_norm": 0.16342513263225555, "learning_rate": 1.7207918230042698e-05, "loss": 1.0347809791564941, "step": 5310 }, { "epoch": 0.6676392614554411, "grad_norm": 0.1860560029745102, "learning_rate": 1.7143226808125244e-05, "loss": 1.0950869560241698, "step": 5320 }, { "epoch": 0.6688942224732144, "grad_norm": 0.19112198054790497, "learning_rate": 1.7078535386207788e-05, "loss": 1.1099298477172852, "step": 5330 }, { "epoch": 0.6701491834909878, "grad_norm": 0.18603506684303284, "learning_rate": 1.7013843964290334e-05, "loss": 1.0756458282470702, "step": 5340 }, { "epoch": 0.6714041445087612, "grad_norm": 0.1958581954240799, "learning_rate": 1.694915254237288e-05, "loss": 1.1287766456604005, "step": 5350 }, { "epoch": 0.6726591055265346, "grad_norm": 0.1939549297094345, "learning_rate": 1.6884461120455428e-05, "loss": 1.1160104751586915, "step": 5360 }, { "epoch": 0.673914066544308, "grad_norm": 0.17285564541816711, "learning_rate": 1.6819769698537975e-05, "loss": 1.0963159561157227, "step": 5370 }, { "epoch": 0.6751690275620813, "grad_norm": 0.1727699637413025, "learning_rate": 1.675507827662052e-05, "loss": 1.139615821838379, "step": 5380 }, { "epoch": 0.6764239885798548, "grad_norm": 0.19430099427700043, "learning_rate": 1.6690386854703065e-05, "loss": 1.0105334281921388, "step": 5390 }, { "epoch": 0.6776789495976281, "grad_norm": 0.1681535243988037, "learning_rate": 1.662569543278561e-05, "loss": 1.0578609466552735, "step": 5400 }, { "epoch": 0.6789339106154015, "grad_norm": 0.18775980174541473, "learning_rate": 1.6561004010868158e-05, "loss": 1.0771096229553223, "step": 5410 }, { "epoch": 0.6801888716331749, "grad_norm": 0.24977093935012817, "learning_rate": 1.6496312588950705e-05, "loss": 1.0842226028442383, "step": 5420 }, { "epoch": 0.6814438326509483, "grad_norm": 0.21266375482082367, "learning_rate": 1.6431621167033252e-05, "loss": 1.01865816116333, "step": 5430 }, { "epoch": 0.6826987936687217, "grad_norm": 0.20400455594062805, "learning_rate": 1.63669297451158e-05, "loss": 1.0554264068603516, "step": 5440 }, { "epoch": 0.683953754686495, "grad_norm": 0.19339953362941742, "learning_rate": 1.6302238323198342e-05, "loss": 1.0870820045471192, "step": 5450 }, { "epoch": 0.6852087157042684, "grad_norm": 0.20825320482254028, "learning_rate": 1.6237546901280892e-05, "loss": 1.1114639282226562, "step": 5460 }, { "epoch": 0.6864636767220418, "grad_norm": 0.184986412525177, "learning_rate": 1.617285547936344e-05, "loss": 1.1310782432556152, "step": 5470 }, { "epoch": 0.6877186377398152, "grad_norm": 0.1939050555229187, "learning_rate": 1.6108164057445985e-05, "loss": 1.1249576568603517, "step": 5480 }, { "epoch": 0.6889735987575886, "grad_norm": 0.18793314695358276, "learning_rate": 1.6043472635528532e-05, "loss": 1.0266096115112304, "step": 5490 }, { "epoch": 0.690228559775362, "grad_norm": 0.18402604758739471, "learning_rate": 1.5978781213611076e-05, "loss": 1.020193386077881, "step": 5500 }, { "epoch": 0.6914835207931354, "grad_norm": 0.1962927132844925, "learning_rate": 1.5914089791693622e-05, "loss": 1.12153902053833, "step": 5510 }, { "epoch": 0.6927384818109088, "grad_norm": 0.21442580223083496, "learning_rate": 1.584939836977617e-05, "loss": 1.076151180267334, "step": 5520 }, { "epoch": 0.6939934428286821, "grad_norm": 0.18974730372428894, "learning_rate": 1.5784706947858716e-05, "loss": 1.0637462615966797, "step": 5530 }, { "epoch": 0.6952484038464555, "grad_norm": 0.19226610660552979, "learning_rate": 1.5720015525941263e-05, "loss": 1.0863216400146485, "step": 5540 }, { "epoch": 0.6965033648642289, "grad_norm": 0.18693000078201294, "learning_rate": 1.565532410402381e-05, "loss": 1.050516128540039, "step": 5550 }, { "epoch": 0.6977583258820023, "grad_norm": 0.2240767627954483, "learning_rate": 1.5590632682106353e-05, "loss": 1.1396910667419433, "step": 5560 }, { "epoch": 0.6990132868997757, "grad_norm": 0.24488072097301483, "learning_rate": 1.55259412601889e-05, "loss": 1.029798984527588, "step": 5570 }, { "epoch": 0.700268247917549, "grad_norm": 0.1701984703540802, "learning_rate": 1.5461249838271446e-05, "loss": 1.0401766777038575, "step": 5580 }, { "epoch": 0.7015232089353225, "grad_norm": 0.17540504038333893, "learning_rate": 1.5396558416353993e-05, "loss": 1.0871668815612794, "step": 5590 }, { "epoch": 0.7027781699530958, "grad_norm": 0.212332084774971, "learning_rate": 1.533186699443654e-05, "loss": 1.0655070304870606, "step": 5600 }, { "epoch": 0.7040331309708692, "grad_norm": 0.2142452895641327, "learning_rate": 1.5267175572519086e-05, "loss": 1.0310181617736816, "step": 5610 }, { "epoch": 0.7052880919886426, "grad_norm": 0.2183569073677063, "learning_rate": 1.5202484150601631e-05, "loss": 1.0585816383361817, "step": 5620 }, { "epoch": 0.706543053006416, "grad_norm": 0.2261342704296112, "learning_rate": 1.5137792728684178e-05, "loss": 1.0550565719604492, "step": 5630 }, { "epoch": 0.7077980140241894, "grad_norm": 0.20529380440711975, "learning_rate": 1.5073101306766723e-05, "loss": 1.0414663314819337, "step": 5640 }, { "epoch": 0.7090529750419627, "grad_norm": 0.19481569528579712, "learning_rate": 1.500840988484927e-05, "loss": 1.0689180374145508, "step": 5650 }, { "epoch": 0.7103079360597362, "grad_norm": 0.18099959194660187, "learning_rate": 1.4943718462931817e-05, "loss": 1.0106795310974122, "step": 5660 }, { "epoch": 0.7115628970775095, "grad_norm": 0.1786813735961914, "learning_rate": 1.4879027041014362e-05, "loss": 1.071911334991455, "step": 5670 }, { "epoch": 0.7128178580952829, "grad_norm": 0.17721523344516754, "learning_rate": 1.4814335619096909e-05, "loss": 1.0632891654968262, "step": 5680 }, { "epoch": 0.7140728191130563, "grad_norm": 0.2053815722465515, "learning_rate": 1.4749644197179455e-05, "loss": 1.0814258575439453, "step": 5690 }, { "epoch": 0.7153277801308296, "grad_norm": 0.20590366423130035, "learning_rate": 1.4684952775262e-05, "loss": 1.0101026535034179, "step": 5700 }, { "epoch": 0.7165827411486031, "grad_norm": 0.18634529411792755, "learning_rate": 1.4620261353344547e-05, "loss": 1.0727294921875, "step": 5710 }, { "epoch": 0.7178377021663764, "grad_norm": 0.21531324088573456, "learning_rate": 1.4555569931427094e-05, "loss": 1.0686802864074707, "step": 5720 }, { "epoch": 0.7190926631841499, "grad_norm": 0.18119627237319946, "learning_rate": 1.4490878509509639e-05, "loss": 1.001711654663086, "step": 5730 }, { "epoch": 0.7203476242019232, "grad_norm": 0.17979778349399567, "learning_rate": 1.4426187087592186e-05, "loss": 1.0803590774536134, "step": 5740 }, { "epoch": 0.7216025852196967, "grad_norm": 0.20080508291721344, "learning_rate": 1.4361495665674732e-05, "loss": 1.10612154006958, "step": 5750 }, { "epoch": 0.72285754623747, "grad_norm": 0.2107263058423996, "learning_rate": 1.4296804243757277e-05, "loss": 1.1027125358581542, "step": 5760 }, { "epoch": 0.7241125072552433, "grad_norm": 0.23398469388484955, "learning_rate": 1.4232112821839824e-05, "loss": 1.1061213493347168, "step": 5770 }, { "epoch": 0.7253674682730168, "grad_norm": 0.17343750596046448, "learning_rate": 1.4167421399922371e-05, "loss": 1.0411705017089843, "step": 5780 }, { "epoch": 0.7266224292907901, "grad_norm": 0.2107427567243576, "learning_rate": 1.4102729978004916e-05, "loss": 1.0695199966430664, "step": 5790 }, { "epoch": 0.7278773903085636, "grad_norm": 0.2061903476715088, "learning_rate": 1.4038038556087463e-05, "loss": 1.0993520736694335, "step": 5800 }, { "epoch": 0.7291323513263369, "grad_norm": 0.2150152623653412, "learning_rate": 1.397334713417001e-05, "loss": 1.0545531272888184, "step": 5810 }, { "epoch": 0.7303873123441104, "grad_norm": 0.17038771510124207, "learning_rate": 1.3908655712252555e-05, "loss": 1.0232912063598634, "step": 5820 }, { "epoch": 0.7316422733618837, "grad_norm": 0.19553017616271973, "learning_rate": 1.3843964290335101e-05, "loss": 1.051710033416748, "step": 5830 }, { "epoch": 0.732897234379657, "grad_norm": 0.21933381259441376, "learning_rate": 1.3779272868417648e-05, "loss": 1.0953669548034668, "step": 5840 }, { "epoch": 0.7341521953974305, "grad_norm": 0.1644992232322693, "learning_rate": 1.3714581446500193e-05, "loss": 1.0111507415771483, "step": 5850 }, { "epoch": 0.7354071564152038, "grad_norm": 0.1784752458333969, "learning_rate": 1.364989002458274e-05, "loss": 1.042880153656006, "step": 5860 }, { "epoch": 0.7366621174329773, "grad_norm": 0.20959775149822235, "learning_rate": 1.3585198602665287e-05, "loss": 1.022576904296875, "step": 5870 }, { "epoch": 0.7379170784507506, "grad_norm": 0.2106805443763733, "learning_rate": 1.3520507180747832e-05, "loss": 1.046870803833008, "step": 5880 }, { "epoch": 0.7391720394685241, "grad_norm": 0.21699438989162445, "learning_rate": 1.3455815758830378e-05, "loss": 1.1192692756652831, "step": 5890 }, { "epoch": 0.7404270004862974, "grad_norm": 0.19661927223205566, "learning_rate": 1.3391124336912925e-05, "loss": 1.0794744491577148, "step": 5900 }, { "epoch": 0.7416819615040707, "grad_norm": 0.19780300557613373, "learning_rate": 1.332643291499547e-05, "loss": 1.0534735679626466, "step": 5910 }, { "epoch": 0.7429369225218442, "grad_norm": 0.17464447021484375, "learning_rate": 1.326174149307802e-05, "loss": 1.0962336540222168, "step": 5920 }, { "epoch": 0.7441918835396175, "grad_norm": 0.2023210972547531, "learning_rate": 1.3197050071160565e-05, "loss": 1.1575682640075684, "step": 5930 }, { "epoch": 0.745446844557391, "grad_norm": 0.21466964483261108, "learning_rate": 1.3132358649243112e-05, "loss": 1.123062515258789, "step": 5940 }, { "epoch": 0.7467018055751643, "grad_norm": 0.22124673426151276, "learning_rate": 1.3067667227325659e-05, "loss": 1.0591267585754394, "step": 5950 }, { "epoch": 0.7479567665929377, "grad_norm": 0.2065040022134781, "learning_rate": 1.3002975805408204e-05, "loss": 1.0695542335510253, "step": 5960 }, { "epoch": 0.7492117276107111, "grad_norm": 0.1949198693037033, "learning_rate": 1.293828438349075e-05, "loss": 1.1152711868286134, "step": 5970 }, { "epoch": 0.7504666886284844, "grad_norm": 0.20681394636631012, "learning_rate": 1.2873592961573297e-05, "loss": 1.0414956092834473, "step": 5980 }, { "epoch": 0.7517216496462579, "grad_norm": 0.2022944539785385, "learning_rate": 1.2808901539655842e-05, "loss": 1.0200919151306151, "step": 5990 }, { "epoch": 0.7529766106640312, "grad_norm": 0.24508307874202728, "learning_rate": 1.274421011773839e-05, "loss": 1.0655020713806151, "step": 6000 }, { "epoch": 0.7542315716818047, "grad_norm": 0.1903439313173294, "learning_rate": 1.2679518695820936e-05, "loss": 1.0706681251525878, "step": 6010 }, { "epoch": 0.755486532699578, "grad_norm": 0.19773785769939423, "learning_rate": 1.2614827273903481e-05, "loss": 1.086234188079834, "step": 6020 }, { "epoch": 0.7567414937173514, "grad_norm": 0.21088463068008423, "learning_rate": 1.2550135851986028e-05, "loss": 1.069676399230957, "step": 6030 }, { "epoch": 0.7579964547351248, "grad_norm": 0.20454522967338562, "learning_rate": 1.2485444430068574e-05, "loss": 1.0646330833435058, "step": 6040 }, { "epoch": 0.7592514157528982, "grad_norm": 0.19824111461639404, "learning_rate": 1.242075300815112e-05, "loss": 1.0749752044677734, "step": 6050 }, { "epoch": 0.7605063767706716, "grad_norm": 0.19081714749336243, "learning_rate": 1.2356061586233666e-05, "loss": 1.0570926666259766, "step": 6060 }, { "epoch": 0.7617613377884449, "grad_norm": 0.20483826100826263, "learning_rate": 1.2291370164316213e-05, "loss": 1.0441864013671875, "step": 6070 }, { "epoch": 0.7630162988062184, "grad_norm": 0.2218000292778015, "learning_rate": 1.2226678742398758e-05, "loss": 1.0976847648620605, "step": 6080 }, { "epoch": 0.7642712598239917, "grad_norm": 0.19163018465042114, "learning_rate": 1.2161987320481305e-05, "loss": 1.0738938331604004, "step": 6090 }, { "epoch": 0.7655262208417651, "grad_norm": 0.22696638107299805, "learning_rate": 1.2097295898563852e-05, "loss": 1.09117431640625, "step": 6100 }, { "epoch": 0.7667811818595385, "grad_norm": 0.21094925701618195, "learning_rate": 1.2032604476646397e-05, "loss": 1.1293853759765624, "step": 6110 }, { "epoch": 0.7680361428773119, "grad_norm": 0.19011946022510529, "learning_rate": 1.1967913054728943e-05, "loss": 1.0315247535705567, "step": 6120 }, { "epoch": 0.7692911038950853, "grad_norm": 0.22294586896896362, "learning_rate": 1.190322163281149e-05, "loss": 1.0366369247436524, "step": 6130 }, { "epoch": 0.7705460649128586, "grad_norm": 0.19210191071033478, "learning_rate": 1.1838530210894037e-05, "loss": 1.041628646850586, "step": 6140 }, { "epoch": 0.771801025930632, "grad_norm": 0.17535246908664703, "learning_rate": 1.1773838788976582e-05, "loss": 1.0377203941345214, "step": 6150 }, { "epoch": 0.7730559869484054, "grad_norm": 0.1910182386636734, "learning_rate": 1.1709147367059129e-05, "loss": 1.0049347877502441, "step": 6160 }, { "epoch": 0.7743109479661788, "grad_norm": 0.18827295303344727, "learning_rate": 1.1644455945141675e-05, "loss": 1.1181403160095216, "step": 6170 }, { "epoch": 0.7755659089839522, "grad_norm": 0.19850969314575195, "learning_rate": 1.157976452322422e-05, "loss": 1.1215065002441407, "step": 6180 }, { "epoch": 0.7768208700017256, "grad_norm": 0.21341505646705627, "learning_rate": 1.1515073101306767e-05, "loss": 1.0259138107299806, "step": 6190 }, { "epoch": 0.778075831019499, "grad_norm": 0.1731249988079071, "learning_rate": 1.1450381679389314e-05, "loss": 1.0396163940429688, "step": 6200 }, { "epoch": 0.7793307920372723, "grad_norm": 0.20062355697155, "learning_rate": 1.1385690257471859e-05, "loss": 1.0933070182800293, "step": 6210 }, { "epoch": 0.7805857530550457, "grad_norm": 0.214784637093544, "learning_rate": 1.1320998835554406e-05, "loss": 1.038003921508789, "step": 6220 }, { "epoch": 0.7818407140728191, "grad_norm": 0.17985184490680695, "learning_rate": 1.1256307413636952e-05, "loss": 1.0742274284362794, "step": 6230 }, { "epoch": 0.7830956750905925, "grad_norm": 0.19000248610973358, "learning_rate": 1.1191615991719498e-05, "loss": 1.051091480255127, "step": 6240 }, { "epoch": 0.7843506361083659, "grad_norm": 0.21635884046554565, "learning_rate": 1.1126924569802044e-05, "loss": 1.0835393905639648, "step": 6250 }, { "epoch": 0.7856055971261393, "grad_norm": 0.22533932328224182, "learning_rate": 1.1062233147884591e-05, "loss": 1.046684741973877, "step": 6260 }, { "epoch": 0.7868605581439126, "grad_norm": 0.1827181726694107, "learning_rate": 1.0997541725967138e-05, "loss": 1.0681885719299316, "step": 6270 }, { "epoch": 0.7881155191616861, "grad_norm": 0.22982963919639587, "learning_rate": 1.0932850304049685e-05, "loss": 1.0472677230834961, "step": 6280 }, { "epoch": 0.7893704801794594, "grad_norm": 0.19450107216835022, "learning_rate": 1.086815888213223e-05, "loss": 1.0826946258544923, "step": 6290 }, { "epoch": 0.7906254411972328, "grad_norm": 0.19893890619277954, "learning_rate": 1.0803467460214776e-05, "loss": 1.0179737091064454, "step": 6300 }, { "epoch": 0.7918804022150062, "grad_norm": 0.17373962700366974, "learning_rate": 1.0738776038297323e-05, "loss": 1.0917057037353515, "step": 6310 }, { "epoch": 0.7931353632327796, "grad_norm": 0.2234533429145813, "learning_rate": 1.0674084616379868e-05, "loss": 1.0998245239257813, "step": 6320 }, { "epoch": 0.794390324250553, "grad_norm": 0.2080935388803482, "learning_rate": 1.0609393194462415e-05, "loss": 1.021653938293457, "step": 6330 }, { "epoch": 0.7956452852683263, "grad_norm": 0.2437697947025299, "learning_rate": 1.0544701772544962e-05, "loss": 1.083084487915039, "step": 6340 }, { "epoch": 0.7969002462860998, "grad_norm": 0.20368912816047668, "learning_rate": 1.0480010350627507e-05, "loss": 1.0707550048828125, "step": 6350 }, { "epoch": 0.7981552073038731, "grad_norm": 0.16665543615818024, "learning_rate": 1.0415318928710053e-05, "loss": 1.136690044403076, "step": 6360 }, { "epoch": 0.7994101683216465, "grad_norm": 0.17936968803405762, "learning_rate": 1.03506275067926e-05, "loss": 1.0300110816955566, "step": 6370 }, { "epoch": 0.8006651293394199, "grad_norm": 0.17155979573726654, "learning_rate": 1.0285936084875145e-05, "loss": 1.2345178604125977, "step": 6380 }, { "epoch": 0.8019200903571932, "grad_norm": 0.19316871464252472, "learning_rate": 1.0221244662957692e-05, "loss": 1.1063278198242188, "step": 6390 }, { "epoch": 0.8031750513749667, "grad_norm": 0.19231335818767548, "learning_rate": 1.0156553241040239e-05, "loss": 1.1099343299865723, "step": 6400 }, { "epoch": 0.80443001239274, "grad_norm": 0.20124751329421997, "learning_rate": 1.0091861819122784e-05, "loss": 1.0818553924560548, "step": 6410 }, { "epoch": 0.8056849734105135, "grad_norm": 0.23050783574581146, "learning_rate": 1.002717039720533e-05, "loss": 1.0924180030822754, "step": 6420 }, { "epoch": 0.8069399344282868, "grad_norm": 0.19461168348789215, "learning_rate": 9.962478975287877e-06, "loss": 1.0269258499145508, "step": 6430 }, { "epoch": 0.8081948954460602, "grad_norm": 0.1916336864233017, "learning_rate": 9.897787553370422e-06, "loss": 1.1376501083374024, "step": 6440 }, { "epoch": 0.8094498564638336, "grad_norm": 0.18255390226840973, "learning_rate": 9.833096131452969e-06, "loss": 1.0916749000549317, "step": 6450 }, { "epoch": 0.8107048174816069, "grad_norm": 0.1791938841342926, "learning_rate": 9.768404709535516e-06, "loss": 1.0888887405395509, "step": 6460 }, { "epoch": 0.8119597784993804, "grad_norm": 0.1699710339307785, "learning_rate": 9.703713287618063e-06, "loss": 1.0881536483764649, "step": 6470 }, { "epoch": 0.8132147395171537, "grad_norm": 0.19622161984443665, "learning_rate": 9.639021865700608e-06, "loss": 1.0142560958862306, "step": 6480 }, { "epoch": 0.8144697005349272, "grad_norm": 0.18209852278232574, "learning_rate": 9.574330443783156e-06, "loss": 1.0698868751525878, "step": 6490 }, { "epoch": 0.8157246615527005, "grad_norm": 0.16895531117916107, "learning_rate": 9.509639021865701e-06, "loss": 1.1377266883850097, "step": 6500 }, { "epoch": 0.8169796225704739, "grad_norm": 0.1833142638206482, "learning_rate": 9.444947599948248e-06, "loss": 1.0924424171447753, "step": 6510 }, { "epoch": 0.8182345835882473, "grad_norm": 0.2109987437725067, "learning_rate": 9.380256178030795e-06, "loss": 1.0673909187316895, "step": 6520 }, { "epoch": 0.8194895446060206, "grad_norm": 0.17565134167671204, "learning_rate": 9.31556475611334e-06, "loss": 1.0398826599121094, "step": 6530 }, { "epoch": 0.8207445056237941, "grad_norm": 0.22704172134399414, "learning_rate": 9.250873334195886e-06, "loss": 1.1169721603393554, "step": 6540 }, { "epoch": 0.8219994666415674, "grad_norm": 0.21614673733711243, "learning_rate": 9.186181912278433e-06, "loss": 1.040649127960205, "step": 6550 }, { "epoch": 0.8232544276593409, "grad_norm": 0.21649399399757385, "learning_rate": 9.121490490360978e-06, "loss": 1.1043192863464355, "step": 6560 }, { "epoch": 0.8245093886771142, "grad_norm": 0.1912921667098999, "learning_rate": 9.056799068443525e-06, "loss": 1.127028465270996, "step": 6570 }, { "epoch": 0.8257643496948877, "grad_norm": 0.2091410607099533, "learning_rate": 8.992107646526072e-06, "loss": 1.0905369758605956, "step": 6580 }, { "epoch": 0.827019310712661, "grad_norm": 0.22103574872016907, "learning_rate": 8.927416224608617e-06, "loss": 0.9964225769042969, "step": 6590 }, { "epoch": 0.8282742717304343, "grad_norm": 0.16947044432163239, "learning_rate": 8.862724802691163e-06, "loss": 1.1176776885986328, "step": 6600 }, { "epoch": 0.8295292327482078, "grad_norm": 0.19035208225250244, "learning_rate": 8.79803338077371e-06, "loss": 1.1607711791992188, "step": 6610 }, { "epoch": 0.8307841937659811, "grad_norm": 0.22612328827381134, "learning_rate": 8.733341958856255e-06, "loss": 1.0306846618652343, "step": 6620 }, { "epoch": 0.8320391547837546, "grad_norm": 0.19116735458374023, "learning_rate": 8.668650536938802e-06, "loss": 1.0862977981567383, "step": 6630 }, { "epoch": 0.8332941158015279, "grad_norm": 0.19095416367053986, "learning_rate": 8.603959115021349e-06, "loss": 1.0201908111572267, "step": 6640 }, { "epoch": 0.8345490768193013, "grad_norm": 0.18990927934646606, "learning_rate": 8.539267693103894e-06, "loss": 1.0904932975769044, "step": 6650 }, { "epoch": 0.8358040378370747, "grad_norm": 0.21695184707641602, "learning_rate": 8.47457627118644e-06, "loss": 1.0262943267822267, "step": 6660 }, { "epoch": 0.837058998854848, "grad_norm": 0.18091970682144165, "learning_rate": 8.409884849268987e-06, "loss": 1.0900959014892577, "step": 6670 }, { "epoch": 0.8383139598726215, "grad_norm": 0.21363726258277893, "learning_rate": 8.345193427351532e-06, "loss": 1.0485601425170898, "step": 6680 }, { "epoch": 0.8395689208903948, "grad_norm": 0.19334258139133453, "learning_rate": 8.280502005434079e-06, "loss": 1.0345725059509276, "step": 6690 }, { "epoch": 0.8408238819081683, "grad_norm": 0.20086072385311127, "learning_rate": 8.215810583516626e-06, "loss": 1.1097275733947753, "step": 6700 }, { "epoch": 0.8420788429259416, "grad_norm": 0.1856435388326645, "learning_rate": 8.151119161599171e-06, "loss": 1.0502695083618163, "step": 6710 }, { "epoch": 0.843333803943715, "grad_norm": 0.20465126633644104, "learning_rate": 8.08642773968172e-06, "loss": 1.0540699005126952, "step": 6720 }, { "epoch": 0.8445887649614884, "grad_norm": 0.18902461230754852, "learning_rate": 8.021736317764266e-06, "loss": 1.1354016304016112, "step": 6730 }, { "epoch": 0.8458437259792617, "grad_norm": 0.20140230655670166, "learning_rate": 7.957044895846811e-06, "loss": 0.9932634353637695, "step": 6740 }, { "epoch": 0.8470986869970352, "grad_norm": 0.19942064583301544, "learning_rate": 7.892353473929358e-06, "loss": 1.1147568702697754, "step": 6750 }, { "epoch": 0.8483536480148085, "grad_norm": 0.21955204010009766, "learning_rate": 7.827662052011905e-06, "loss": 0.9939888954162598, "step": 6760 }, { "epoch": 0.849608609032582, "grad_norm": 0.2062138170003891, "learning_rate": 7.76297063009445e-06, "loss": 1.0063923835754394, "step": 6770 }, { "epoch": 0.8508635700503553, "grad_norm": 0.23989014327526093, "learning_rate": 7.698279208176996e-06, "loss": 1.0760384559631349, "step": 6780 }, { "epoch": 0.8521185310681287, "grad_norm": 0.20089785754680634, "learning_rate": 7.633587786259543e-06, "loss": 1.028395652770996, "step": 6790 }, { "epoch": 0.8533734920859021, "grad_norm": 0.19919630885124207, "learning_rate": 7.568896364342089e-06, "loss": 1.02840576171875, "step": 6800 }, { "epoch": 0.8546284531036754, "grad_norm": 0.2014761120080948, "learning_rate": 7.504204942424635e-06, "loss": 1.0546991348266601, "step": 6810 }, { "epoch": 0.8558834141214489, "grad_norm": 0.19204354286193848, "learning_rate": 7.439513520507181e-06, "loss": 1.03892183303833, "step": 6820 }, { "epoch": 0.8571383751392222, "grad_norm": 0.22647128999233246, "learning_rate": 7.374822098589728e-06, "loss": 1.0341573715209962, "step": 6830 }, { "epoch": 0.8583933361569956, "grad_norm": 0.2208699733018875, "learning_rate": 7.3101306766722735e-06, "loss": 1.0098363876342773, "step": 6840 }, { "epoch": 0.859648297174769, "grad_norm": 0.219742089509964, "learning_rate": 7.2454392547548194e-06, "loss": 1.0027207374572753, "step": 6850 }, { "epoch": 0.8609032581925424, "grad_norm": 0.20601145923137665, "learning_rate": 7.180747832837366e-06, "loss": 1.0074657440185546, "step": 6860 }, { "epoch": 0.8621582192103158, "grad_norm": 0.18209318816661835, "learning_rate": 7.116056410919912e-06, "loss": 1.0155368804931642, "step": 6870 }, { "epoch": 0.8634131802280892, "grad_norm": 0.23554867506027222, "learning_rate": 7.051364989002458e-06, "loss": 1.047171401977539, "step": 6880 }, { "epoch": 0.8646681412458626, "grad_norm": 0.17976488173007965, "learning_rate": 6.986673567085005e-06, "loss": 1.1108320236206055, "step": 6890 }, { "epoch": 0.8659231022636359, "grad_norm": 0.17746324837207794, "learning_rate": 6.921982145167551e-06, "loss": 1.033961868286133, "step": 6900 }, { "epoch": 0.8671780632814093, "grad_norm": 0.20548702776432037, "learning_rate": 6.8572907232500965e-06, "loss": 0.9573192596435547, "step": 6910 }, { "epoch": 0.8684330242991827, "grad_norm": 0.24221506714820862, "learning_rate": 6.792599301332643e-06, "loss": 1.0891441345214843, "step": 6920 }, { "epoch": 0.8696879853169561, "grad_norm": 0.1774619221687317, "learning_rate": 6.727907879415189e-06, "loss": 1.0774598121643066, "step": 6930 }, { "epoch": 0.8709429463347295, "grad_norm": 0.1890013962984085, "learning_rate": 6.663216457497735e-06, "loss": 1.082331371307373, "step": 6940 }, { "epoch": 0.8721979073525029, "grad_norm": 0.19728416204452515, "learning_rate": 6.598525035580283e-06, "loss": 1.0425737380981446, "step": 6950 }, { "epoch": 0.8734528683702762, "grad_norm": 0.1938554048538208, "learning_rate": 6.533833613662829e-06, "loss": 0.9840812683105469, "step": 6960 }, { "epoch": 0.8747078293880496, "grad_norm": 0.20868176221847534, "learning_rate": 6.469142191745375e-06, "loss": 1.107065486907959, "step": 6970 }, { "epoch": 0.875962790405823, "grad_norm": 0.20235563814640045, "learning_rate": 6.404450769827921e-06, "loss": 1.035740566253662, "step": 6980 }, { "epoch": 0.8772177514235964, "grad_norm": 0.19022290408611298, "learning_rate": 6.339759347910468e-06, "loss": 1.0365036010742188, "step": 6990 }, { "epoch": 0.8784727124413698, "grad_norm": 0.19704179465770721, "learning_rate": 6.275067925993014e-06, "loss": 1.0721055030822755, "step": 7000 }, { "epoch": 0.8797276734591432, "grad_norm": 0.21241937577724457, "learning_rate": 6.21037650407556e-06, "loss": 1.0706797599792481, "step": 7010 }, { "epoch": 0.8809826344769166, "grad_norm": 0.2100059986114502, "learning_rate": 6.1456850821581065e-06, "loss": 1.0387989044189454, "step": 7020 }, { "epoch": 0.8822375954946899, "grad_norm": 0.22000259160995483, "learning_rate": 6.080993660240652e-06, "loss": 1.0321297645568848, "step": 7030 }, { "epoch": 0.8834925565124633, "grad_norm": 0.20955264568328857, "learning_rate": 6.016302238323198e-06, "loss": 1.0901686668395996, "step": 7040 }, { "epoch": 0.8847475175302367, "grad_norm": 0.20897357165813446, "learning_rate": 5.951610816405745e-06, "loss": 1.1447209358215331, "step": 7050 }, { "epoch": 0.8860024785480101, "grad_norm": 0.20097708702087402, "learning_rate": 5.886919394488291e-06, "loss": 0.9972308158874512, "step": 7060 }, { "epoch": 0.8872574395657835, "grad_norm": 0.18822167813777924, "learning_rate": 5.822227972570838e-06, "loss": 1.1192616462707519, "step": 7070 }, { "epoch": 0.8885124005835568, "grad_norm": 0.21870264410972595, "learning_rate": 5.757536550653384e-06, "loss": 1.0673290252685548, "step": 7080 }, { "epoch": 0.8897673616013303, "grad_norm": 0.18924005329608917, "learning_rate": 5.6928451287359295e-06, "loss": 1.0575652122497559, "step": 7090 }, { "epoch": 0.8910223226191036, "grad_norm": 0.1820686012506485, "learning_rate": 5.628153706818476e-06, "loss": 1.0578106880187987, "step": 7100 }, { "epoch": 0.8922772836368771, "grad_norm": 0.21963050961494446, "learning_rate": 5.563462284901022e-06, "loss": 1.036845874786377, "step": 7110 }, { "epoch": 0.8935322446546504, "grad_norm": 0.2212342619895935, "learning_rate": 5.498770862983569e-06, "loss": 1.0462807655334472, "step": 7120 }, { "epoch": 0.8947872056724238, "grad_norm": 0.19352254271507263, "learning_rate": 5.434079441066115e-06, "loss": 1.0695903778076172, "step": 7130 }, { "epoch": 0.8960421666901972, "grad_norm": 0.21432216465473175, "learning_rate": 5.3693880191486615e-06, "loss": 1.0361966133117675, "step": 7140 }, { "epoch": 0.8972971277079705, "grad_norm": 0.193306565284729, "learning_rate": 5.3046965972312074e-06, "loss": 1.119828987121582, "step": 7150 }, { "epoch": 0.898552088725744, "grad_norm": 0.18354584276676178, "learning_rate": 5.240005175313753e-06, "loss": 1.0150964736938477, "step": 7160 }, { "epoch": 0.8998070497435173, "grad_norm": 0.2255086451768875, "learning_rate": 5.1753137533963e-06, "loss": 1.0812021255493165, "step": 7170 }, { "epoch": 0.9010620107612908, "grad_norm": 0.1983460932970047, "learning_rate": 5.110622331478846e-06, "loss": 1.0526619911193849, "step": 7180 }, { "epoch": 0.9023169717790641, "grad_norm": 0.2795880138874054, "learning_rate": 5.045930909561392e-06, "loss": 0.9882980346679687, "step": 7190 }, { "epoch": 0.9035719327968375, "grad_norm": 0.2017148733139038, "learning_rate": 4.981239487643939e-06, "loss": 1.079946517944336, "step": 7200 }, { "epoch": 0.9048268938146109, "grad_norm": 0.19753098487854004, "learning_rate": 4.9165480657264845e-06, "loss": 1.0410076141357423, "step": 7210 }, { "epoch": 0.9060818548323842, "grad_norm": 0.21375982463359833, "learning_rate": 4.851856643809031e-06, "loss": 1.0886569023132324, "step": 7220 }, { "epoch": 0.9073368158501577, "grad_norm": 0.18023167550563812, "learning_rate": 4.787165221891578e-06, "loss": 1.01919584274292, "step": 7230 }, { "epoch": 0.908591776867931, "grad_norm": 0.21556456387043, "learning_rate": 4.722473799974124e-06, "loss": 1.1226733207702637, "step": 7240 }, { "epoch": 0.9098467378857045, "grad_norm": 0.1888088434934616, "learning_rate": 4.65778237805667e-06, "loss": 1.0532227516174317, "step": 7250 }, { "epoch": 0.9111016989034778, "grad_norm": 0.21888945996761322, "learning_rate": 4.5930909561392166e-06, "loss": 1.1242030143737793, "step": 7260 }, { "epoch": 0.9123566599212511, "grad_norm": 0.20070919394493103, "learning_rate": 4.5283995342217625e-06, "loss": 1.0876989364624023, "step": 7270 }, { "epoch": 0.9136116209390246, "grad_norm": 0.21463949978351593, "learning_rate": 4.463708112304308e-06, "loss": 1.0497437477111817, "step": 7280 }, { "epoch": 0.9148665819567979, "grad_norm": 0.21296384930610657, "learning_rate": 4.399016690386855e-06, "loss": 1.0168041229248046, "step": 7290 }, { "epoch": 0.9161215429745714, "grad_norm": 0.2151423692703247, "learning_rate": 4.334325268469401e-06, "loss": 1.0028361320495605, "step": 7300 }, { "epoch": 0.9173765039923447, "grad_norm": 0.17901144921779633, "learning_rate": 4.269633846551947e-06, "loss": 1.0603734970092773, "step": 7310 }, { "epoch": 0.9186314650101182, "grad_norm": 0.22024358808994293, "learning_rate": 4.204942424634494e-06, "loss": 1.133981418609619, "step": 7320 }, { "epoch": 0.9198864260278915, "grad_norm": 0.18600453436374664, "learning_rate": 4.1402510027170396e-06, "loss": 1.043716812133789, "step": 7330 }, { "epoch": 0.9211413870456648, "grad_norm": 0.1897258311510086, "learning_rate": 4.0755595807995855e-06, "loss": 1.0358457565307617, "step": 7340 }, { "epoch": 0.9223963480634383, "grad_norm": 0.2092253863811493, "learning_rate": 4.010868158882133e-06, "loss": 1.0762650489807128, "step": 7350 }, { "epoch": 0.9236513090812116, "grad_norm": 0.1960146427154541, "learning_rate": 3.946176736964679e-06, "loss": 1.0524661064147949, "step": 7360 }, { "epoch": 0.9249062700989851, "grad_norm": 0.21514521539211273, "learning_rate": 3.881485315047225e-06, "loss": 1.0553058624267577, "step": 7370 }, { "epoch": 0.9261612311167584, "grad_norm": 0.18721559643745422, "learning_rate": 3.816793893129772e-06, "loss": 1.120730495452881, "step": 7380 }, { "epoch": 0.9274161921345319, "grad_norm": 0.20185963809490204, "learning_rate": 3.7521024712123175e-06, "loss": 1.0561814308166504, "step": 7390 }, { "epoch": 0.9286711531523052, "grad_norm": 0.18291395902633667, "learning_rate": 3.687411049294864e-06, "loss": 1.065229320526123, "step": 7400 }, { "epoch": 0.9299261141700786, "grad_norm": 0.20202836394309998, "learning_rate": 3.6227196273774097e-06, "loss": 1.092850112915039, "step": 7410 }, { "epoch": 0.931181075187852, "grad_norm": 0.1978691816329956, "learning_rate": 3.558028205459956e-06, "loss": 1.068133544921875, "step": 7420 }, { "epoch": 0.9324360362056253, "grad_norm": 0.20446452498435974, "learning_rate": 3.4933367835425024e-06, "loss": 1.037320613861084, "step": 7430 }, { "epoch": 0.9336909972233988, "grad_norm": 0.17508593201637268, "learning_rate": 3.4286453616250483e-06, "loss": 1.141841220855713, "step": 7440 }, { "epoch": 0.9349459582411721, "grad_norm": 0.1751583069562912, "learning_rate": 3.3639539397075946e-06, "loss": 1.0543017387390137, "step": 7450 }, { "epoch": 0.9362009192589456, "grad_norm": 0.16998153924942017, "learning_rate": 3.2992625177901413e-06, "loss": 1.0321502685546875, "step": 7460 }, { "epoch": 0.9374558802767189, "grad_norm": 0.17314793169498444, "learning_rate": 3.2345710958726877e-06, "loss": 1.0988115310668944, "step": 7470 }, { "epoch": 0.9387108412944923, "grad_norm": 0.19269074499607086, "learning_rate": 3.169879673955234e-06, "loss": 1.070913314819336, "step": 7480 }, { "epoch": 0.9399658023122657, "grad_norm": 0.23561343550682068, "learning_rate": 3.10518825203778e-06, "loss": 1.049428939819336, "step": 7490 }, { "epoch": 0.941220763330039, "grad_norm": 0.2069864720106125, "learning_rate": 3.040496830120326e-06, "loss": 1.003286647796631, "step": 7500 }, { "epoch": 0.9424757243478125, "grad_norm": 0.18430078029632568, "learning_rate": 2.9758054082028725e-06, "loss": 1.0501197814941405, "step": 7510 }, { "epoch": 0.9437306853655858, "grad_norm": 0.20570716261863708, "learning_rate": 2.911113986285419e-06, "loss": 1.0311017990112306, "step": 7520 }, { "epoch": 0.9449856463833592, "grad_norm": 0.23062951862812042, "learning_rate": 2.8464225643679647e-06, "loss": 1.0960111618041992, "step": 7530 }, { "epoch": 0.9462406074011326, "grad_norm": 0.20773741602897644, "learning_rate": 2.781731142450511e-06, "loss": 1.0412386894226073, "step": 7540 }, { "epoch": 0.947495568418906, "grad_norm": 0.24511989951133728, "learning_rate": 2.7170397205330574e-06, "loss": 1.1096014976501465, "step": 7550 }, { "epoch": 0.9487505294366794, "grad_norm": 0.20896856486797333, "learning_rate": 2.6523482986156037e-06, "loss": 1.0784428596496582, "step": 7560 }, { "epoch": 0.9500054904544527, "grad_norm": 0.19488734006881714, "learning_rate": 2.58765687669815e-06, "loss": 1.043881607055664, "step": 7570 }, { "epoch": 0.9512604514722262, "grad_norm": 0.19251015782356262, "learning_rate": 2.522965454780696e-06, "loss": 1.0701610565185546, "step": 7580 }, { "epoch": 0.9525154124899995, "grad_norm": 0.19297641515731812, "learning_rate": 2.4582740328632423e-06, "loss": 1.0647805213928223, "step": 7590 }, { "epoch": 0.9537703735077729, "grad_norm": 0.20945972204208374, "learning_rate": 2.393582610945789e-06, "loss": 1.036386489868164, "step": 7600 }, { "epoch": 0.9550253345255463, "grad_norm": 0.19576741755008698, "learning_rate": 2.328891189028335e-06, "loss": 1.0599888801574706, "step": 7610 }, { "epoch": 0.9562802955433197, "grad_norm": 0.21709538996219635, "learning_rate": 2.2641997671108812e-06, "loss": 1.0713414192199706, "step": 7620 }, { "epoch": 0.9575352565610931, "grad_norm": 0.2199336290359497, "learning_rate": 2.1995083451934276e-06, "loss": 1.071683406829834, "step": 7630 }, { "epoch": 0.9587902175788665, "grad_norm": 0.24035602807998657, "learning_rate": 2.1348169232759735e-06, "loss": 1.074808692932129, "step": 7640 }, { "epoch": 0.9600451785966398, "grad_norm": 0.22513709962368011, "learning_rate": 2.0701255013585198e-06, "loss": 1.080063247680664, "step": 7650 }, { "epoch": 0.9613001396144132, "grad_norm": 0.20860207080841064, "learning_rate": 2.0054340794410665e-06, "loss": 1.0631601333618164, "step": 7660 }, { "epoch": 0.9625551006321866, "grad_norm": 0.20075741410255432, "learning_rate": 1.9407426575236124e-06, "loss": 1.103189468383789, "step": 7670 }, { "epoch": 0.96381006164996, "grad_norm": 0.21978308260440826, "learning_rate": 1.8760512356061587e-06, "loss": 1.0389853477478028, "step": 7680 }, { "epoch": 0.9650650226677334, "grad_norm": 0.1870948076248169, "learning_rate": 1.8113598136887049e-06, "loss": 1.0636377334594727, "step": 7690 }, { "epoch": 0.9663199836855068, "grad_norm": 0.18612495064735413, "learning_rate": 1.7466683917712512e-06, "loss": 1.054550838470459, "step": 7700 }, { "epoch": 0.9675749447032802, "grad_norm": 0.1692449450492859, "learning_rate": 1.6819769698537973e-06, "loss": 1.0402187347412108, "step": 7710 }, { "epoch": 0.9688299057210535, "grad_norm": 0.17843835055828094, "learning_rate": 1.6172855479363438e-06, "loss": 1.0575499534606934, "step": 7720 }, { "epoch": 0.9700848667388269, "grad_norm": 0.22015692293643951, "learning_rate": 1.55259412601889e-06, "loss": 1.0864248275756836, "step": 7730 }, { "epoch": 0.9713398277566003, "grad_norm": 0.20895177125930786, "learning_rate": 1.4879027041014363e-06, "loss": 1.0197461128234864, "step": 7740 }, { "epoch": 0.9725947887743737, "grad_norm": 0.20695802569389343, "learning_rate": 1.4232112821839824e-06, "loss": 1.1009364128112793, "step": 7750 }, { "epoch": 0.9738497497921471, "grad_norm": 0.16815753281116486, "learning_rate": 1.3585198602665287e-06, "loss": 0.9824440956115723, "step": 7760 }, { "epoch": 0.9751047108099204, "grad_norm": 0.20957960188388824, "learning_rate": 1.293828438349075e-06, "loss": 1.1214850425720215, "step": 7770 }, { "epoch": 0.9763596718276939, "grad_norm": 0.2056475281715393, "learning_rate": 1.2291370164316211e-06, "loss": 1.0337078094482421, "step": 7780 }, { "epoch": 0.9776146328454672, "grad_norm": 0.20233125984668732, "learning_rate": 1.1644455945141675e-06, "loss": 1.1166194915771483, "step": 7790 }, { "epoch": 0.9788695938632406, "grad_norm": 0.19711875915527344, "learning_rate": 1.0997541725967138e-06, "loss": 1.0613553047180175, "step": 7800 }, { "epoch": 0.980124554881014, "grad_norm": 0.23265467584133148, "learning_rate": 1.0350627506792599e-06, "loss": 1.0709218978881836, "step": 7810 }, { "epoch": 0.9813795158987874, "grad_norm": 0.1931038498878479, "learning_rate": 9.703713287618062e-07, "loss": 1.0655381202697753, "step": 7820 }, { "epoch": 0.9826344769165608, "grad_norm": 0.19685102999210358, "learning_rate": 9.056799068443524e-07, "loss": 1.0683314323425293, "step": 7830 }, { "epoch": 0.9838894379343341, "grad_norm": 0.18740233778953552, "learning_rate": 8.409884849268986e-07, "loss": 1.0544939041137695, "step": 7840 }, { "epoch": 0.9851443989521076, "grad_norm": 0.2561696767807007, "learning_rate": 7.76297063009445e-07, "loss": 1.092136287689209, "step": 7850 }, { "epoch": 0.9863993599698809, "grad_norm": 0.20890219509601593, "learning_rate": 7.116056410919912e-07, "loss": 1.0438971519470215, "step": 7860 }, { "epoch": 0.9876543209876543, "grad_norm": 0.1991521716117859, "learning_rate": 6.469142191745375e-07, "loss": 1.0979823112487792, "step": 7870 }, { "epoch": 0.9889092820054277, "grad_norm": 0.25123023986816406, "learning_rate": 5.822227972570837e-07, "loss": 1.0025765419006347, "step": 7880 }, { "epoch": 0.990164243023201, "grad_norm": 0.20250628888607025, "learning_rate": 5.175313753396299e-07, "loss": 1.092966651916504, "step": 7890 }, { "epoch": 0.9914192040409745, "grad_norm": 0.2285197377204895, "learning_rate": 4.528399534221762e-07, "loss": 1.0923130989074707, "step": 7900 }, { "epoch": 0.9926741650587478, "grad_norm": 0.1967306137084961, "learning_rate": 3.881485315047225e-07, "loss": 1.0842831611633301, "step": 7910 }, { "epoch": 0.9939291260765213, "grad_norm": 0.20673462748527527, "learning_rate": 3.2345710958726876e-07, "loss": 1.0047653198242188, "step": 7920 }, { "epoch": 0.9951840870942946, "grad_norm": 0.19918572902679443, "learning_rate": 2.5876568766981497e-07, "loss": 1.0727598190307617, "step": 7930 }, { "epoch": 0.9964390481120681, "grad_norm": 0.16394563019275665, "learning_rate": 1.9407426575236124e-07, "loss": 1.0194159507751466, "step": 7940 }, { "epoch": 0.9976940091298414, "grad_norm": 0.22014449536800385, "learning_rate": 1.2938284383490749e-07, "loss": 1.0323500633239746, "step": 7950 }, { "epoch": 0.9989489701476147, "grad_norm": 0.2236953228712082, "learning_rate": 6.469142191745374e-08, "loss": 1.030369472503662, "step": 7960 }, { "epoch": 1.0, "eval_loss": 0.9335432648658752, "eval_runtime": 895.0319, "eval_samples_per_second": 14.994, "eval_steps_per_second": 7.497, "step": 7969 } ], "logging_steps": 10, "max_steps": 7969, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.797602843269349e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }