| { | |
| "best_global_step": 7969, | |
| "best_metric": 0.9335432648658752, | |
| "best_model_checkpoint": "./qwen3-1.7b-text2json-v3/checkpoint-7969", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 7969, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0012549610177733854, | |
| "grad_norm": 2.7938790321350098, | |
| "learning_rate": 1.875e-06, | |
| "loss": 1.6903125762939453, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0025099220355467708, | |
| "grad_norm": 2.206455945968628, | |
| "learning_rate": 3.958333333333333e-06, | |
| "loss": 1.661090087890625, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.003764883053320156, | |
| "grad_norm": 0.9010259509086609, | |
| "learning_rate": 6.041666666666667e-06, | |
| "loss": 1.5127840042114258, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0050198440710935415, | |
| "grad_norm": 0.4326864778995514, | |
| "learning_rate": 8.125000000000001e-06, | |
| "loss": 1.3494388580322265, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.006274805088866927, | |
| "grad_norm": 0.41406598687171936, | |
| "learning_rate": 1.0208333333333334e-05, | |
| "loss": 1.3530027389526367, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.007529766106640312, | |
| "grad_norm": 0.3024258613586426, | |
| "learning_rate": 1.2291666666666666e-05, | |
| "loss": 1.2346282958984376, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.008784727124413697, | |
| "grad_norm": 0.25805506110191345, | |
| "learning_rate": 1.4374999999999999e-05, | |
| "loss": 1.2771096229553223, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.010039688142187083, | |
| "grad_norm": 0.2437220960855484, | |
| "learning_rate": 1.6458333333333335e-05, | |
| "loss": 1.261610221862793, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.011294649159960469, | |
| "grad_norm": 0.26054802536964417, | |
| "learning_rate": 1.854166666666667e-05, | |
| "loss": 1.173118495941162, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.012549610177733855, | |
| "grad_norm": 0.28059178590774536, | |
| "learning_rate": 2.0625e-05, | |
| "loss": 1.1928260803222657, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.013804571195507239, | |
| "grad_norm": 0.23667506873607635, | |
| "learning_rate": 2.2708333333333334e-05, | |
| "loss": 1.2138886451721191, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.015059532213280625, | |
| "grad_norm": 0.24474164843559265, | |
| "learning_rate": 2.479166666666667e-05, | |
| "loss": 1.2723681449890136, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.016314493231054012, | |
| "grad_norm": 0.33744102716445923, | |
| "learning_rate": 2.6875e-05, | |
| "loss": 1.267289924621582, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.017569454248827394, | |
| "grad_norm": 0.26319584250450134, | |
| "learning_rate": 2.8958333333333337e-05, | |
| "loss": 1.190561866760254, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01882441526660078, | |
| "grad_norm": 0.2557962238788605, | |
| "learning_rate": 3.104166666666667e-05, | |
| "loss": 1.1946110725402832, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.020079376284374166, | |
| "grad_norm": 0.27757933735847473, | |
| "learning_rate": 3.3125e-05, | |
| "loss": 1.1806714057922363, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.021334337302147552, | |
| "grad_norm": 0.22700543701648712, | |
| "learning_rate": 3.520833333333334e-05, | |
| "loss": 1.2129878997802734, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.022589298319920938, | |
| "grad_norm": 0.2856610417366028, | |
| "learning_rate": 3.729166666666667e-05, | |
| "loss": 1.1290763854980468, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.023844259337694323, | |
| "grad_norm": 0.26019546389579773, | |
| "learning_rate": 3.9375e-05, | |
| "loss": 1.2425550460815429, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.02509922035546771, | |
| "grad_norm": 0.23760564625263214, | |
| "learning_rate": 4.1458333333333336e-05, | |
| "loss": 1.2198989868164063, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.026354181373241095, | |
| "grad_norm": 0.26171621680259705, | |
| "learning_rate": 4.354166666666667e-05, | |
| "loss": 1.228119468688965, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.027609142391014477, | |
| "grad_norm": 0.23990799486637115, | |
| "learning_rate": 4.5625e-05, | |
| "loss": 1.1751232147216797, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.028864103408787863, | |
| "grad_norm": 0.21109728515148163, | |
| "learning_rate": 4.770833333333334e-05, | |
| "loss": 1.18958740234375, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.03011906442656125, | |
| "grad_norm": 0.22801515460014343, | |
| "learning_rate": 4.979166666666667e-05, | |
| "loss": 1.2031834602355957, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.03137402544433464, | |
| "grad_norm": 0.2113051414489746, | |
| "learning_rate": 4.99417777202743e-05, | |
| "loss": 1.104047679901123, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.032628986462108024, | |
| "grad_norm": 0.2567385733127594, | |
| "learning_rate": 4.987708629835684e-05, | |
| "loss": 1.124267292022705, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0338839474798814, | |
| "grad_norm": 0.25872015953063965, | |
| "learning_rate": 4.981239487643939e-05, | |
| "loss": 1.1195612907409669, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.03513890849765479, | |
| "grad_norm": 0.22646528482437134, | |
| "learning_rate": 4.974770345452193e-05, | |
| "loss": 1.129570484161377, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.036393869515428175, | |
| "grad_norm": 0.26482081413269043, | |
| "learning_rate": 4.968301203260448e-05, | |
| "loss": 1.1967822074890138, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.03764883053320156, | |
| "grad_norm": 0.2646678686141968, | |
| "learning_rate": 4.9618320610687025e-05, | |
| "loss": 1.1607979774475097, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.038903791550974946, | |
| "grad_norm": 0.22207561135292053, | |
| "learning_rate": 4.955362918876957e-05, | |
| "loss": 1.1674532890319824, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.04015875256874833, | |
| "grad_norm": 0.24940666556358337, | |
| "learning_rate": 4.948893776685212e-05, | |
| "loss": 1.0974184036254884, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.04141371358652172, | |
| "grad_norm": 0.2815345823764801, | |
| "learning_rate": 4.9424246344934665e-05, | |
| "loss": 1.1656072616577149, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.042668674604295104, | |
| "grad_norm": 0.2820636034011841, | |
| "learning_rate": 4.935955492301721e-05, | |
| "loss": 1.1393600463867188, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.04392363562206849, | |
| "grad_norm": 0.21207420527935028, | |
| "learning_rate": 4.929486350109976e-05, | |
| "loss": 1.1813325881958008, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.045178596639841875, | |
| "grad_norm": 0.2317555546760559, | |
| "learning_rate": 4.92301720791823e-05, | |
| "loss": 1.2024839401245118, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.04643355765761526, | |
| "grad_norm": 0.22415006160736084, | |
| "learning_rate": 4.916548065726485e-05, | |
| "loss": 1.1521187782287599, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.04768851867538865, | |
| "grad_norm": 0.2794686257839203, | |
| "learning_rate": 4.910078923534739e-05, | |
| "loss": 1.1735503196716308, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.04894347969316203, | |
| "grad_norm": 0.20266857743263245, | |
| "learning_rate": 4.9036097813429946e-05, | |
| "loss": 1.1360593795776368, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.05019844071093542, | |
| "grad_norm": 0.24181729555130005, | |
| "learning_rate": 4.8971406391512486e-05, | |
| "loss": 1.1604348182678224, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.051453401728708804, | |
| "grad_norm": 0.23283515870571136, | |
| "learning_rate": 4.890671496959503e-05, | |
| "loss": 1.1582366943359375, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.05270836274648219, | |
| "grad_norm": 0.2232106328010559, | |
| "learning_rate": 4.884202354767758e-05, | |
| "loss": 1.1357643127441406, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.053963323764255576, | |
| "grad_norm": 0.20864224433898926, | |
| "learning_rate": 4.8777332125760126e-05, | |
| "loss": 1.1960598945617675, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.055218284782028955, | |
| "grad_norm": 0.21834968030452728, | |
| "learning_rate": 4.871264070384267e-05, | |
| "loss": 1.1870046615600587, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.05647324579980234, | |
| "grad_norm": 0.22693583369255066, | |
| "learning_rate": 4.864794928192522e-05, | |
| "loss": 1.1584319114685058, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.057728206817575727, | |
| "grad_norm": 0.22258850932121277, | |
| "learning_rate": 4.8583257860007766e-05, | |
| "loss": 1.1739194869995118, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.05898316783534911, | |
| "grad_norm": 0.21987426280975342, | |
| "learning_rate": 4.851856643809031e-05, | |
| "loss": 1.2010994911193849, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.0602381288531225, | |
| "grad_norm": 0.21596240997314453, | |
| "learning_rate": 4.845387501617285e-05, | |
| "loss": 1.0946701049804688, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.061493089870895884, | |
| "grad_norm": 0.18262015283107758, | |
| "learning_rate": 4.8389183594255406e-05, | |
| "loss": 1.1652299880981445, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.06274805088866928, | |
| "grad_norm": 0.22206667065620422, | |
| "learning_rate": 4.8324492172337946e-05, | |
| "loss": 1.1554561614990235, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06400301190644266, | |
| "grad_norm": 0.19047950208187103, | |
| "learning_rate": 4.82598007504205e-05, | |
| "loss": 1.1874130249023438, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.06525797292421605, | |
| "grad_norm": 0.23143501579761505, | |
| "learning_rate": 4.819510932850304e-05, | |
| "loss": 1.1211513519287108, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.06651293394198943, | |
| "grad_norm": 0.21261698007583618, | |
| "learning_rate": 4.8130417906585586e-05, | |
| "loss": 1.1845171928405762, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.0677678949597628, | |
| "grad_norm": 0.22298000752925873, | |
| "learning_rate": 4.806572648466813e-05, | |
| "loss": 1.1463261604309083, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0690228559775362, | |
| "grad_norm": 0.20260395109653473, | |
| "learning_rate": 4.800103506275068e-05, | |
| "loss": 1.1507064819335937, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.07027781699530958, | |
| "grad_norm": 0.22262270748615265, | |
| "learning_rate": 4.793634364083323e-05, | |
| "loss": 1.1626813888549805, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.07153277801308297, | |
| "grad_norm": 0.18661239743232727, | |
| "learning_rate": 4.7871652218915773e-05, | |
| "loss": 1.1173880577087403, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.07278773903085635, | |
| "grad_norm": 0.17793259024620056, | |
| "learning_rate": 4.780696079699832e-05, | |
| "loss": 1.182445240020752, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.07404270004862974, | |
| "grad_norm": 0.22001393139362335, | |
| "learning_rate": 4.774226937508087e-05, | |
| "loss": 1.0757221221923827, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.07529766106640312, | |
| "grad_norm": 0.16966955363750458, | |
| "learning_rate": 4.7677577953163414e-05, | |
| "loss": 1.1065872192382813, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07655262208417651, | |
| "grad_norm": 0.18004654347896576, | |
| "learning_rate": 4.761288653124596e-05, | |
| "loss": 1.1050881385803222, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.07780758310194989, | |
| "grad_norm": 0.21302928030490875, | |
| "learning_rate": 4.75481951093285e-05, | |
| "loss": 1.1550076484680176, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.07906254411972329, | |
| "grad_norm": 0.19082143902778625, | |
| "learning_rate": 4.7483503687411054e-05, | |
| "loss": 1.133737564086914, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.08031750513749666, | |
| "grad_norm": 0.18166916072368622, | |
| "learning_rate": 4.7418812265493594e-05, | |
| "loss": 1.1011496543884278, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.08157246615527006, | |
| "grad_norm": 0.2051081359386444, | |
| "learning_rate": 4.735412084357615e-05, | |
| "loss": 1.2461429595947267, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.08282742717304344, | |
| "grad_norm": 0.1807452291250229, | |
| "learning_rate": 4.7289429421658694e-05, | |
| "loss": 1.160153579711914, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.08408238819081683, | |
| "grad_norm": 0.2180749773979187, | |
| "learning_rate": 4.7224737999741234e-05, | |
| "loss": 1.1506930351257325, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.08533734920859021, | |
| "grad_norm": 0.18058760464191437, | |
| "learning_rate": 4.716004657782379e-05, | |
| "loss": 1.1412607192993165, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.0865923102263636, | |
| "grad_norm": 0.1865541785955429, | |
| "learning_rate": 4.709535515590633e-05, | |
| "loss": 1.0827465057373047, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.08784727124413698, | |
| "grad_norm": 0.20097248256206512, | |
| "learning_rate": 4.703066373398888e-05, | |
| "loss": 1.1497159004211426, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.08910223226191036, | |
| "grad_norm": 0.2066005915403366, | |
| "learning_rate": 4.696597231207142e-05, | |
| "loss": 1.1188447952270508, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.09035719327968375, | |
| "grad_norm": 0.23538357019424438, | |
| "learning_rate": 4.690128089015397e-05, | |
| "loss": 1.1101722717285156, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.09161215429745713, | |
| "grad_norm": 0.2259693592786789, | |
| "learning_rate": 4.6836589468236515e-05, | |
| "loss": 1.1163352966308593, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.09286711531523052, | |
| "grad_norm": 0.1654050350189209, | |
| "learning_rate": 4.677189804631906e-05, | |
| "loss": 1.0785648345947265, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.0941220763330039, | |
| "grad_norm": 0.1840600073337555, | |
| "learning_rate": 4.670720662440161e-05, | |
| "loss": 1.114925003051758, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.0953770373507773, | |
| "grad_norm": 0.22095882892608643, | |
| "learning_rate": 4.6642515202484155e-05, | |
| "loss": 1.1339808464050294, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.09663199836855067, | |
| "grad_norm": 0.18420755863189697, | |
| "learning_rate": 4.65778237805667e-05, | |
| "loss": 1.1255131721496583, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.09788695938632407, | |
| "grad_norm": 0.20250706374645233, | |
| "learning_rate": 4.651313235864925e-05, | |
| "loss": 1.0625088691711426, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.09914192040409744, | |
| "grad_norm": 0.20630766451358795, | |
| "learning_rate": 4.644844093673179e-05, | |
| "loss": 1.0966971397399903, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.10039688142187084, | |
| "grad_norm": 0.19232341647148132, | |
| "learning_rate": 4.638374951481434e-05, | |
| "loss": 1.1137746810913085, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.10165184243964422, | |
| "grad_norm": 0.22812579572200775, | |
| "learning_rate": 4.631905809289688e-05, | |
| "loss": 1.157925796508789, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.10290680345741761, | |
| "grad_norm": 0.17667317390441895, | |
| "learning_rate": 4.6254366670979435e-05, | |
| "loss": 1.1550764083862304, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.10416176447519099, | |
| "grad_norm": 0.18233029544353485, | |
| "learning_rate": 4.6189675249061975e-05, | |
| "loss": 1.1150010108947754, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.10541672549296438, | |
| "grad_norm": 0.20302756130695343, | |
| "learning_rate": 4.612498382714452e-05, | |
| "loss": 1.0721470832824707, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.10667168651073776, | |
| "grad_norm": 0.2043253630399704, | |
| "learning_rate": 4.606029240522707e-05, | |
| "loss": 1.1333942413330078, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.10792664752851115, | |
| "grad_norm": 0.19231152534484863, | |
| "learning_rate": 4.5995600983309616e-05, | |
| "loss": 1.1027427673339845, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.10918160854628453, | |
| "grad_norm": 0.20718224346637726, | |
| "learning_rate": 4.593090956139216e-05, | |
| "loss": 1.1255599975585937, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.11043656956405791, | |
| "grad_norm": 0.1926298588514328, | |
| "learning_rate": 4.586621813947471e-05, | |
| "loss": 1.1083927154541016, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.1116915305818313, | |
| "grad_norm": 0.2107614278793335, | |
| "learning_rate": 4.5801526717557256e-05, | |
| "loss": 1.0679313659667968, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.11294649159960468, | |
| "grad_norm": 0.1800786405801773, | |
| "learning_rate": 4.57368352956398e-05, | |
| "loss": 1.2032075881958009, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.11420145261737807, | |
| "grad_norm": 0.18883837759494781, | |
| "learning_rate": 4.567214387372234e-05, | |
| "loss": 1.1553642272949218, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.11545641363515145, | |
| "grad_norm": 0.17952467501163483, | |
| "learning_rate": 4.5607452451804896e-05, | |
| "loss": 1.151566505432129, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.11671137465292485, | |
| "grad_norm": 0.18356452882289886, | |
| "learning_rate": 4.5542761029887436e-05, | |
| "loss": 1.1210906028747558, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.11796633567069822, | |
| "grad_norm": 0.19210585951805115, | |
| "learning_rate": 4.547806960796999e-05, | |
| "loss": 1.1327272415161134, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.11922129668847162, | |
| "grad_norm": 0.2031843215227127, | |
| "learning_rate": 4.541337818605253e-05, | |
| "loss": 1.1083773612976073, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.120476257706245, | |
| "grad_norm": 0.16328449547290802, | |
| "learning_rate": 4.5348686764135076e-05, | |
| "loss": 1.163971519470215, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.12173121872401839, | |
| "grad_norm": 0.17702428996562958, | |
| "learning_rate": 4.528399534221762e-05, | |
| "loss": 1.1527894973754882, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.12298617974179177, | |
| "grad_norm": 0.1729395091533661, | |
| "learning_rate": 4.521930392030017e-05, | |
| "loss": 1.1400185585021974, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.12424114075956516, | |
| "grad_norm": 0.17520761489868164, | |
| "learning_rate": 4.5154612498382716e-05, | |
| "loss": 1.0990144729614257, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.12549610177733855, | |
| "grad_norm": 0.18720504641532898, | |
| "learning_rate": 4.508992107646526e-05, | |
| "loss": 1.1631937980651856, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12675106279511192, | |
| "grad_norm": 0.19207048416137695, | |
| "learning_rate": 4.502522965454781e-05, | |
| "loss": 1.1027806282043457, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.1280060238128853, | |
| "grad_norm": 0.1836109161376953, | |
| "learning_rate": 4.496053823263036e-05, | |
| "loss": 1.1294532775878907, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.1292609848306587, | |
| "grad_norm": 0.17419582605361938, | |
| "learning_rate": 4.48958468107129e-05, | |
| "loss": 1.1544736862182616, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.1305159458484321, | |
| "grad_norm": 0.20314334332942963, | |
| "learning_rate": 4.483115538879545e-05, | |
| "loss": 1.204758644104004, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.13177090686620546, | |
| "grad_norm": 0.17511610686779022, | |
| "learning_rate": 4.476646396687799e-05, | |
| "loss": 1.107564353942871, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.13302586788397885, | |
| "grad_norm": 0.215122252702713, | |
| "learning_rate": 4.4701772544960544e-05, | |
| "loss": 1.2063298225402832, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.13428082890175225, | |
| "grad_norm": 0.17670315504074097, | |
| "learning_rate": 4.4637081123043084e-05, | |
| "loss": 1.1757978439331054, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.1355357899195256, | |
| "grad_norm": 0.18765057623386383, | |
| "learning_rate": 4.457238970112563e-05, | |
| "loss": 1.1447077751159669, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.136790750937299, | |
| "grad_norm": 0.5525104403495789, | |
| "learning_rate": 4.450769827920818e-05, | |
| "loss": 1.1048051834106445, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.1380457119550724, | |
| "grad_norm": 0.19476006925106049, | |
| "learning_rate": 4.4443006857290724e-05, | |
| "loss": 1.1642623901367188, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.1393006729728458, | |
| "grad_norm": 0.17488926649093628, | |
| "learning_rate": 4.437831543537328e-05, | |
| "loss": 1.0796038627624511, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.14055563399061916, | |
| "grad_norm": 0.20637401938438416, | |
| "learning_rate": 4.431362401345582e-05, | |
| "loss": 1.1843393325805665, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.14181059500839255, | |
| "grad_norm": 0.22406329214572906, | |
| "learning_rate": 4.4248932591538364e-05, | |
| "loss": 1.1924137115478515, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.14306555602616594, | |
| "grad_norm": 0.20387622714042664, | |
| "learning_rate": 4.418424116962091e-05, | |
| "loss": 1.1867655754089355, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.14432051704393933, | |
| "grad_norm": 0.1762738674879074, | |
| "learning_rate": 4.411954974770346e-05, | |
| "loss": 1.0975682258605957, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.1455754780617127, | |
| "grad_norm": 0.21380206942558289, | |
| "learning_rate": 4.4054858325786004e-05, | |
| "loss": 1.1449914932250977, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.1468304390794861, | |
| "grad_norm": 0.19425804913043976, | |
| "learning_rate": 4.399016690386855e-05, | |
| "loss": 1.109062099456787, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.14808540009725948, | |
| "grad_norm": 0.18465885519981384, | |
| "learning_rate": 4.39254754819511e-05, | |
| "loss": 1.0944741249084473, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.14934036111503288, | |
| "grad_norm": 0.18625572323799133, | |
| "learning_rate": 4.3860784060033645e-05, | |
| "loss": 1.100302505493164, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.15059532213280624, | |
| "grad_norm": 0.19639429450035095, | |
| "learning_rate": 4.3796092638116185e-05, | |
| "loss": 1.1336475372314454, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.15185028315057963, | |
| "grad_norm": 0.17792847752571106, | |
| "learning_rate": 4.373140121619874e-05, | |
| "loss": 1.1655400276184082, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.15310524416835303, | |
| "grad_norm": 0.19248999655246735, | |
| "learning_rate": 4.366670979428128e-05, | |
| "loss": 1.0734170913696288, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.1543602051861264, | |
| "grad_norm": 0.16676078736782074, | |
| "learning_rate": 4.360201837236383e-05, | |
| "loss": 1.1042096138000488, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.15561516620389979, | |
| "grad_norm": 0.2144252061843872, | |
| "learning_rate": 4.353732695044637e-05, | |
| "loss": 1.086879348754883, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.15687012722167318, | |
| "grad_norm": 0.191383957862854, | |
| "learning_rate": 4.347263552852892e-05, | |
| "loss": 1.1877761840820313, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.15812508823944657, | |
| "grad_norm": 0.1510523408651352, | |
| "learning_rate": 4.3407944106611465e-05, | |
| "loss": 1.0977872848510741, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.15938004925721994, | |
| "grad_norm": 0.21486221253871918, | |
| "learning_rate": 4.334325268469401e-05, | |
| "loss": 1.1684078216552733, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.16063501027499333, | |
| "grad_norm": 0.20981676876544952, | |
| "learning_rate": 4.327856126277656e-05, | |
| "loss": 1.1553811073303222, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.16188997129276672, | |
| "grad_norm": 0.17923256754875183, | |
| "learning_rate": 4.3213869840859105e-05, | |
| "loss": 1.084774875640869, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.16314493231054011, | |
| "grad_norm": 0.16394007205963135, | |
| "learning_rate": 4.314917841894165e-05, | |
| "loss": 1.1058798789978028, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.16439989332831348, | |
| "grad_norm": 0.18293628096580505, | |
| "learning_rate": 4.30844869970242e-05, | |
| "loss": 1.1011856079101563, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.16565485434608687, | |
| "grad_norm": 0.15914376080036163, | |
| "learning_rate": 4.301979557510674e-05, | |
| "loss": 1.182911205291748, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.16690981536386026, | |
| "grad_norm": 0.19914117455482483, | |
| "learning_rate": 4.295510415318929e-05, | |
| "loss": 1.138012981414795, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.16816477638163366, | |
| "grad_norm": 0.177483931183815, | |
| "learning_rate": 4.289041273127183e-05, | |
| "loss": 1.1617022514343263, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.16941973739940702, | |
| "grad_norm": 0.18268829584121704, | |
| "learning_rate": 4.2825721309354386e-05, | |
| "loss": 1.1040393829345703, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.17067469841718041, | |
| "grad_norm": 0.1875396966934204, | |
| "learning_rate": 4.2761029887436926e-05, | |
| "loss": 1.1711323738098145, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.1719296594349538, | |
| "grad_norm": 0.20280130207538605, | |
| "learning_rate": 4.269633846551947e-05, | |
| "loss": 1.0739377021789551, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.1731846204527272, | |
| "grad_norm": 0.17182214558124542, | |
| "learning_rate": 4.263164704360202e-05, | |
| "loss": 1.1563935279846191, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.17443958147050057, | |
| "grad_norm": 0.1958380937576294, | |
| "learning_rate": 4.2566955621684566e-05, | |
| "loss": 1.152635383605957, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.17569454248827396, | |
| "grad_norm": 0.17894886434078217, | |
| "learning_rate": 4.250226419976711e-05, | |
| "loss": 1.129916000366211, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.17694950350604735, | |
| "grad_norm": 0.1785098761320114, | |
| "learning_rate": 4.243757277784966e-05, | |
| "loss": 1.1258666038513183, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.17820446452382072, | |
| "grad_norm": 0.19660112261772156, | |
| "learning_rate": 4.2372881355932206e-05, | |
| "loss": 1.083286666870117, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.1794594255415941, | |
| "grad_norm": 0.1892261505126953, | |
| "learning_rate": 4.230818993401475e-05, | |
| "loss": 1.12530574798584, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.1807143865593675, | |
| "grad_norm": 0.22547674179077148, | |
| "learning_rate": 4.224349851209729e-05, | |
| "loss": 1.1078373908996582, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.1819693475771409, | |
| "grad_norm": 0.16683579981327057, | |
| "learning_rate": 4.2178807090179846e-05, | |
| "loss": 1.075201416015625, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.18322430859491426, | |
| "grad_norm": 0.19976581633090973, | |
| "learning_rate": 4.2114115668262386e-05, | |
| "loss": 1.0396580696105957, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.18447926961268765, | |
| "grad_norm": 0.18316680192947388, | |
| "learning_rate": 4.204942424634494e-05, | |
| "loss": 1.1359598159790039, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.18573423063046104, | |
| "grad_norm": 0.2085312008857727, | |
| "learning_rate": 4.198473282442748e-05, | |
| "loss": 1.1656570434570312, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.18698919164823444, | |
| "grad_norm": 0.20343416929244995, | |
| "learning_rate": 4.192004140251003e-05, | |
| "loss": 1.110813522338867, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.1882441526660078, | |
| "grad_norm": 0.19183684885501862, | |
| "learning_rate": 4.1855349980592573e-05, | |
| "loss": 1.0822998046875, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1894991136837812, | |
| "grad_norm": 0.23563328385353088, | |
| "learning_rate": 4.179065855867512e-05, | |
| "loss": 1.0753483772277832, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.1907540747015546, | |
| "grad_norm": 0.1735006421804428, | |
| "learning_rate": 4.172596713675767e-05, | |
| "loss": 1.0641088485717773, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.19200903571932798, | |
| "grad_norm": 0.1672070324420929, | |
| "learning_rate": 4.1661275714840214e-05, | |
| "loss": 1.202446174621582, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.19326399673710135, | |
| "grad_norm": 0.19555087387561798, | |
| "learning_rate": 4.159658429292276e-05, | |
| "loss": 1.0974900245666503, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.19451895775487474, | |
| "grad_norm": 0.1727800965309143, | |
| "learning_rate": 4.153189287100531e-05, | |
| "loss": 1.0884978294372558, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.19577391877264813, | |
| "grad_norm": 0.1973794847726822, | |
| "learning_rate": 4.146720144908785e-05, | |
| "loss": 1.195762825012207, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.19702887979042152, | |
| "grad_norm": 0.20883244276046753, | |
| "learning_rate": 4.14025100271704e-05, | |
| "loss": 1.1073166847229003, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.1982838408081949, | |
| "grad_norm": 0.1726527363061905, | |
| "learning_rate": 4.133781860525295e-05, | |
| "loss": 1.088674545288086, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.19953880182596828, | |
| "grad_norm": 0.15648233890533447, | |
| "learning_rate": 4.1273127183335494e-05, | |
| "loss": 1.1345646858215332, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.20079376284374167, | |
| "grad_norm": 0.1888463795185089, | |
| "learning_rate": 4.120843576141804e-05, | |
| "loss": 1.09979190826416, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.20204872386151504, | |
| "grad_norm": 0.19210949540138245, | |
| "learning_rate": 4.114374433950058e-05, | |
| "loss": 1.1058323860168457, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.20330368487928843, | |
| "grad_norm": 0.19619908928871155, | |
| "learning_rate": 4.1079052917583134e-05, | |
| "loss": 1.1319670677185059, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.20455864589706182, | |
| "grad_norm": 0.1664625108242035, | |
| "learning_rate": 4.1014361495665674e-05, | |
| "loss": 1.0515789985656738, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.20581360691483522, | |
| "grad_norm": 0.19872358441352844, | |
| "learning_rate": 4.094967007374823e-05, | |
| "loss": 1.1266546249389648, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.20706856793260858, | |
| "grad_norm": 0.1681402623653412, | |
| "learning_rate": 4.088497865183077e-05, | |
| "loss": 1.1066089630126954, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.20832352895038198, | |
| "grad_norm": 0.1954943835735321, | |
| "learning_rate": 4.0820287229913315e-05, | |
| "loss": 1.1911808013916017, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.20957848996815537, | |
| "grad_norm": 0.17476460337638855, | |
| "learning_rate": 4.075559580799586e-05, | |
| "loss": 1.038119125366211, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.21083345098592876, | |
| "grad_norm": 0.18508611619472504, | |
| "learning_rate": 4.069090438607841e-05, | |
| "loss": 1.0409876823425293, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.21208841200370213, | |
| "grad_norm": 0.20123597979545593, | |
| "learning_rate": 4.0626212964160955e-05, | |
| "loss": 1.0861782073974608, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.21334337302147552, | |
| "grad_norm": 0.2034509927034378, | |
| "learning_rate": 4.05615215422435e-05, | |
| "loss": 1.1509716033935546, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.2145983340392489, | |
| "grad_norm": 0.16378086805343628, | |
| "learning_rate": 4.049683012032605e-05, | |
| "loss": 1.1095640182495117, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.2158532950570223, | |
| "grad_norm": 0.2010604441165924, | |
| "learning_rate": 4.0432138698408595e-05, | |
| "loss": 1.0584364891052247, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.21710825607479567, | |
| "grad_norm": 0.1633748859167099, | |
| "learning_rate": 4.0367447276491135e-05, | |
| "loss": 1.0981843948364258, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.21836321709256906, | |
| "grad_norm": 0.1888664811849594, | |
| "learning_rate": 4.030275585457369e-05, | |
| "loss": 1.0630824089050293, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.21961817811034245, | |
| "grad_norm": 0.18354853987693787, | |
| "learning_rate": 4.023806443265623e-05, | |
| "loss": 1.1649972915649414, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.22087313912811582, | |
| "grad_norm": 0.18206484615802765, | |
| "learning_rate": 4.017337301073878e-05, | |
| "loss": 1.128775691986084, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.2221281001458892, | |
| "grad_norm": 0.20967255532741547, | |
| "learning_rate": 4.010868158882132e-05, | |
| "loss": 1.0991705894470214, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.2233830611636626, | |
| "grad_norm": 0.173291876912117, | |
| "learning_rate": 4.004399016690387e-05, | |
| "loss": 1.133495807647705, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.224638022181436, | |
| "grad_norm": 0.18495796620845795, | |
| "learning_rate": 3.9979298744986416e-05, | |
| "loss": 1.1338909149169922, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.22589298319920936, | |
| "grad_norm": 0.15466730296611786, | |
| "learning_rate": 3.991460732306896e-05, | |
| "loss": 1.103053092956543, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.22714794421698276, | |
| "grad_norm": 0.2066875696182251, | |
| "learning_rate": 3.984991590115151e-05, | |
| "loss": 1.0687838554382325, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.22840290523475615, | |
| "grad_norm": 0.1611821949481964, | |
| "learning_rate": 3.9785224479234056e-05, | |
| "loss": 1.0882232666015625, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.22965786625252954, | |
| "grad_norm": 0.21903353929519653, | |
| "learning_rate": 3.97205330573166e-05, | |
| "loss": 1.1616278648376466, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.2309128272703029, | |
| "grad_norm": 0.1864372193813324, | |
| "learning_rate": 3.965584163539915e-05, | |
| "loss": 1.0734145164489746, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.2321677882880763, | |
| "grad_norm": 0.18822331726551056, | |
| "learning_rate": 3.959115021348169e-05, | |
| "loss": 1.082595157623291, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2334227493058497, | |
| "grad_norm": 0.17215979099273682, | |
| "learning_rate": 3.952645879156424e-05, | |
| "loss": 1.0656033515930177, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.23467771032362308, | |
| "grad_norm": 0.19856449961662292, | |
| "learning_rate": 3.946176736964678e-05, | |
| "loss": 1.0940080642700196, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.23593267134139645, | |
| "grad_norm": 0.19286569952964783, | |
| "learning_rate": 3.9397075947729336e-05, | |
| "loss": 1.1294413566589356, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.23718763235916984, | |
| "grad_norm": 0.21396370232105255, | |
| "learning_rate": 3.9332384525811876e-05, | |
| "loss": 1.1367197990417481, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.23844259337694323, | |
| "grad_norm": 0.19455762207508087, | |
| "learning_rate": 3.926769310389442e-05, | |
| "loss": 1.117790412902832, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.23969755439471663, | |
| "grad_norm": 0.19210918247699738, | |
| "learning_rate": 3.920300168197697e-05, | |
| "loss": 1.078984260559082, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.24095251541249, | |
| "grad_norm": 0.17716605961322784, | |
| "learning_rate": 3.9138310260059516e-05, | |
| "loss": 1.1049820899963378, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.24220747643026339, | |
| "grad_norm": 0.1894821971654892, | |
| "learning_rate": 3.907361883814206e-05, | |
| "loss": 1.1215306282043458, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.24346243744803678, | |
| "grad_norm": 0.16090793907642365, | |
| "learning_rate": 3.900892741622461e-05, | |
| "loss": 1.0604351997375487, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.24471739846581014, | |
| "grad_norm": 0.16751867532730103, | |
| "learning_rate": 3.894423599430716e-05, | |
| "loss": 1.1072792053222655, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.24597235948358354, | |
| "grad_norm": 0.18883423507213593, | |
| "learning_rate": 3.8879544572389703e-05, | |
| "loss": 1.1744267463684082, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.24722732050135693, | |
| "grad_norm": 0.17353016138076782, | |
| "learning_rate": 3.881485315047225e-05, | |
| "loss": 1.1146465301513673, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.24848228151913032, | |
| "grad_norm": 0.17569345235824585, | |
| "learning_rate": 3.87501617285548e-05, | |
| "loss": 1.1410762786865234, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.2497372425369037, | |
| "grad_norm": 0.1861223578453064, | |
| "learning_rate": 3.868547030663734e-05, | |
| "loss": 1.1510584831237793, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.2509922035546771, | |
| "grad_norm": 0.1848827749490738, | |
| "learning_rate": 3.862077888471989e-05, | |
| "loss": 1.159224510192871, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.25224716457245044, | |
| "grad_norm": 0.17827710509300232, | |
| "learning_rate": 3.855608746280243e-05, | |
| "loss": 1.0784708976745605, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.25350212559022384, | |
| "grad_norm": 0.2000972479581833, | |
| "learning_rate": 3.849139604088498e-05, | |
| "loss": 1.0981005668640136, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.25475708660799723, | |
| "grad_norm": 0.17901651561260223, | |
| "learning_rate": 3.842670461896753e-05, | |
| "loss": 1.110117530822754, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.2560120476257706, | |
| "grad_norm": 0.16541555523872375, | |
| "learning_rate": 3.836201319705007e-05, | |
| "loss": 1.100083065032959, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.257267008643544, | |
| "grad_norm": 0.1809515506029129, | |
| "learning_rate": 3.8297321775132624e-05, | |
| "loss": 1.0859627723693848, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.2585219696613174, | |
| "grad_norm": 0.17363417148590088, | |
| "learning_rate": 3.8232630353215164e-05, | |
| "loss": 1.1074336051940918, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.2597769306790908, | |
| "grad_norm": 0.19632141292095184, | |
| "learning_rate": 3.816793893129771e-05, | |
| "loss": 1.092854881286621, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.2610318916968642, | |
| "grad_norm": 0.18531949818134308, | |
| "learning_rate": 3.810324750938026e-05, | |
| "loss": 1.1531734466552734, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.26228685271463753, | |
| "grad_norm": 0.2005389928817749, | |
| "learning_rate": 3.8038556087462804e-05, | |
| "loss": 1.0838706016540527, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.2635418137324109, | |
| "grad_norm": 0.2321166694164276, | |
| "learning_rate": 3.797386466554535e-05, | |
| "loss": 1.1546616554260254, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2647967747501843, | |
| "grad_norm": 0.1765083223581314, | |
| "learning_rate": 3.79091732436279e-05, | |
| "loss": 1.0811034202575684, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.2660517357679577, | |
| "grad_norm": 0.18589410185813904, | |
| "learning_rate": 3.7844481821710445e-05, | |
| "loss": 1.0837715148925782, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.2673066967857311, | |
| "grad_norm": 0.17447498440742493, | |
| "learning_rate": 3.777979039979299e-05, | |
| "loss": 1.124140453338623, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.2685616578035045, | |
| "grad_norm": 0.18152177333831787, | |
| "learning_rate": 3.771509897787554e-05, | |
| "loss": 1.1175949096679687, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.2698166188212779, | |
| "grad_norm": 0.18476802110671997, | |
| "learning_rate": 3.7650407555958085e-05, | |
| "loss": 1.109239387512207, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.2710715798390512, | |
| "grad_norm": 0.1941436380147934, | |
| "learning_rate": 3.7585716134040625e-05, | |
| "loss": 1.121135139465332, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.2723265408568246, | |
| "grad_norm": 0.21705880761146545, | |
| "learning_rate": 3.752102471212318e-05, | |
| "loss": 1.0808409690856933, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.273581501874598, | |
| "grad_norm": 0.20270583033561707, | |
| "learning_rate": 3.745633329020572e-05, | |
| "loss": 1.0374409675598144, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.2748364628923714, | |
| "grad_norm": 0.1888822466135025, | |
| "learning_rate": 3.739164186828827e-05, | |
| "loss": 1.1372817039489747, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.2760914239101448, | |
| "grad_norm": 0.18205349147319794, | |
| "learning_rate": 3.732695044637081e-05, | |
| "loss": 1.063914966583252, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.2773463849279182, | |
| "grad_norm": 0.18658319115638733, | |
| "learning_rate": 3.726225902445336e-05, | |
| "loss": 1.0600374221801758, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.2786013459456916, | |
| "grad_norm": 0.17652806639671326, | |
| "learning_rate": 3.7197567602535905e-05, | |
| "loss": 1.175550937652588, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.279856306963465, | |
| "grad_norm": 0.3145999312400818, | |
| "learning_rate": 3.713287618061845e-05, | |
| "loss": 1.1401835441589356, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.2811112679812383, | |
| "grad_norm": 0.19413627684116364, | |
| "learning_rate": 3.7068184758701e-05, | |
| "loss": 1.0594032287597657, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.2823662289990117, | |
| "grad_norm": 0.19201666116714478, | |
| "learning_rate": 3.7003493336783546e-05, | |
| "loss": 1.1342046737670899, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.2836211900167851, | |
| "grad_norm": 0.15544365346431732, | |
| "learning_rate": 3.693880191486609e-05, | |
| "loss": 1.050437831878662, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.2848761510345585, | |
| "grad_norm": 0.19418646395206451, | |
| "learning_rate": 3.687411049294864e-05, | |
| "loss": 1.1457019805908204, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.2861311120523319, | |
| "grad_norm": 0.1728999763727188, | |
| "learning_rate": 3.680941907103118e-05, | |
| "loss": 1.0303121566772462, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.2873860730701053, | |
| "grad_norm": 0.17530235648155212, | |
| "learning_rate": 3.674472764911373e-05, | |
| "loss": 1.056619358062744, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.28864103408787867, | |
| "grad_norm": 0.17840541899204254, | |
| "learning_rate": 3.668003622719627e-05, | |
| "loss": 1.1103222846984864, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.289895995105652, | |
| "grad_norm": 0.1811124086380005, | |
| "learning_rate": 3.6615344805278826e-05, | |
| "loss": 1.0357528686523438, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.2911509561234254, | |
| "grad_norm": 0.18500889837741852, | |
| "learning_rate": 3.6550653383361366e-05, | |
| "loss": 1.0739376068115234, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.2924059171411988, | |
| "grad_norm": 0.1825813204050064, | |
| "learning_rate": 3.648596196144391e-05, | |
| "loss": 1.0761238098144532, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.2936608781589722, | |
| "grad_norm": 0.16683438420295715, | |
| "learning_rate": 3.642127053952646e-05, | |
| "loss": 1.1081546783447265, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.2949158391767456, | |
| "grad_norm": 0.2046334445476532, | |
| "learning_rate": 3.6356579117609006e-05, | |
| "loss": 1.1215451240539551, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.29617080019451897, | |
| "grad_norm": 0.16321095824241638, | |
| "learning_rate": 3.629188769569155e-05, | |
| "loss": 1.0684563636779785, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.29742576121229236, | |
| "grad_norm": 0.17285047471523285, | |
| "learning_rate": 3.62271962737741e-05, | |
| "loss": 1.117433452606201, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.29868072223006575, | |
| "grad_norm": 0.17733103036880493, | |
| "learning_rate": 3.6162504851856646e-05, | |
| "loss": 1.0875020980834962, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.2999356832478391, | |
| "grad_norm": 0.20012889802455902, | |
| "learning_rate": 3.609781342993919e-05, | |
| "loss": 1.090438175201416, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.3011906442656125, | |
| "grad_norm": 0.2170931100845337, | |
| "learning_rate": 3.603312200802173e-05, | |
| "loss": 1.1388078689575196, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3024456052833859, | |
| "grad_norm": 0.1782235950231552, | |
| "learning_rate": 3.596843058610429e-05, | |
| "loss": 1.096130657196045, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.30370056630115927, | |
| "grad_norm": 0.1945246011018753, | |
| "learning_rate": 3.590373916418683e-05, | |
| "loss": 1.1477142333984376, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.30495552731893266, | |
| "grad_norm": 0.18570466339588165, | |
| "learning_rate": 3.583904774226938e-05, | |
| "loss": 1.1268895149230957, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.30621048833670605, | |
| "grad_norm": 0.18892186880111694, | |
| "learning_rate": 3.577435632035192e-05, | |
| "loss": 1.0669918060302734, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.30746544935447945, | |
| "grad_norm": 0.1974097490310669, | |
| "learning_rate": 3.570966489843447e-05, | |
| "loss": 1.1709393501281737, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.3087204103722528, | |
| "grad_norm": 0.24947650730609894, | |
| "learning_rate": 3.5644973476517014e-05, | |
| "loss": 1.141524314880371, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.3099753713900262, | |
| "grad_norm": 0.20610560476779938, | |
| "learning_rate": 3.558028205459956e-05, | |
| "loss": 1.1172548294067384, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.31123033240779957, | |
| "grad_norm": 0.1904863864183426, | |
| "learning_rate": 3.551559063268211e-05, | |
| "loss": 1.092203712463379, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.31248529342557296, | |
| "grad_norm": 0.20900042355060577, | |
| "learning_rate": 3.5450899210764654e-05, | |
| "loss": 1.164710235595703, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.31374025444334636, | |
| "grad_norm": 0.19653667509555817, | |
| "learning_rate": 3.53862077888472e-05, | |
| "loss": 1.1293628692626954, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.31499521546111975, | |
| "grad_norm": 0.1921471506357193, | |
| "learning_rate": 3.532151636692975e-05, | |
| "loss": 1.0006118774414063, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.31625017647889314, | |
| "grad_norm": 0.17006602883338928, | |
| "learning_rate": 3.5256824945012294e-05, | |
| "loss": 1.1141504287719726, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.31750513749666653, | |
| "grad_norm": 0.19610007107257843, | |
| "learning_rate": 3.519213352309484e-05, | |
| "loss": 1.0875147819519042, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.31876009851443987, | |
| "grad_norm": 0.18897046148777008, | |
| "learning_rate": 3.512744210117739e-05, | |
| "loss": 1.1723553657531738, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.32001505953221326, | |
| "grad_norm": 0.2167101353406906, | |
| "learning_rate": 3.5062750679259934e-05, | |
| "loss": 1.1133469581604003, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.32127002054998666, | |
| "grad_norm": 0.19870373606681824, | |
| "learning_rate": 3.499805925734248e-05, | |
| "loss": 1.0627870559692383, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.32252498156776005, | |
| "grad_norm": 0.19916154444217682, | |
| "learning_rate": 3.493336783542502e-05, | |
| "loss": 1.1032466888427734, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.32377994258553344, | |
| "grad_norm": 0.17430101335048676, | |
| "learning_rate": 3.4868676413507575e-05, | |
| "loss": 1.0216200828552247, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.32503490360330684, | |
| "grad_norm": 0.17178688943386078, | |
| "learning_rate": 3.4803984991590115e-05, | |
| "loss": 1.072761631011963, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.32628986462108023, | |
| "grad_norm": 0.18140675127506256, | |
| "learning_rate": 3.473929356967267e-05, | |
| "loss": 1.1414005279541015, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.3275448256388536, | |
| "grad_norm": 0.19831927120685577, | |
| "learning_rate": 3.467460214775521e-05, | |
| "loss": 1.1272093772888183, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.32879978665662696, | |
| "grad_norm": 0.1755538433790207, | |
| "learning_rate": 3.4609910725837755e-05, | |
| "loss": 1.102944564819336, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.33005474767440035, | |
| "grad_norm": 0.22686068713665009, | |
| "learning_rate": 3.45452193039203e-05, | |
| "loss": 1.098013973236084, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.33130970869217374, | |
| "grad_norm": 0.15959154069423676, | |
| "learning_rate": 3.448052788200285e-05, | |
| "loss": 1.0637240409851074, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.33256466970994714, | |
| "grad_norm": 0.19935756921768188, | |
| "learning_rate": 3.4415836460085395e-05, | |
| "loss": 1.0774771690368652, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.33381963072772053, | |
| "grad_norm": 0.16935963928699493, | |
| "learning_rate": 3.435114503816794e-05, | |
| "loss": 1.0928321838378907, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.3350745917454939, | |
| "grad_norm": 0.1932937055826187, | |
| "learning_rate": 3.428645361625049e-05, | |
| "loss": 1.034721279144287, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.3363295527632673, | |
| "grad_norm": 0.20397278666496277, | |
| "learning_rate": 3.4221762194333035e-05, | |
| "loss": 1.1345463752746583, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.33758451378104065, | |
| "grad_norm": 0.1837303638458252, | |
| "learning_rate": 3.4157070772415575e-05, | |
| "loss": 1.1856120109558106, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.33883947479881404, | |
| "grad_norm": 0.2031344324350357, | |
| "learning_rate": 3.409237935049813e-05, | |
| "loss": 1.0397148132324219, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.34009443581658744, | |
| "grad_norm": 0.20356477797031403, | |
| "learning_rate": 3.402768792858067e-05, | |
| "loss": 1.0965538024902344, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.34134939683436083, | |
| "grad_norm": 0.17668817937374115, | |
| "learning_rate": 3.396299650666322e-05, | |
| "loss": 1.0809722900390626, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.3426043578521342, | |
| "grad_norm": 0.20212671160697937, | |
| "learning_rate": 3.389830508474576e-05, | |
| "loss": 1.0418371200561523, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.3438593188699076, | |
| "grad_norm": 0.17411935329437256, | |
| "learning_rate": 3.383361366282831e-05, | |
| "loss": 1.0937856674194335, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.345114279887681, | |
| "grad_norm": 0.20167027413845062, | |
| "learning_rate": 3.3768922240910856e-05, | |
| "loss": 1.0584357261657715, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.3463692409054544, | |
| "grad_norm": 0.23438484966754913, | |
| "learning_rate": 3.37042308189934e-05, | |
| "loss": 1.1240981101989747, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.34762420192322774, | |
| "grad_norm": 0.20869354903697968, | |
| "learning_rate": 3.363953939707595e-05, | |
| "loss": 1.0020330429077149, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.34887916294100113, | |
| "grad_norm": 0.17483501136302948, | |
| "learning_rate": 3.3574847975158496e-05, | |
| "loss": 1.0768984794616698, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.3501341239587745, | |
| "grad_norm": 0.19473516941070557, | |
| "learning_rate": 3.351015655324104e-05, | |
| "loss": 1.1058112144470216, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.3513890849765479, | |
| "grad_norm": 0.18327480554580688, | |
| "learning_rate": 3.344546513132359e-05, | |
| "loss": 1.1330906867980957, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.3526440459943213, | |
| "grad_norm": 0.19095930457115173, | |
| "learning_rate": 3.338077370940613e-05, | |
| "loss": 1.0920269012451171, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.3538990070120947, | |
| "grad_norm": 0.20432385802268982, | |
| "learning_rate": 3.331608228748868e-05, | |
| "loss": 1.0859003067016602, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.3551539680298681, | |
| "grad_norm": 0.17931047081947327, | |
| "learning_rate": 3.325139086557122e-05, | |
| "loss": 1.0956171989440917, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.35640892904764143, | |
| "grad_norm": 0.19674962759017944, | |
| "learning_rate": 3.3186699443653776e-05, | |
| "loss": 1.0600525856018066, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.3576638900654148, | |
| "grad_norm": 0.2171870470046997, | |
| "learning_rate": 3.3122008021736316e-05, | |
| "loss": 1.1198451042175293, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.3589188510831882, | |
| "grad_norm": 0.17714793980121613, | |
| "learning_rate": 3.305731659981886e-05, | |
| "loss": 1.0424016952514648, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.3601738121009616, | |
| "grad_norm": 0.17595025897026062, | |
| "learning_rate": 3.299262517790141e-05, | |
| "loss": 1.0837072372436523, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.361428773118735, | |
| "grad_norm": 0.1887323260307312, | |
| "learning_rate": 3.292793375598396e-05, | |
| "loss": 1.0680608749389648, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.3626837341365084, | |
| "grad_norm": 0.194399893283844, | |
| "learning_rate": 3.2863242334066503e-05, | |
| "loss": 0.9988933563232422, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.3639386951542818, | |
| "grad_norm": 0.1708306223154068, | |
| "learning_rate": 3.279855091214905e-05, | |
| "loss": 1.1282541275024414, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.3651936561720552, | |
| "grad_norm": 0.20244979858398438, | |
| "learning_rate": 3.27338594902316e-05, | |
| "loss": 1.1450789451599122, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.3664486171898285, | |
| "grad_norm": 0.1962517648935318, | |
| "learning_rate": 3.2669168068314144e-05, | |
| "loss": 1.0602560997009278, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.3677035782076019, | |
| "grad_norm": 0.20567139983177185, | |
| "learning_rate": 3.2604476646396684e-05, | |
| "loss": 1.1111217498779298, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.3689585392253753, | |
| "grad_norm": 0.20776300132274628, | |
| "learning_rate": 3.253978522447924e-05, | |
| "loss": 1.0344942092895508, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.3702135002431487, | |
| "grad_norm": 0.21871210634708405, | |
| "learning_rate": 3.2475093802561784e-05, | |
| "loss": 1.1111419677734375, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.3714684612609221, | |
| "grad_norm": 0.185679093003273, | |
| "learning_rate": 3.241040238064433e-05, | |
| "loss": 1.1700579643249511, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.3727234222786955, | |
| "grad_norm": 0.19544155895709991, | |
| "learning_rate": 3.234571095872688e-05, | |
| "loss": 1.0801254272460938, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.3739783832964689, | |
| "grad_norm": 0.20506669580936432, | |
| "learning_rate": 3.228101953680942e-05, | |
| "loss": 1.1814807891845702, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.3752333443142422, | |
| "grad_norm": 0.17181119322776794, | |
| "learning_rate": 3.221632811489197e-05, | |
| "loss": 1.0522710800170898, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.3764883053320156, | |
| "grad_norm": 0.23374006152153015, | |
| "learning_rate": 3.215163669297451e-05, | |
| "loss": 1.0902445793151856, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.377743266349789, | |
| "grad_norm": 0.21061059832572937, | |
| "learning_rate": 3.2086945271057064e-05, | |
| "loss": 1.1061354637145997, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.3789982273675624, | |
| "grad_norm": 0.18865549564361572, | |
| "learning_rate": 3.2022253849139604e-05, | |
| "loss": 1.090491485595703, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.3802531883853358, | |
| "grad_norm": 0.19853392243385315, | |
| "learning_rate": 3.195756242722215e-05, | |
| "loss": 1.1597092628479004, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.3815081494031092, | |
| "grad_norm": 0.1734134405851364, | |
| "learning_rate": 3.18928710053047e-05, | |
| "loss": 1.1168682098388671, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.38276311042088257, | |
| "grad_norm": 0.18935444951057434, | |
| "learning_rate": 3.1828179583387245e-05, | |
| "loss": 1.1608116149902343, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.38401807143865596, | |
| "grad_norm": 0.18997101485729218, | |
| "learning_rate": 3.176348816146979e-05, | |
| "loss": 1.094447135925293, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.3852730324564293, | |
| "grad_norm": 0.16133999824523926, | |
| "learning_rate": 3.169879673955234e-05, | |
| "loss": 1.1392805099487304, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.3865279934742027, | |
| "grad_norm": 0.19918449223041534, | |
| "learning_rate": 3.1634105317634885e-05, | |
| "loss": 1.1354280471801759, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.3877829544919761, | |
| "grad_norm": 0.17987719178199768, | |
| "learning_rate": 3.156941389571743e-05, | |
| "loss": 1.0349795341491699, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.3890379155097495, | |
| "grad_norm": 0.21309016644954681, | |
| "learning_rate": 3.150472247379997e-05, | |
| "loss": 1.1042888641357422, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.39029287652752287, | |
| "grad_norm": 0.17456930875778198, | |
| "learning_rate": 3.1440031051882525e-05, | |
| "loss": 1.1372068405151368, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.39154783754529626, | |
| "grad_norm": 0.17959140241146088, | |
| "learning_rate": 3.1375339629965065e-05, | |
| "loss": 1.0295021057128906, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.39280279856306966, | |
| "grad_norm": 0.21057923138141632, | |
| "learning_rate": 3.131064820804762e-05, | |
| "loss": 1.14412841796875, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.39405775958084305, | |
| "grad_norm": 0.18713043630123138, | |
| "learning_rate": 3.124595678613016e-05, | |
| "loss": 1.0776933670043944, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.3953127205986164, | |
| "grad_norm": 0.19994860887527466, | |
| "learning_rate": 3.1181265364212705e-05, | |
| "loss": 1.0793471336364746, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.3965676816163898, | |
| "grad_norm": 0.18249864876270294, | |
| "learning_rate": 3.111657394229525e-05, | |
| "loss": 1.061374568939209, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.39782264263416317, | |
| "grad_norm": 0.2102167159318924, | |
| "learning_rate": 3.10518825203778e-05, | |
| "loss": 1.0960093498229981, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.39907760365193656, | |
| "grad_norm": 0.20832930505275726, | |
| "learning_rate": 3.0987191098460346e-05, | |
| "loss": 1.033592987060547, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.40033256466970996, | |
| "grad_norm": 0.18993455171585083, | |
| "learning_rate": 3.092249967654289e-05, | |
| "loss": 1.1482149124145509, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.40158752568748335, | |
| "grad_norm": 0.16434840857982635, | |
| "learning_rate": 3.085780825462544e-05, | |
| "loss": 1.1257658004760742, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.40284248670525674, | |
| "grad_norm": 0.17940276861190796, | |
| "learning_rate": 3.0793116832707986e-05, | |
| "loss": 1.031651210784912, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.4040974477230301, | |
| "grad_norm": 0.20808145403862, | |
| "learning_rate": 3.0728425410790526e-05, | |
| "loss": 1.0851681709289551, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.40535240874080347, | |
| "grad_norm": 0.1987367421388626, | |
| "learning_rate": 3.066373398887308e-05, | |
| "loss": 1.1109742164611816, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.40660736975857686, | |
| "grad_norm": 0.19617575407028198, | |
| "learning_rate": 3.059904256695562e-05, | |
| "loss": 1.0537912368774414, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.40786233077635026, | |
| "grad_norm": 0.21639147400856018, | |
| "learning_rate": 3.053435114503817e-05, | |
| "loss": 1.125661563873291, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.40911729179412365, | |
| "grad_norm": 0.1862943321466446, | |
| "learning_rate": 3.0469659723120713e-05, | |
| "loss": 1.1534263610839843, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.41037225281189704, | |
| "grad_norm": 0.19426733255386353, | |
| "learning_rate": 3.0404968301203263e-05, | |
| "loss": 1.1007325172424316, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.41162721382967044, | |
| "grad_norm": 0.19233031570911407, | |
| "learning_rate": 3.0340276879285806e-05, | |
| "loss": 1.101203441619873, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.41288217484744383, | |
| "grad_norm": 0.1994284987449646, | |
| "learning_rate": 3.0275585457368356e-05, | |
| "loss": 1.1052302360534667, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.41413713586521717, | |
| "grad_norm": 0.2279525250196457, | |
| "learning_rate": 3.02108940354509e-05, | |
| "loss": 1.061030387878418, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.41539209688299056, | |
| "grad_norm": 0.17196883261203766, | |
| "learning_rate": 3.0146202613533446e-05, | |
| "loss": 0.9545375823974609, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.41664705790076395, | |
| "grad_norm": 0.2149975597858429, | |
| "learning_rate": 3.008151119161599e-05, | |
| "loss": 1.048507022857666, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.41790201891853734, | |
| "grad_norm": 0.16794604063034058, | |
| "learning_rate": 3.001681976969854e-05, | |
| "loss": 1.0982236862182617, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.41915697993631074, | |
| "grad_norm": 0.177035853266716, | |
| "learning_rate": 2.9952128347781083e-05, | |
| "loss": 1.0892048835754395, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.42041194095408413, | |
| "grad_norm": 0.19969920814037323, | |
| "learning_rate": 2.9887436925863633e-05, | |
| "loss": 1.0508790016174316, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.4216669019718575, | |
| "grad_norm": 0.1925683468580246, | |
| "learning_rate": 2.9822745503946177e-05, | |
| "loss": 1.115567111968994, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.42292186298963086, | |
| "grad_norm": 0.1786133497953415, | |
| "learning_rate": 2.9758054082028724e-05, | |
| "loss": 1.1230090141296387, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.42417682400740425, | |
| "grad_norm": 0.18185003101825714, | |
| "learning_rate": 2.9693362660111267e-05, | |
| "loss": 1.0770362854003905, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.42543178502517764, | |
| "grad_norm": 0.18131586909294128, | |
| "learning_rate": 2.9628671238193817e-05, | |
| "loss": 1.0566128730773925, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.42668674604295104, | |
| "grad_norm": 0.1889081597328186, | |
| "learning_rate": 2.9563979816276367e-05, | |
| "loss": 1.1312256813049317, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.42794170706072443, | |
| "grad_norm": 0.19797080755233765, | |
| "learning_rate": 2.949928839435891e-05, | |
| "loss": 1.097049617767334, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.4291966680784978, | |
| "grad_norm": 0.21253708004951477, | |
| "learning_rate": 2.9434596972441457e-05, | |
| "loss": 1.063642692565918, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.4304516290962712, | |
| "grad_norm": 0.18668265640735626, | |
| "learning_rate": 2.9369905550524e-05, | |
| "loss": 1.11463623046875, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.4317065901140446, | |
| "grad_norm": 0.1904267519712448, | |
| "learning_rate": 2.930521412860655e-05, | |
| "loss": 1.0638154029846192, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.43296155113181795, | |
| "grad_norm": 0.15511858463287354, | |
| "learning_rate": 2.9240522706689094e-05, | |
| "loss": 1.1070695877075196, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.43421651214959134, | |
| "grad_norm": 0.18738579750061035, | |
| "learning_rate": 2.9175831284771644e-05, | |
| "loss": 1.0907926559448242, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.43547147316736473, | |
| "grad_norm": 0.20306473970413208, | |
| "learning_rate": 2.9111139862854188e-05, | |
| "loss": 1.0509754180908204, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.4367264341851381, | |
| "grad_norm": 0.17330707609653473, | |
| "learning_rate": 2.9046448440936734e-05, | |
| "loss": 1.086292552947998, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.4379813952029115, | |
| "grad_norm": 0.21493610739707947, | |
| "learning_rate": 2.8981757019019278e-05, | |
| "loss": 1.0941463470458985, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.4392363562206849, | |
| "grad_norm": 0.20350311696529388, | |
| "learning_rate": 2.8917065597101828e-05, | |
| "loss": 1.1737398147583007, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.4404913172384583, | |
| "grad_norm": 0.19272877275943756, | |
| "learning_rate": 2.885237417518437e-05, | |
| "loss": 1.0502948760986328, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.44174627825623164, | |
| "grad_norm": 0.17556731402873993, | |
| "learning_rate": 2.878768275326692e-05, | |
| "loss": 1.103357982635498, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.44300123927400503, | |
| "grad_norm": 0.22516275942325592, | |
| "learning_rate": 2.8722991331349465e-05, | |
| "loss": 1.1097342491149902, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.4442562002917784, | |
| "grad_norm": 0.17185984551906586, | |
| "learning_rate": 2.865829990943201e-05, | |
| "loss": 1.0627781867980957, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.4455111613095518, | |
| "grad_norm": 0.20243440568447113, | |
| "learning_rate": 2.8593608487514555e-05, | |
| "loss": 1.083481216430664, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.4467661223273252, | |
| "grad_norm": 0.20377662777900696, | |
| "learning_rate": 2.8528917065597105e-05, | |
| "loss": 1.0517413139343261, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.4480210833450986, | |
| "grad_norm": 0.19456355273723602, | |
| "learning_rate": 2.846422564367965e-05, | |
| "loss": 1.0656819343566895, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.449276044362872, | |
| "grad_norm": 0.21682517230510712, | |
| "learning_rate": 2.83995342217622e-05, | |
| "loss": 1.0958752632141113, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.4505310053806454, | |
| "grad_norm": 0.19284336268901825, | |
| "learning_rate": 2.8334842799844742e-05, | |
| "loss": 1.0725554466247558, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.4517859663984187, | |
| "grad_norm": 0.1780819147825241, | |
| "learning_rate": 2.827015137792729e-05, | |
| "loss": 1.0786738395690918, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.4530409274161921, | |
| "grad_norm": 0.19883157312870026, | |
| "learning_rate": 2.8205459956009832e-05, | |
| "loss": 1.066401481628418, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.4542958884339655, | |
| "grad_norm": 0.1856444627046585, | |
| "learning_rate": 2.8140768534092382e-05, | |
| "loss": 1.1166013717651366, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.4555508494517389, | |
| "grad_norm": 0.2152242809534073, | |
| "learning_rate": 2.8076077112174925e-05, | |
| "loss": 1.0979412078857422, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.4568058104695123, | |
| "grad_norm": 0.19830797612667084, | |
| "learning_rate": 2.8011385690257476e-05, | |
| "loss": 1.1257426261901855, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.4580607714872857, | |
| "grad_norm": 0.2238456904888153, | |
| "learning_rate": 2.794669426834002e-05, | |
| "loss": 1.1249502182006836, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.4593157325050591, | |
| "grad_norm": 0.17598365247249603, | |
| "learning_rate": 2.7882002846422566e-05, | |
| "loss": 1.0131544113159179, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.4605706935228324, | |
| "grad_norm": 0.2041376829147339, | |
| "learning_rate": 2.781731142450511e-05, | |
| "loss": 1.0872151374816894, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.4618256545406058, | |
| "grad_norm": 0.20209568738937378, | |
| "learning_rate": 2.775262000258766e-05, | |
| "loss": 1.0570232391357421, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.4630806155583792, | |
| "grad_norm": 0.1764563024044037, | |
| "learning_rate": 2.7687928580670203e-05, | |
| "loss": 1.0938175201416016, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.4643355765761526, | |
| "grad_norm": 0.197507843375206, | |
| "learning_rate": 2.7623237158752753e-05, | |
| "loss": 1.0432971000671387, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.465590537593926, | |
| "grad_norm": 0.1874912828207016, | |
| "learning_rate": 2.7558545736835296e-05, | |
| "loss": 1.0999769210815429, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.4668454986116994, | |
| "grad_norm": 0.18974807858467102, | |
| "learning_rate": 2.7493854314917843e-05, | |
| "loss": 1.1539668083190917, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.4681004596294728, | |
| "grad_norm": 0.20754633843898773, | |
| "learning_rate": 2.7429162893000386e-05, | |
| "loss": 1.0792550086975097, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.46935542064724617, | |
| "grad_norm": 0.18311122059822083, | |
| "learning_rate": 2.7364471471082936e-05, | |
| "loss": 1.0288339614868165, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.4706103816650195, | |
| "grad_norm": 0.21719680726528168, | |
| "learning_rate": 2.729978004916548e-05, | |
| "loss": 1.0714326858520509, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.4718653426827929, | |
| "grad_norm": 0.2074572890996933, | |
| "learning_rate": 2.723508862724803e-05, | |
| "loss": 1.0084345817565918, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.4731203037005663, | |
| "grad_norm": 0.1585981547832489, | |
| "learning_rate": 2.7170397205330573e-05, | |
| "loss": 1.065823745727539, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.4743752647183397, | |
| "grad_norm": 0.1950136125087738, | |
| "learning_rate": 2.710570578341312e-05, | |
| "loss": 1.0472920417785645, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.4756302257361131, | |
| "grad_norm": 0.20094197988510132, | |
| "learning_rate": 2.7041014361495663e-05, | |
| "loss": 1.0385844230651855, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.47688518675388647, | |
| "grad_norm": 0.18639400601387024, | |
| "learning_rate": 2.6976322939578213e-05, | |
| "loss": 1.065108299255371, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.47814014777165986, | |
| "grad_norm": 0.190430149435997, | |
| "learning_rate": 2.6911631517660757e-05, | |
| "loss": 1.0249187469482421, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.47939510878943326, | |
| "grad_norm": 0.19305895268917084, | |
| "learning_rate": 2.6846940095743307e-05, | |
| "loss": 1.0661443710327148, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.4806500698072066, | |
| "grad_norm": 0.1928487867116928, | |
| "learning_rate": 2.678224867382585e-05, | |
| "loss": 1.0664926528930665, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.48190503082498, | |
| "grad_norm": 0.19009914994239807, | |
| "learning_rate": 2.6717557251908397e-05, | |
| "loss": 1.0212774276733398, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.4831599918427534, | |
| "grad_norm": 0.18818970024585724, | |
| "learning_rate": 2.665286582999094e-05, | |
| "loss": 1.0675536155700684, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.48441495286052677, | |
| "grad_norm": 0.19587545096874237, | |
| "learning_rate": 2.658817440807349e-05, | |
| "loss": 1.1134575843811034, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.48566991387830016, | |
| "grad_norm": 0.1728365272283554, | |
| "learning_rate": 2.652348298615604e-05, | |
| "loss": 1.0674519538879395, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.48692487489607356, | |
| "grad_norm": 0.18507780134677887, | |
| "learning_rate": 2.6458791564238584e-05, | |
| "loss": 1.0612947463989257, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.48817983591384695, | |
| "grad_norm": 0.17732012271881104, | |
| "learning_rate": 2.639410014232113e-05, | |
| "loss": 1.1351598739624023, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.4894347969316203, | |
| "grad_norm": 0.1905921846628189, | |
| "learning_rate": 2.6329408720403674e-05, | |
| "loss": 1.105655574798584, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.4906897579493937, | |
| "grad_norm": 0.1902066469192505, | |
| "learning_rate": 2.6264717298486224e-05, | |
| "loss": 1.1006397247314452, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.49194471896716707, | |
| "grad_norm": 0.16983291506767273, | |
| "learning_rate": 2.6200025876568768e-05, | |
| "loss": 1.0307989120483398, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.49319967998494046, | |
| "grad_norm": 0.16205719113349915, | |
| "learning_rate": 2.6135334454651318e-05, | |
| "loss": 1.1160860061645508, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.49445464100271386, | |
| "grad_norm": 0.16868488490581512, | |
| "learning_rate": 2.607064303273386e-05, | |
| "loss": 1.1177806854248047, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.49570960202048725, | |
| "grad_norm": 0.19134441018104553, | |
| "learning_rate": 2.6005951610816408e-05, | |
| "loss": 1.0095656394958497, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.49696456303826064, | |
| "grad_norm": 0.17027780413627625, | |
| "learning_rate": 2.594126018889895e-05, | |
| "loss": 1.1302468299865722, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.49821952405603404, | |
| "grad_norm": 0.16149652004241943, | |
| "learning_rate": 2.58765687669815e-05, | |
| "loss": 1.0545706748962402, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.4994744850738074, | |
| "grad_norm": 0.1791815310716629, | |
| "learning_rate": 2.5811877345064045e-05, | |
| "loss": 1.0691394805908203, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.5007294460915808, | |
| "grad_norm": 0.19383347034454346, | |
| "learning_rate": 2.5747185923146595e-05, | |
| "loss": 1.071311855316162, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.5019844071093542, | |
| "grad_norm": 0.20299892127513885, | |
| "learning_rate": 2.5682494501229138e-05, | |
| "loss": 1.0849023818969727, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5032393681271276, | |
| "grad_norm": 0.18995410203933716, | |
| "learning_rate": 2.5617803079311685e-05, | |
| "loss": 1.0594166755676269, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.5044943291449009, | |
| "grad_norm": 0.22304005920886993, | |
| "learning_rate": 2.5553111657394228e-05, | |
| "loss": 1.0542009353637696, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.5057492901626743, | |
| "grad_norm": 0.22307968139648438, | |
| "learning_rate": 2.548842023547678e-05, | |
| "loss": 1.107966136932373, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.5070042511804477, | |
| "grad_norm": 0.1840265691280365, | |
| "learning_rate": 2.5423728813559322e-05, | |
| "loss": 1.1096989631652832, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.5082592121982211, | |
| "grad_norm": 0.21252040565013885, | |
| "learning_rate": 2.5359037391641872e-05, | |
| "loss": 1.0967977523803711, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.5095141732159945, | |
| "grad_norm": 0.18273372948169708, | |
| "learning_rate": 2.5294345969724415e-05, | |
| "loss": 1.018519401550293, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.5107691342337679, | |
| "grad_norm": 0.18331177532672882, | |
| "learning_rate": 2.5229654547806962e-05, | |
| "loss": 1.049039077758789, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.5120240952515412, | |
| "grad_norm": 0.19110405445098877, | |
| "learning_rate": 2.5164963125889505e-05, | |
| "loss": 1.0827875137329102, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.5132790562693146, | |
| "grad_norm": 0.16529154777526855, | |
| "learning_rate": 2.5100271703972055e-05, | |
| "loss": 1.0904165267944337, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.514534017287088, | |
| "grad_norm": 0.17822785675525665, | |
| "learning_rate": 2.50355802820546e-05, | |
| "loss": 1.0597058296203614, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.5157889783048614, | |
| "grad_norm": 0.1769823282957077, | |
| "learning_rate": 2.497088886013715e-05, | |
| "loss": 1.0827682495117188, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.5170439393226348, | |
| "grad_norm": 0.19942662119865417, | |
| "learning_rate": 2.4906197438219696e-05, | |
| "loss": 1.0987505912780762, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.5182989003404082, | |
| "grad_norm": 0.19398993253707886, | |
| "learning_rate": 2.484150601630224e-05, | |
| "loss": 1.0218748092651366, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.5195538613581816, | |
| "grad_norm": 0.21403302252292633, | |
| "learning_rate": 2.4776814594384786e-05, | |
| "loss": 1.1508358001708985, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.5208088223759549, | |
| "grad_norm": 0.1951906532049179, | |
| "learning_rate": 2.4712123172467333e-05, | |
| "loss": 1.1334738731384277, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.5220637833937284, | |
| "grad_norm": 0.18993432819843292, | |
| "learning_rate": 2.464743175054988e-05, | |
| "loss": 1.0698814392089844, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.5233187444115017, | |
| "grad_norm": 0.2009001225233078, | |
| "learning_rate": 2.4582740328632426e-05, | |
| "loss": 1.1019264221191407, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.5245737054292751, | |
| "grad_norm": 0.1592164784669876, | |
| "learning_rate": 2.4518048906714973e-05, | |
| "loss": 1.0502429008483887, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.5258286664470485, | |
| "grad_norm": 0.20943738520145416, | |
| "learning_rate": 2.4453357484797516e-05, | |
| "loss": 1.1068140029907227, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.5270836274648218, | |
| "grad_norm": 0.1841343641281128, | |
| "learning_rate": 2.4388666062880063e-05, | |
| "loss": 1.0863500595092774, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.5283385884825953, | |
| "grad_norm": 0.21408788859844208, | |
| "learning_rate": 2.432397464096261e-05, | |
| "loss": 1.0539629936218262, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.5295935495003686, | |
| "grad_norm": 0.2031819224357605, | |
| "learning_rate": 2.4259283219045156e-05, | |
| "loss": 1.0763005256652831, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.5308485105181421, | |
| "grad_norm": 0.1948988288640976, | |
| "learning_rate": 2.4194591797127703e-05, | |
| "loss": 1.1096826553344727, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.5321034715359154, | |
| "grad_norm": 0.19453151524066925, | |
| "learning_rate": 2.412990037521025e-05, | |
| "loss": 1.0530555725097657, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.5333584325536888, | |
| "grad_norm": 0.22539453208446503, | |
| "learning_rate": 2.4065208953292793e-05, | |
| "loss": 1.0634571075439454, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.5346133935714622, | |
| "grad_norm": 0.2108486145734787, | |
| "learning_rate": 2.400051753137534e-05, | |
| "loss": 1.0783962249755858, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.5358683545892355, | |
| "grad_norm": 0.22550557553768158, | |
| "learning_rate": 2.3935826109457887e-05, | |
| "loss": 1.0793485641479492, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.537123315607009, | |
| "grad_norm": 0.18710452318191528, | |
| "learning_rate": 2.3871134687540433e-05, | |
| "loss": 1.0253171920776367, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.5383782766247823, | |
| "grad_norm": 0.18738947808742523, | |
| "learning_rate": 2.380644326562298e-05, | |
| "loss": 1.0552565574645996, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.5396332376425558, | |
| "grad_norm": 0.19568362832069397, | |
| "learning_rate": 2.3741751843705527e-05, | |
| "loss": 1.0678536415100097, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.5408881986603291, | |
| "grad_norm": 0.16987361013889313, | |
| "learning_rate": 2.3677060421788074e-05, | |
| "loss": 1.1124211311340333, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.5421431596781024, | |
| "grad_norm": 0.2189544439315796, | |
| "learning_rate": 2.3612368999870617e-05, | |
| "loss": 1.0600645065307617, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.5433981206958759, | |
| "grad_norm": 0.17936541140079498, | |
| "learning_rate": 2.3547677577953164e-05, | |
| "loss": 1.0915284156799316, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.5446530817136492, | |
| "grad_norm": 0.19402658939361572, | |
| "learning_rate": 2.348298615603571e-05, | |
| "loss": 1.0507197380065918, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.5459080427314227, | |
| "grad_norm": 0.23639065027236938, | |
| "learning_rate": 2.3418294734118257e-05, | |
| "loss": 1.1000999450683593, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.547163003749196, | |
| "grad_norm": 0.1931799352169037, | |
| "learning_rate": 2.3353603312200804e-05, | |
| "loss": 1.0721006393432617, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.5484179647669695, | |
| "grad_norm": 0.20396549999713898, | |
| "learning_rate": 2.328891189028335e-05, | |
| "loss": 1.1172670364379882, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.5496729257847428, | |
| "grad_norm": 0.2059352695941925, | |
| "learning_rate": 2.3224220468365894e-05, | |
| "loss": 1.070617389678955, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.5509278868025161, | |
| "grad_norm": 0.2139720320701599, | |
| "learning_rate": 2.315952904644844e-05, | |
| "loss": 1.0570523262023925, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.5521828478202896, | |
| "grad_norm": 0.16804735362529755, | |
| "learning_rate": 2.3094837624530988e-05, | |
| "loss": 1.0962253570556642, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.5534378088380629, | |
| "grad_norm": 0.19438262283802032, | |
| "learning_rate": 2.3030146202613534e-05, | |
| "loss": 1.052304172515869, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.5546927698558364, | |
| "grad_norm": 0.1669342964887619, | |
| "learning_rate": 2.296545478069608e-05, | |
| "loss": 0.9899564743041992, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.5559477308736097, | |
| "grad_norm": 0.19921936094760895, | |
| "learning_rate": 2.2900763358778628e-05, | |
| "loss": 1.143895435333252, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.5572026918913832, | |
| "grad_norm": 0.18776285648345947, | |
| "learning_rate": 2.283607193686117e-05, | |
| "loss": 1.1010767936706543, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.5584576529091565, | |
| "grad_norm": 0.19864770770072937, | |
| "learning_rate": 2.2771380514943718e-05, | |
| "loss": 1.1463205337524414, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.55971261392693, | |
| "grad_norm": 0.251132607460022, | |
| "learning_rate": 2.2706689093026265e-05, | |
| "loss": 1.1078590393066405, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.5609675749447033, | |
| "grad_norm": 0.18442977964878082, | |
| "learning_rate": 2.264199767110881e-05, | |
| "loss": 1.0865904808044433, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.5622225359624766, | |
| "grad_norm": 0.19264988601207733, | |
| "learning_rate": 2.2577306249191358e-05, | |
| "loss": 1.098098087310791, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.5634774969802501, | |
| "grad_norm": 0.22054506838321686, | |
| "learning_rate": 2.2512614827273905e-05, | |
| "loss": 1.1162951469421387, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.5647324579980234, | |
| "grad_norm": 0.18666113913059235, | |
| "learning_rate": 2.244792340535645e-05, | |
| "loss": 0.9729397773742676, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5659874190157969, | |
| "grad_norm": 0.19067735970020294, | |
| "learning_rate": 2.2383231983438995e-05, | |
| "loss": 1.0903347969055175, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.5672423800335702, | |
| "grad_norm": 0.18109118938446045, | |
| "learning_rate": 2.2318540561521542e-05, | |
| "loss": 1.063498592376709, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.5684973410513436, | |
| "grad_norm": 0.18894648551940918, | |
| "learning_rate": 2.225384913960409e-05, | |
| "loss": 1.0704896926879883, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.569752302069117, | |
| "grad_norm": 0.1963120698928833, | |
| "learning_rate": 2.218915771768664e-05, | |
| "loss": 1.0621774673461915, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.5710072630868903, | |
| "grad_norm": 0.22396929562091827, | |
| "learning_rate": 2.2124466295769182e-05, | |
| "loss": 1.084723663330078, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.5722622241046638, | |
| "grad_norm": 0.1798727661371231, | |
| "learning_rate": 2.205977487385173e-05, | |
| "loss": 1.05871639251709, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.5735171851224371, | |
| "grad_norm": 0.1864812821149826, | |
| "learning_rate": 2.1995083451934276e-05, | |
| "loss": 1.0498836517333985, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.5747721461402105, | |
| "grad_norm": 0.19118115305900574, | |
| "learning_rate": 2.1930392030016822e-05, | |
| "loss": 1.099202823638916, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.5760271071579839, | |
| "grad_norm": 0.21500501036643982, | |
| "learning_rate": 2.186570060809937e-05, | |
| "loss": 1.1167634010314942, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.5772820681757573, | |
| "grad_norm": 0.22220079600811005, | |
| "learning_rate": 2.1801009186181916e-05, | |
| "loss": 1.1403026580810547, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.5785370291935307, | |
| "grad_norm": 0.1848171502351761, | |
| "learning_rate": 2.173631776426446e-05, | |
| "loss": 1.0590831756591796, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.579791990211304, | |
| "grad_norm": 0.20435841381549835, | |
| "learning_rate": 2.1671626342347006e-05, | |
| "loss": 1.090967559814453, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.5810469512290775, | |
| "grad_norm": 0.1693217009305954, | |
| "learning_rate": 2.1606934920429553e-05, | |
| "loss": 1.066522216796875, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.5823019122468508, | |
| "grad_norm": 0.18368832767009735, | |
| "learning_rate": 2.15422434985121e-05, | |
| "loss": 1.0151689529418946, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.5835568732646242, | |
| "grad_norm": 0.18117396533489227, | |
| "learning_rate": 2.1477552076594646e-05, | |
| "loss": 1.1592654228210448, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.5848118342823976, | |
| "grad_norm": 0.17642000317573547, | |
| "learning_rate": 2.1412860654677193e-05, | |
| "loss": 1.0778505325317382, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.586066795300171, | |
| "grad_norm": 0.1876673847436905, | |
| "learning_rate": 2.1348169232759736e-05, | |
| "loss": 1.077680492401123, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.5873217563179444, | |
| "grad_norm": 0.20584918558597565, | |
| "learning_rate": 2.1283477810842283e-05, | |
| "loss": 1.0664028167724608, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.5885767173357178, | |
| "grad_norm": 0.18888245522975922, | |
| "learning_rate": 2.121878638892483e-05, | |
| "loss": 1.044777774810791, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.5898316783534912, | |
| "grad_norm": 0.18159054219722748, | |
| "learning_rate": 2.1154094967007376e-05, | |
| "loss": 1.140822982788086, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.5910866393712645, | |
| "grad_norm": 0.209550142288208, | |
| "learning_rate": 2.1089403545089923e-05, | |
| "loss": 1.1214600563049317, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.5923416003890379, | |
| "grad_norm": 0.19829699397087097, | |
| "learning_rate": 2.102471212317247e-05, | |
| "loss": 1.0791502952575684, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.5935965614068113, | |
| "grad_norm": 0.18692730367183685, | |
| "learning_rate": 2.0960020701255013e-05, | |
| "loss": 1.083406352996826, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.5948515224245847, | |
| "grad_norm": 0.203223317861557, | |
| "learning_rate": 2.089532927933756e-05, | |
| "loss": 1.1177435874938966, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.5961064834423581, | |
| "grad_norm": 0.18921977281570435, | |
| "learning_rate": 2.0830637857420107e-05, | |
| "loss": 1.1003907203674317, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.5973614444601315, | |
| "grad_norm": 0.21286743879318237, | |
| "learning_rate": 2.0765946435502654e-05, | |
| "loss": 1.07603759765625, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.5986164054779048, | |
| "grad_norm": 0.20326635241508484, | |
| "learning_rate": 2.07012550135852e-05, | |
| "loss": 1.0025395393371581, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.5998713664956782, | |
| "grad_norm": 0.19325220584869385, | |
| "learning_rate": 2.0636563591667747e-05, | |
| "loss": 1.119535255432129, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.6011263275134516, | |
| "grad_norm": 0.19274058938026428, | |
| "learning_rate": 2.057187216975029e-05, | |
| "loss": 1.073643970489502, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.602381288531225, | |
| "grad_norm": 0.1856803596019745, | |
| "learning_rate": 2.0507180747832837e-05, | |
| "loss": 1.0840465545654296, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.6036362495489984, | |
| "grad_norm": 0.17938411235809326, | |
| "learning_rate": 2.0442489325915384e-05, | |
| "loss": 0.9779937744140625, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.6048912105667718, | |
| "grad_norm": 0.22091540694236755, | |
| "learning_rate": 2.037779790399793e-05, | |
| "loss": 1.023126220703125, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.6061461715845452, | |
| "grad_norm": 0.16854438185691833, | |
| "learning_rate": 2.0313106482080477e-05, | |
| "loss": 1.0393505096435547, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.6074011326023185, | |
| "grad_norm": 0.18858060240745544, | |
| "learning_rate": 2.0248415060163024e-05, | |
| "loss": 1.0123619079589843, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.6086560936200919, | |
| "grad_norm": 0.1751975268125534, | |
| "learning_rate": 2.0183723638245568e-05, | |
| "loss": 1.1000499725341797, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.6099110546378653, | |
| "grad_norm": 0.21609099209308624, | |
| "learning_rate": 2.0119032216328114e-05, | |
| "loss": 1.0882283210754395, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.6111660156556387, | |
| "grad_norm": 0.18892847001552582, | |
| "learning_rate": 2.005434079441066e-05, | |
| "loss": 1.0862364768981934, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.6124209766734121, | |
| "grad_norm": 0.16767629981040955, | |
| "learning_rate": 1.9989649372493208e-05, | |
| "loss": 1.104989719390869, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.6136759376911854, | |
| "grad_norm": 0.16860723495483398, | |
| "learning_rate": 1.9924957950575755e-05, | |
| "loss": 1.1031203269958496, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.6149308987089589, | |
| "grad_norm": 0.22763581573963165, | |
| "learning_rate": 1.98602665286583e-05, | |
| "loss": 1.0784507751464845, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.6161858597267322, | |
| "grad_norm": 0.1832362413406372, | |
| "learning_rate": 1.9795575106740845e-05, | |
| "loss": 1.139822292327881, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.6174408207445056, | |
| "grad_norm": 0.18726925551891327, | |
| "learning_rate": 1.973088368482339e-05, | |
| "loss": 1.0816995620727539, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.618695781762279, | |
| "grad_norm": 0.21192297339439392, | |
| "learning_rate": 1.9666192262905938e-05, | |
| "loss": 1.0300673484802245, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.6199507427800524, | |
| "grad_norm": 0.18673470616340637, | |
| "learning_rate": 1.9601500840988485e-05, | |
| "loss": 1.0630935668945312, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.6212057037978258, | |
| "grad_norm": 0.19416026771068573, | |
| "learning_rate": 1.953680941907103e-05, | |
| "loss": 1.0864192962646484, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.6224606648155991, | |
| "grad_norm": 0.17337632179260254, | |
| "learning_rate": 1.947211799715358e-05, | |
| "loss": 1.0580739974975586, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.6237156258333726, | |
| "grad_norm": 0.1994207203388214, | |
| "learning_rate": 1.9407426575236125e-05, | |
| "loss": 1.0788381576538086, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.6249705868511459, | |
| "grad_norm": 0.1950082629919052, | |
| "learning_rate": 1.934273515331867e-05, | |
| "loss": 1.0657068252563477, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.6262255478689194, | |
| "grad_norm": 0.19410517811775208, | |
| "learning_rate": 1.9278043731401215e-05, | |
| "loss": 0.9977058410644531, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.6274805088866927, | |
| "grad_norm": 0.18865294754505157, | |
| "learning_rate": 1.9213352309483765e-05, | |
| "loss": 1.0384069442749024, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.628735469904466, | |
| "grad_norm": 0.20763295888900757, | |
| "learning_rate": 1.9148660887566312e-05, | |
| "loss": 1.036677646636963, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.6299904309222395, | |
| "grad_norm": 0.20186074078083038, | |
| "learning_rate": 1.9083969465648855e-05, | |
| "loss": 1.0434916496276856, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.6312453919400128, | |
| "grad_norm": 0.20196175575256348, | |
| "learning_rate": 1.9019278043731402e-05, | |
| "loss": 1.0678818702697754, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.6325003529577863, | |
| "grad_norm": 0.18434342741966248, | |
| "learning_rate": 1.895458662181395e-05, | |
| "loss": 0.9929670333862305, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.6337553139755596, | |
| "grad_norm": 0.17742949724197388, | |
| "learning_rate": 1.8889895199896496e-05, | |
| "loss": 1.0131173133850098, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.6350102749933331, | |
| "grad_norm": 0.1894538849592209, | |
| "learning_rate": 1.8825203777979042e-05, | |
| "loss": 0.9884425163269043, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.6362652360111064, | |
| "grad_norm": 0.19760024547576904, | |
| "learning_rate": 1.876051235606159e-05, | |
| "loss": 1.1242119789123535, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.6375201970288797, | |
| "grad_norm": 0.21608242392539978, | |
| "learning_rate": 1.8695820934144136e-05, | |
| "loss": 1.0201186180114745, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.6387751580466532, | |
| "grad_norm": 0.19353747367858887, | |
| "learning_rate": 1.863112951222668e-05, | |
| "loss": 1.0742148399353026, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.6400301190644265, | |
| "grad_norm": 0.1773729920387268, | |
| "learning_rate": 1.8566438090309226e-05, | |
| "loss": 1.0668268203735352, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.6412850800822, | |
| "grad_norm": 0.16842322051525116, | |
| "learning_rate": 1.8501746668391773e-05, | |
| "loss": 1.111803913116455, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.6425400410999733, | |
| "grad_norm": 0.17250721156597137, | |
| "learning_rate": 1.843705524647432e-05, | |
| "loss": 1.0860756874084472, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.6437950021177468, | |
| "grad_norm": 0.20766721665859222, | |
| "learning_rate": 1.8372363824556866e-05, | |
| "loss": 1.0899797439575196, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.6450499631355201, | |
| "grad_norm": 0.16800430417060852, | |
| "learning_rate": 1.8307672402639413e-05, | |
| "loss": 1.0710078239440919, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.6463049241532934, | |
| "grad_norm": 0.19757574796676636, | |
| "learning_rate": 1.8242980980721956e-05, | |
| "loss": 1.1292811393737794, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.6475598851710669, | |
| "grad_norm": 0.20521418750286102, | |
| "learning_rate": 1.8178289558804503e-05, | |
| "loss": 1.0243899345397949, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.6488148461888402, | |
| "grad_norm": 0.16537714004516602, | |
| "learning_rate": 1.811359813688705e-05, | |
| "loss": 1.057924747467041, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.6500698072066137, | |
| "grad_norm": 0.18862581253051758, | |
| "learning_rate": 1.8048906714969597e-05, | |
| "loss": 0.996666145324707, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.651324768224387, | |
| "grad_norm": 0.17373378574848175, | |
| "learning_rate": 1.7984215293052143e-05, | |
| "loss": 1.0661702156066895, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.6525797292421605, | |
| "grad_norm": 0.20793020725250244, | |
| "learning_rate": 1.791952387113469e-05, | |
| "loss": 1.081492805480957, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.6538346902599338, | |
| "grad_norm": 0.1953442394733429, | |
| "learning_rate": 1.7854832449217233e-05, | |
| "loss": 1.0730542182922362, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.6550896512777072, | |
| "grad_norm": 0.22261761128902435, | |
| "learning_rate": 1.779014102729978e-05, | |
| "loss": 1.0947938919067384, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.6563446122954806, | |
| "grad_norm": 0.18047408759593964, | |
| "learning_rate": 1.7725449605382327e-05, | |
| "loss": 1.1488112449645995, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.6575995733132539, | |
| "grad_norm": 0.21739941835403442, | |
| "learning_rate": 1.7660758183464874e-05, | |
| "loss": 1.0836384773254395, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.6588545343310274, | |
| "grad_norm": 0.16749468445777893, | |
| "learning_rate": 1.759606676154742e-05, | |
| "loss": 1.0533074378967284, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.6601094953488007, | |
| "grad_norm": 0.19696615636348724, | |
| "learning_rate": 1.7531375339629967e-05, | |
| "loss": 1.0351654052734376, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.6613644563665741, | |
| "grad_norm": 0.206893190741539, | |
| "learning_rate": 1.746668391771251e-05, | |
| "loss": 1.03566312789917, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.6626194173843475, | |
| "grad_norm": 0.21252737939357758, | |
| "learning_rate": 1.7401992495795057e-05, | |
| "loss": 1.098302173614502, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.6638743784021209, | |
| "grad_norm": 0.18752658367156982, | |
| "learning_rate": 1.7337301073877604e-05, | |
| "loss": 1.1029739379882812, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.6651293394198943, | |
| "grad_norm": 0.1916881799697876, | |
| "learning_rate": 1.727260965196015e-05, | |
| "loss": 1.0457392692565919, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.6663843004376676, | |
| "grad_norm": 0.16342513263225555, | |
| "learning_rate": 1.7207918230042698e-05, | |
| "loss": 1.0347809791564941, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.6676392614554411, | |
| "grad_norm": 0.1860560029745102, | |
| "learning_rate": 1.7143226808125244e-05, | |
| "loss": 1.0950869560241698, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.6688942224732144, | |
| "grad_norm": 0.19112198054790497, | |
| "learning_rate": 1.7078535386207788e-05, | |
| "loss": 1.1099298477172852, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.6701491834909878, | |
| "grad_norm": 0.18603506684303284, | |
| "learning_rate": 1.7013843964290334e-05, | |
| "loss": 1.0756458282470702, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.6714041445087612, | |
| "grad_norm": 0.1958581954240799, | |
| "learning_rate": 1.694915254237288e-05, | |
| "loss": 1.1287766456604005, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.6726591055265346, | |
| "grad_norm": 0.1939549297094345, | |
| "learning_rate": 1.6884461120455428e-05, | |
| "loss": 1.1160104751586915, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.673914066544308, | |
| "grad_norm": 0.17285564541816711, | |
| "learning_rate": 1.6819769698537975e-05, | |
| "loss": 1.0963159561157227, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.6751690275620813, | |
| "grad_norm": 0.1727699637413025, | |
| "learning_rate": 1.675507827662052e-05, | |
| "loss": 1.139615821838379, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.6764239885798548, | |
| "grad_norm": 0.19430099427700043, | |
| "learning_rate": 1.6690386854703065e-05, | |
| "loss": 1.0105334281921388, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.6776789495976281, | |
| "grad_norm": 0.1681535243988037, | |
| "learning_rate": 1.662569543278561e-05, | |
| "loss": 1.0578609466552735, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.6789339106154015, | |
| "grad_norm": 0.18775980174541473, | |
| "learning_rate": 1.6561004010868158e-05, | |
| "loss": 1.0771096229553223, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.6801888716331749, | |
| "grad_norm": 0.24977093935012817, | |
| "learning_rate": 1.6496312588950705e-05, | |
| "loss": 1.0842226028442383, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.6814438326509483, | |
| "grad_norm": 0.21266375482082367, | |
| "learning_rate": 1.6431621167033252e-05, | |
| "loss": 1.01865816116333, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.6826987936687217, | |
| "grad_norm": 0.20400455594062805, | |
| "learning_rate": 1.63669297451158e-05, | |
| "loss": 1.0554264068603516, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.683953754686495, | |
| "grad_norm": 0.19339953362941742, | |
| "learning_rate": 1.6302238323198342e-05, | |
| "loss": 1.0870820045471192, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.6852087157042684, | |
| "grad_norm": 0.20825320482254028, | |
| "learning_rate": 1.6237546901280892e-05, | |
| "loss": 1.1114639282226562, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.6864636767220418, | |
| "grad_norm": 0.184986412525177, | |
| "learning_rate": 1.617285547936344e-05, | |
| "loss": 1.1310782432556152, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.6877186377398152, | |
| "grad_norm": 0.1939050555229187, | |
| "learning_rate": 1.6108164057445985e-05, | |
| "loss": 1.1249576568603517, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.6889735987575886, | |
| "grad_norm": 0.18793314695358276, | |
| "learning_rate": 1.6043472635528532e-05, | |
| "loss": 1.0266096115112304, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.690228559775362, | |
| "grad_norm": 0.18402604758739471, | |
| "learning_rate": 1.5978781213611076e-05, | |
| "loss": 1.020193386077881, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.6914835207931354, | |
| "grad_norm": 0.1962927132844925, | |
| "learning_rate": 1.5914089791693622e-05, | |
| "loss": 1.12153902053833, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.6927384818109088, | |
| "grad_norm": 0.21442580223083496, | |
| "learning_rate": 1.584939836977617e-05, | |
| "loss": 1.076151180267334, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.6939934428286821, | |
| "grad_norm": 0.18974730372428894, | |
| "learning_rate": 1.5784706947858716e-05, | |
| "loss": 1.0637462615966797, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.6952484038464555, | |
| "grad_norm": 0.19226610660552979, | |
| "learning_rate": 1.5720015525941263e-05, | |
| "loss": 1.0863216400146485, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.6965033648642289, | |
| "grad_norm": 0.18693000078201294, | |
| "learning_rate": 1.565532410402381e-05, | |
| "loss": 1.050516128540039, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.6977583258820023, | |
| "grad_norm": 0.2240767627954483, | |
| "learning_rate": 1.5590632682106353e-05, | |
| "loss": 1.1396910667419433, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.6990132868997757, | |
| "grad_norm": 0.24488072097301483, | |
| "learning_rate": 1.55259412601889e-05, | |
| "loss": 1.029798984527588, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.700268247917549, | |
| "grad_norm": 0.1701984703540802, | |
| "learning_rate": 1.5461249838271446e-05, | |
| "loss": 1.0401766777038575, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.7015232089353225, | |
| "grad_norm": 0.17540504038333893, | |
| "learning_rate": 1.5396558416353993e-05, | |
| "loss": 1.0871668815612794, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.7027781699530958, | |
| "grad_norm": 0.212332084774971, | |
| "learning_rate": 1.533186699443654e-05, | |
| "loss": 1.0655070304870606, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.7040331309708692, | |
| "grad_norm": 0.2142452895641327, | |
| "learning_rate": 1.5267175572519086e-05, | |
| "loss": 1.0310181617736816, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.7052880919886426, | |
| "grad_norm": 0.2183569073677063, | |
| "learning_rate": 1.5202484150601631e-05, | |
| "loss": 1.0585816383361817, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.706543053006416, | |
| "grad_norm": 0.2261342704296112, | |
| "learning_rate": 1.5137792728684178e-05, | |
| "loss": 1.0550565719604492, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.7077980140241894, | |
| "grad_norm": 0.20529380440711975, | |
| "learning_rate": 1.5073101306766723e-05, | |
| "loss": 1.0414663314819337, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.7090529750419627, | |
| "grad_norm": 0.19481569528579712, | |
| "learning_rate": 1.500840988484927e-05, | |
| "loss": 1.0689180374145508, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.7103079360597362, | |
| "grad_norm": 0.18099959194660187, | |
| "learning_rate": 1.4943718462931817e-05, | |
| "loss": 1.0106795310974122, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.7115628970775095, | |
| "grad_norm": 0.1786813735961914, | |
| "learning_rate": 1.4879027041014362e-05, | |
| "loss": 1.071911334991455, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.7128178580952829, | |
| "grad_norm": 0.17721523344516754, | |
| "learning_rate": 1.4814335619096909e-05, | |
| "loss": 1.0632891654968262, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.7140728191130563, | |
| "grad_norm": 0.2053815722465515, | |
| "learning_rate": 1.4749644197179455e-05, | |
| "loss": 1.0814258575439453, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.7153277801308296, | |
| "grad_norm": 0.20590366423130035, | |
| "learning_rate": 1.4684952775262e-05, | |
| "loss": 1.0101026535034179, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.7165827411486031, | |
| "grad_norm": 0.18634529411792755, | |
| "learning_rate": 1.4620261353344547e-05, | |
| "loss": 1.0727294921875, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.7178377021663764, | |
| "grad_norm": 0.21531324088573456, | |
| "learning_rate": 1.4555569931427094e-05, | |
| "loss": 1.0686802864074707, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.7190926631841499, | |
| "grad_norm": 0.18119627237319946, | |
| "learning_rate": 1.4490878509509639e-05, | |
| "loss": 1.001711654663086, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.7203476242019232, | |
| "grad_norm": 0.17979778349399567, | |
| "learning_rate": 1.4426187087592186e-05, | |
| "loss": 1.0803590774536134, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.7216025852196967, | |
| "grad_norm": 0.20080508291721344, | |
| "learning_rate": 1.4361495665674732e-05, | |
| "loss": 1.10612154006958, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.72285754623747, | |
| "grad_norm": 0.2107263058423996, | |
| "learning_rate": 1.4296804243757277e-05, | |
| "loss": 1.1027125358581542, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.7241125072552433, | |
| "grad_norm": 0.23398469388484955, | |
| "learning_rate": 1.4232112821839824e-05, | |
| "loss": 1.1061213493347168, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.7253674682730168, | |
| "grad_norm": 0.17343750596046448, | |
| "learning_rate": 1.4167421399922371e-05, | |
| "loss": 1.0411705017089843, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.7266224292907901, | |
| "grad_norm": 0.2107427567243576, | |
| "learning_rate": 1.4102729978004916e-05, | |
| "loss": 1.0695199966430664, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.7278773903085636, | |
| "grad_norm": 0.2061903476715088, | |
| "learning_rate": 1.4038038556087463e-05, | |
| "loss": 1.0993520736694335, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.7291323513263369, | |
| "grad_norm": 0.2150152623653412, | |
| "learning_rate": 1.397334713417001e-05, | |
| "loss": 1.0545531272888184, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.7303873123441104, | |
| "grad_norm": 0.17038771510124207, | |
| "learning_rate": 1.3908655712252555e-05, | |
| "loss": 1.0232912063598634, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.7316422733618837, | |
| "grad_norm": 0.19553017616271973, | |
| "learning_rate": 1.3843964290335101e-05, | |
| "loss": 1.051710033416748, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.732897234379657, | |
| "grad_norm": 0.21933381259441376, | |
| "learning_rate": 1.3779272868417648e-05, | |
| "loss": 1.0953669548034668, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.7341521953974305, | |
| "grad_norm": 0.1644992232322693, | |
| "learning_rate": 1.3714581446500193e-05, | |
| "loss": 1.0111507415771483, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.7354071564152038, | |
| "grad_norm": 0.1784752458333969, | |
| "learning_rate": 1.364989002458274e-05, | |
| "loss": 1.042880153656006, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.7366621174329773, | |
| "grad_norm": 0.20959775149822235, | |
| "learning_rate": 1.3585198602665287e-05, | |
| "loss": 1.022576904296875, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.7379170784507506, | |
| "grad_norm": 0.2106805443763733, | |
| "learning_rate": 1.3520507180747832e-05, | |
| "loss": 1.046870803833008, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.7391720394685241, | |
| "grad_norm": 0.21699438989162445, | |
| "learning_rate": 1.3455815758830378e-05, | |
| "loss": 1.1192692756652831, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.7404270004862974, | |
| "grad_norm": 0.19661927223205566, | |
| "learning_rate": 1.3391124336912925e-05, | |
| "loss": 1.0794744491577148, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.7416819615040707, | |
| "grad_norm": 0.19780300557613373, | |
| "learning_rate": 1.332643291499547e-05, | |
| "loss": 1.0534735679626466, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.7429369225218442, | |
| "grad_norm": 0.17464447021484375, | |
| "learning_rate": 1.326174149307802e-05, | |
| "loss": 1.0962336540222168, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.7441918835396175, | |
| "grad_norm": 0.2023210972547531, | |
| "learning_rate": 1.3197050071160565e-05, | |
| "loss": 1.1575682640075684, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.745446844557391, | |
| "grad_norm": 0.21466964483261108, | |
| "learning_rate": 1.3132358649243112e-05, | |
| "loss": 1.123062515258789, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.7467018055751643, | |
| "grad_norm": 0.22124673426151276, | |
| "learning_rate": 1.3067667227325659e-05, | |
| "loss": 1.0591267585754394, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.7479567665929377, | |
| "grad_norm": 0.2065040022134781, | |
| "learning_rate": 1.3002975805408204e-05, | |
| "loss": 1.0695542335510253, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.7492117276107111, | |
| "grad_norm": 0.1949198693037033, | |
| "learning_rate": 1.293828438349075e-05, | |
| "loss": 1.1152711868286134, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.7504666886284844, | |
| "grad_norm": 0.20681394636631012, | |
| "learning_rate": 1.2873592961573297e-05, | |
| "loss": 1.0414956092834473, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.7517216496462579, | |
| "grad_norm": 0.2022944539785385, | |
| "learning_rate": 1.2808901539655842e-05, | |
| "loss": 1.0200919151306151, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.7529766106640312, | |
| "grad_norm": 0.24508307874202728, | |
| "learning_rate": 1.274421011773839e-05, | |
| "loss": 1.0655020713806151, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7542315716818047, | |
| "grad_norm": 0.1903439313173294, | |
| "learning_rate": 1.2679518695820936e-05, | |
| "loss": 1.0706681251525878, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.755486532699578, | |
| "grad_norm": 0.19773785769939423, | |
| "learning_rate": 1.2614827273903481e-05, | |
| "loss": 1.086234188079834, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.7567414937173514, | |
| "grad_norm": 0.21088463068008423, | |
| "learning_rate": 1.2550135851986028e-05, | |
| "loss": 1.069676399230957, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.7579964547351248, | |
| "grad_norm": 0.20454522967338562, | |
| "learning_rate": 1.2485444430068574e-05, | |
| "loss": 1.0646330833435058, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.7592514157528982, | |
| "grad_norm": 0.19824111461639404, | |
| "learning_rate": 1.242075300815112e-05, | |
| "loss": 1.0749752044677734, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.7605063767706716, | |
| "grad_norm": 0.19081714749336243, | |
| "learning_rate": 1.2356061586233666e-05, | |
| "loss": 1.0570926666259766, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.7617613377884449, | |
| "grad_norm": 0.20483826100826263, | |
| "learning_rate": 1.2291370164316213e-05, | |
| "loss": 1.0441864013671875, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.7630162988062184, | |
| "grad_norm": 0.2218000292778015, | |
| "learning_rate": 1.2226678742398758e-05, | |
| "loss": 1.0976847648620605, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.7642712598239917, | |
| "grad_norm": 0.19163018465042114, | |
| "learning_rate": 1.2161987320481305e-05, | |
| "loss": 1.0738938331604004, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.7655262208417651, | |
| "grad_norm": 0.22696638107299805, | |
| "learning_rate": 1.2097295898563852e-05, | |
| "loss": 1.09117431640625, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.7667811818595385, | |
| "grad_norm": 0.21094925701618195, | |
| "learning_rate": 1.2032604476646397e-05, | |
| "loss": 1.1293853759765624, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.7680361428773119, | |
| "grad_norm": 0.19011946022510529, | |
| "learning_rate": 1.1967913054728943e-05, | |
| "loss": 1.0315247535705567, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.7692911038950853, | |
| "grad_norm": 0.22294586896896362, | |
| "learning_rate": 1.190322163281149e-05, | |
| "loss": 1.0366369247436524, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.7705460649128586, | |
| "grad_norm": 0.19210191071033478, | |
| "learning_rate": 1.1838530210894037e-05, | |
| "loss": 1.041628646850586, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.771801025930632, | |
| "grad_norm": 0.17535246908664703, | |
| "learning_rate": 1.1773838788976582e-05, | |
| "loss": 1.0377203941345214, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.7730559869484054, | |
| "grad_norm": 0.1910182386636734, | |
| "learning_rate": 1.1709147367059129e-05, | |
| "loss": 1.0049347877502441, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.7743109479661788, | |
| "grad_norm": 0.18827295303344727, | |
| "learning_rate": 1.1644455945141675e-05, | |
| "loss": 1.1181403160095216, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.7755659089839522, | |
| "grad_norm": 0.19850969314575195, | |
| "learning_rate": 1.157976452322422e-05, | |
| "loss": 1.1215065002441407, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.7768208700017256, | |
| "grad_norm": 0.21341505646705627, | |
| "learning_rate": 1.1515073101306767e-05, | |
| "loss": 1.0259138107299806, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.778075831019499, | |
| "grad_norm": 0.1731249988079071, | |
| "learning_rate": 1.1450381679389314e-05, | |
| "loss": 1.0396163940429688, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.7793307920372723, | |
| "grad_norm": 0.20062355697155, | |
| "learning_rate": 1.1385690257471859e-05, | |
| "loss": 1.0933070182800293, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.7805857530550457, | |
| "grad_norm": 0.214784637093544, | |
| "learning_rate": 1.1320998835554406e-05, | |
| "loss": 1.038003921508789, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.7818407140728191, | |
| "grad_norm": 0.17985184490680695, | |
| "learning_rate": 1.1256307413636952e-05, | |
| "loss": 1.0742274284362794, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.7830956750905925, | |
| "grad_norm": 0.19000248610973358, | |
| "learning_rate": 1.1191615991719498e-05, | |
| "loss": 1.051091480255127, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.7843506361083659, | |
| "grad_norm": 0.21635884046554565, | |
| "learning_rate": 1.1126924569802044e-05, | |
| "loss": 1.0835393905639648, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.7856055971261393, | |
| "grad_norm": 0.22533932328224182, | |
| "learning_rate": 1.1062233147884591e-05, | |
| "loss": 1.046684741973877, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.7868605581439126, | |
| "grad_norm": 0.1827181726694107, | |
| "learning_rate": 1.0997541725967138e-05, | |
| "loss": 1.0681885719299316, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.7881155191616861, | |
| "grad_norm": 0.22982963919639587, | |
| "learning_rate": 1.0932850304049685e-05, | |
| "loss": 1.0472677230834961, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.7893704801794594, | |
| "grad_norm": 0.19450107216835022, | |
| "learning_rate": 1.086815888213223e-05, | |
| "loss": 1.0826946258544923, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.7906254411972328, | |
| "grad_norm": 0.19893890619277954, | |
| "learning_rate": 1.0803467460214776e-05, | |
| "loss": 1.0179737091064454, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.7918804022150062, | |
| "grad_norm": 0.17373962700366974, | |
| "learning_rate": 1.0738776038297323e-05, | |
| "loss": 1.0917057037353515, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.7931353632327796, | |
| "grad_norm": 0.2234533429145813, | |
| "learning_rate": 1.0674084616379868e-05, | |
| "loss": 1.0998245239257813, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.794390324250553, | |
| "grad_norm": 0.2080935388803482, | |
| "learning_rate": 1.0609393194462415e-05, | |
| "loss": 1.021653938293457, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.7956452852683263, | |
| "grad_norm": 0.2437697947025299, | |
| "learning_rate": 1.0544701772544962e-05, | |
| "loss": 1.083084487915039, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.7969002462860998, | |
| "grad_norm": 0.20368912816047668, | |
| "learning_rate": 1.0480010350627507e-05, | |
| "loss": 1.0707550048828125, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.7981552073038731, | |
| "grad_norm": 0.16665543615818024, | |
| "learning_rate": 1.0415318928710053e-05, | |
| "loss": 1.136690044403076, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.7994101683216465, | |
| "grad_norm": 0.17936968803405762, | |
| "learning_rate": 1.03506275067926e-05, | |
| "loss": 1.0300110816955566, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.8006651293394199, | |
| "grad_norm": 0.17155979573726654, | |
| "learning_rate": 1.0285936084875145e-05, | |
| "loss": 1.2345178604125977, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.8019200903571932, | |
| "grad_norm": 0.19316871464252472, | |
| "learning_rate": 1.0221244662957692e-05, | |
| "loss": 1.1063278198242188, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.8031750513749667, | |
| "grad_norm": 0.19231335818767548, | |
| "learning_rate": 1.0156553241040239e-05, | |
| "loss": 1.1099343299865723, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.80443001239274, | |
| "grad_norm": 0.20124751329421997, | |
| "learning_rate": 1.0091861819122784e-05, | |
| "loss": 1.0818553924560548, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.8056849734105135, | |
| "grad_norm": 0.23050783574581146, | |
| "learning_rate": 1.002717039720533e-05, | |
| "loss": 1.0924180030822754, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.8069399344282868, | |
| "grad_norm": 0.19461168348789215, | |
| "learning_rate": 9.962478975287877e-06, | |
| "loss": 1.0269258499145508, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.8081948954460602, | |
| "grad_norm": 0.1916336864233017, | |
| "learning_rate": 9.897787553370422e-06, | |
| "loss": 1.1376501083374024, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.8094498564638336, | |
| "grad_norm": 0.18255390226840973, | |
| "learning_rate": 9.833096131452969e-06, | |
| "loss": 1.0916749000549317, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.8107048174816069, | |
| "grad_norm": 0.1791938841342926, | |
| "learning_rate": 9.768404709535516e-06, | |
| "loss": 1.0888887405395509, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.8119597784993804, | |
| "grad_norm": 0.1699710339307785, | |
| "learning_rate": 9.703713287618063e-06, | |
| "loss": 1.0881536483764649, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.8132147395171537, | |
| "grad_norm": 0.19622161984443665, | |
| "learning_rate": 9.639021865700608e-06, | |
| "loss": 1.0142560958862306, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.8144697005349272, | |
| "grad_norm": 0.18209852278232574, | |
| "learning_rate": 9.574330443783156e-06, | |
| "loss": 1.0698868751525878, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.8157246615527005, | |
| "grad_norm": 0.16895531117916107, | |
| "learning_rate": 9.509639021865701e-06, | |
| "loss": 1.1377266883850097, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8169796225704739, | |
| "grad_norm": 0.1833142638206482, | |
| "learning_rate": 9.444947599948248e-06, | |
| "loss": 1.0924424171447753, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.8182345835882473, | |
| "grad_norm": 0.2109987437725067, | |
| "learning_rate": 9.380256178030795e-06, | |
| "loss": 1.0673909187316895, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.8194895446060206, | |
| "grad_norm": 0.17565134167671204, | |
| "learning_rate": 9.31556475611334e-06, | |
| "loss": 1.0398826599121094, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.8207445056237941, | |
| "grad_norm": 0.22704172134399414, | |
| "learning_rate": 9.250873334195886e-06, | |
| "loss": 1.1169721603393554, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.8219994666415674, | |
| "grad_norm": 0.21614673733711243, | |
| "learning_rate": 9.186181912278433e-06, | |
| "loss": 1.040649127960205, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.8232544276593409, | |
| "grad_norm": 0.21649399399757385, | |
| "learning_rate": 9.121490490360978e-06, | |
| "loss": 1.1043192863464355, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.8245093886771142, | |
| "grad_norm": 0.1912921667098999, | |
| "learning_rate": 9.056799068443525e-06, | |
| "loss": 1.127028465270996, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.8257643496948877, | |
| "grad_norm": 0.2091410607099533, | |
| "learning_rate": 8.992107646526072e-06, | |
| "loss": 1.0905369758605956, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.827019310712661, | |
| "grad_norm": 0.22103574872016907, | |
| "learning_rate": 8.927416224608617e-06, | |
| "loss": 0.9964225769042969, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.8282742717304343, | |
| "grad_norm": 0.16947044432163239, | |
| "learning_rate": 8.862724802691163e-06, | |
| "loss": 1.1176776885986328, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.8295292327482078, | |
| "grad_norm": 0.19035208225250244, | |
| "learning_rate": 8.79803338077371e-06, | |
| "loss": 1.1607711791992188, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.8307841937659811, | |
| "grad_norm": 0.22612328827381134, | |
| "learning_rate": 8.733341958856255e-06, | |
| "loss": 1.0306846618652343, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.8320391547837546, | |
| "grad_norm": 0.19116735458374023, | |
| "learning_rate": 8.668650536938802e-06, | |
| "loss": 1.0862977981567383, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.8332941158015279, | |
| "grad_norm": 0.19095416367053986, | |
| "learning_rate": 8.603959115021349e-06, | |
| "loss": 1.0201908111572267, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.8345490768193013, | |
| "grad_norm": 0.18990927934646606, | |
| "learning_rate": 8.539267693103894e-06, | |
| "loss": 1.0904932975769044, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.8358040378370747, | |
| "grad_norm": 0.21695184707641602, | |
| "learning_rate": 8.47457627118644e-06, | |
| "loss": 1.0262943267822267, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.837058998854848, | |
| "grad_norm": 0.18091970682144165, | |
| "learning_rate": 8.409884849268987e-06, | |
| "loss": 1.0900959014892577, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.8383139598726215, | |
| "grad_norm": 0.21363726258277893, | |
| "learning_rate": 8.345193427351532e-06, | |
| "loss": 1.0485601425170898, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.8395689208903948, | |
| "grad_norm": 0.19334258139133453, | |
| "learning_rate": 8.280502005434079e-06, | |
| "loss": 1.0345725059509276, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.8408238819081683, | |
| "grad_norm": 0.20086072385311127, | |
| "learning_rate": 8.215810583516626e-06, | |
| "loss": 1.1097275733947753, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.8420788429259416, | |
| "grad_norm": 0.1856435388326645, | |
| "learning_rate": 8.151119161599171e-06, | |
| "loss": 1.0502695083618163, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.843333803943715, | |
| "grad_norm": 0.20465126633644104, | |
| "learning_rate": 8.08642773968172e-06, | |
| "loss": 1.0540699005126952, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.8445887649614884, | |
| "grad_norm": 0.18902461230754852, | |
| "learning_rate": 8.021736317764266e-06, | |
| "loss": 1.1354016304016112, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.8458437259792617, | |
| "grad_norm": 0.20140230655670166, | |
| "learning_rate": 7.957044895846811e-06, | |
| "loss": 0.9932634353637695, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.8470986869970352, | |
| "grad_norm": 0.19942064583301544, | |
| "learning_rate": 7.892353473929358e-06, | |
| "loss": 1.1147568702697754, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.8483536480148085, | |
| "grad_norm": 0.21955204010009766, | |
| "learning_rate": 7.827662052011905e-06, | |
| "loss": 0.9939888954162598, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.849608609032582, | |
| "grad_norm": 0.2062138170003891, | |
| "learning_rate": 7.76297063009445e-06, | |
| "loss": 1.0063923835754394, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.8508635700503553, | |
| "grad_norm": 0.23989014327526093, | |
| "learning_rate": 7.698279208176996e-06, | |
| "loss": 1.0760384559631349, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.8521185310681287, | |
| "grad_norm": 0.20089785754680634, | |
| "learning_rate": 7.633587786259543e-06, | |
| "loss": 1.028395652770996, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.8533734920859021, | |
| "grad_norm": 0.19919630885124207, | |
| "learning_rate": 7.568896364342089e-06, | |
| "loss": 1.02840576171875, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.8546284531036754, | |
| "grad_norm": 0.2014761120080948, | |
| "learning_rate": 7.504204942424635e-06, | |
| "loss": 1.0546991348266601, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.8558834141214489, | |
| "grad_norm": 0.19204354286193848, | |
| "learning_rate": 7.439513520507181e-06, | |
| "loss": 1.03892183303833, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.8571383751392222, | |
| "grad_norm": 0.22647128999233246, | |
| "learning_rate": 7.374822098589728e-06, | |
| "loss": 1.0341573715209962, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.8583933361569956, | |
| "grad_norm": 0.2208699733018875, | |
| "learning_rate": 7.3101306766722735e-06, | |
| "loss": 1.0098363876342773, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.859648297174769, | |
| "grad_norm": 0.219742089509964, | |
| "learning_rate": 7.2454392547548194e-06, | |
| "loss": 1.0027207374572753, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.8609032581925424, | |
| "grad_norm": 0.20601145923137665, | |
| "learning_rate": 7.180747832837366e-06, | |
| "loss": 1.0074657440185546, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.8621582192103158, | |
| "grad_norm": 0.18209318816661835, | |
| "learning_rate": 7.116056410919912e-06, | |
| "loss": 1.0155368804931642, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.8634131802280892, | |
| "grad_norm": 0.23554867506027222, | |
| "learning_rate": 7.051364989002458e-06, | |
| "loss": 1.047171401977539, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.8646681412458626, | |
| "grad_norm": 0.17976488173007965, | |
| "learning_rate": 6.986673567085005e-06, | |
| "loss": 1.1108320236206055, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.8659231022636359, | |
| "grad_norm": 0.17746324837207794, | |
| "learning_rate": 6.921982145167551e-06, | |
| "loss": 1.033961868286133, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.8671780632814093, | |
| "grad_norm": 0.20548702776432037, | |
| "learning_rate": 6.8572907232500965e-06, | |
| "loss": 0.9573192596435547, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.8684330242991827, | |
| "grad_norm": 0.24221506714820862, | |
| "learning_rate": 6.792599301332643e-06, | |
| "loss": 1.0891441345214843, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.8696879853169561, | |
| "grad_norm": 0.1774619221687317, | |
| "learning_rate": 6.727907879415189e-06, | |
| "loss": 1.0774598121643066, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.8709429463347295, | |
| "grad_norm": 0.1890013962984085, | |
| "learning_rate": 6.663216457497735e-06, | |
| "loss": 1.082331371307373, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.8721979073525029, | |
| "grad_norm": 0.19728416204452515, | |
| "learning_rate": 6.598525035580283e-06, | |
| "loss": 1.0425737380981446, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.8734528683702762, | |
| "grad_norm": 0.1938554048538208, | |
| "learning_rate": 6.533833613662829e-06, | |
| "loss": 0.9840812683105469, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.8747078293880496, | |
| "grad_norm": 0.20868176221847534, | |
| "learning_rate": 6.469142191745375e-06, | |
| "loss": 1.107065486907959, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.875962790405823, | |
| "grad_norm": 0.20235563814640045, | |
| "learning_rate": 6.404450769827921e-06, | |
| "loss": 1.035740566253662, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.8772177514235964, | |
| "grad_norm": 0.19022290408611298, | |
| "learning_rate": 6.339759347910468e-06, | |
| "loss": 1.0365036010742188, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.8784727124413698, | |
| "grad_norm": 0.19704179465770721, | |
| "learning_rate": 6.275067925993014e-06, | |
| "loss": 1.0721055030822755, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8797276734591432, | |
| "grad_norm": 0.21241937577724457, | |
| "learning_rate": 6.21037650407556e-06, | |
| "loss": 1.0706797599792481, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.8809826344769166, | |
| "grad_norm": 0.2100059986114502, | |
| "learning_rate": 6.1456850821581065e-06, | |
| "loss": 1.0387989044189454, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.8822375954946899, | |
| "grad_norm": 0.22000259160995483, | |
| "learning_rate": 6.080993660240652e-06, | |
| "loss": 1.0321297645568848, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.8834925565124633, | |
| "grad_norm": 0.20955264568328857, | |
| "learning_rate": 6.016302238323198e-06, | |
| "loss": 1.0901686668395996, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.8847475175302367, | |
| "grad_norm": 0.20897357165813446, | |
| "learning_rate": 5.951610816405745e-06, | |
| "loss": 1.1447209358215331, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.8860024785480101, | |
| "grad_norm": 0.20097708702087402, | |
| "learning_rate": 5.886919394488291e-06, | |
| "loss": 0.9972308158874512, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.8872574395657835, | |
| "grad_norm": 0.18822167813777924, | |
| "learning_rate": 5.822227972570838e-06, | |
| "loss": 1.1192616462707519, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.8885124005835568, | |
| "grad_norm": 0.21870264410972595, | |
| "learning_rate": 5.757536550653384e-06, | |
| "loss": 1.0673290252685548, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.8897673616013303, | |
| "grad_norm": 0.18924005329608917, | |
| "learning_rate": 5.6928451287359295e-06, | |
| "loss": 1.0575652122497559, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.8910223226191036, | |
| "grad_norm": 0.1820686012506485, | |
| "learning_rate": 5.628153706818476e-06, | |
| "loss": 1.0578106880187987, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.8922772836368771, | |
| "grad_norm": 0.21963050961494446, | |
| "learning_rate": 5.563462284901022e-06, | |
| "loss": 1.036845874786377, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.8935322446546504, | |
| "grad_norm": 0.2212342619895935, | |
| "learning_rate": 5.498770862983569e-06, | |
| "loss": 1.0462807655334472, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.8947872056724238, | |
| "grad_norm": 0.19352254271507263, | |
| "learning_rate": 5.434079441066115e-06, | |
| "loss": 1.0695903778076172, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.8960421666901972, | |
| "grad_norm": 0.21432216465473175, | |
| "learning_rate": 5.3693880191486615e-06, | |
| "loss": 1.0361966133117675, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.8972971277079705, | |
| "grad_norm": 0.193306565284729, | |
| "learning_rate": 5.3046965972312074e-06, | |
| "loss": 1.119828987121582, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.898552088725744, | |
| "grad_norm": 0.18354584276676178, | |
| "learning_rate": 5.240005175313753e-06, | |
| "loss": 1.0150964736938477, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.8998070497435173, | |
| "grad_norm": 0.2255086451768875, | |
| "learning_rate": 5.1753137533963e-06, | |
| "loss": 1.0812021255493165, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.9010620107612908, | |
| "grad_norm": 0.1983460932970047, | |
| "learning_rate": 5.110622331478846e-06, | |
| "loss": 1.0526619911193849, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.9023169717790641, | |
| "grad_norm": 0.2795880138874054, | |
| "learning_rate": 5.045930909561392e-06, | |
| "loss": 0.9882980346679687, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.9035719327968375, | |
| "grad_norm": 0.2017148733139038, | |
| "learning_rate": 4.981239487643939e-06, | |
| "loss": 1.079946517944336, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.9048268938146109, | |
| "grad_norm": 0.19753098487854004, | |
| "learning_rate": 4.9165480657264845e-06, | |
| "loss": 1.0410076141357423, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.9060818548323842, | |
| "grad_norm": 0.21375982463359833, | |
| "learning_rate": 4.851856643809031e-06, | |
| "loss": 1.0886569023132324, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.9073368158501577, | |
| "grad_norm": 0.18023167550563812, | |
| "learning_rate": 4.787165221891578e-06, | |
| "loss": 1.01919584274292, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.908591776867931, | |
| "grad_norm": 0.21556456387043, | |
| "learning_rate": 4.722473799974124e-06, | |
| "loss": 1.1226733207702637, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.9098467378857045, | |
| "grad_norm": 0.1888088434934616, | |
| "learning_rate": 4.65778237805667e-06, | |
| "loss": 1.0532227516174317, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.9111016989034778, | |
| "grad_norm": 0.21888945996761322, | |
| "learning_rate": 4.5930909561392166e-06, | |
| "loss": 1.1242030143737793, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.9123566599212511, | |
| "grad_norm": 0.20070919394493103, | |
| "learning_rate": 4.5283995342217625e-06, | |
| "loss": 1.0876989364624023, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.9136116209390246, | |
| "grad_norm": 0.21463949978351593, | |
| "learning_rate": 4.463708112304308e-06, | |
| "loss": 1.0497437477111817, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.9148665819567979, | |
| "grad_norm": 0.21296384930610657, | |
| "learning_rate": 4.399016690386855e-06, | |
| "loss": 1.0168041229248046, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.9161215429745714, | |
| "grad_norm": 0.2151423692703247, | |
| "learning_rate": 4.334325268469401e-06, | |
| "loss": 1.0028361320495605, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.9173765039923447, | |
| "grad_norm": 0.17901144921779633, | |
| "learning_rate": 4.269633846551947e-06, | |
| "loss": 1.0603734970092773, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.9186314650101182, | |
| "grad_norm": 0.22024358808994293, | |
| "learning_rate": 4.204942424634494e-06, | |
| "loss": 1.133981418609619, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.9198864260278915, | |
| "grad_norm": 0.18600453436374664, | |
| "learning_rate": 4.1402510027170396e-06, | |
| "loss": 1.043716812133789, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.9211413870456648, | |
| "grad_norm": 0.1897258311510086, | |
| "learning_rate": 4.0755595807995855e-06, | |
| "loss": 1.0358457565307617, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.9223963480634383, | |
| "grad_norm": 0.2092253863811493, | |
| "learning_rate": 4.010868158882133e-06, | |
| "loss": 1.0762650489807128, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.9236513090812116, | |
| "grad_norm": 0.1960146427154541, | |
| "learning_rate": 3.946176736964679e-06, | |
| "loss": 1.0524661064147949, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.9249062700989851, | |
| "grad_norm": 0.21514521539211273, | |
| "learning_rate": 3.881485315047225e-06, | |
| "loss": 1.0553058624267577, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.9261612311167584, | |
| "grad_norm": 0.18721559643745422, | |
| "learning_rate": 3.816793893129772e-06, | |
| "loss": 1.120730495452881, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.9274161921345319, | |
| "grad_norm": 0.20185963809490204, | |
| "learning_rate": 3.7521024712123175e-06, | |
| "loss": 1.0561814308166504, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.9286711531523052, | |
| "grad_norm": 0.18291395902633667, | |
| "learning_rate": 3.687411049294864e-06, | |
| "loss": 1.065229320526123, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.9299261141700786, | |
| "grad_norm": 0.20202836394309998, | |
| "learning_rate": 3.6227196273774097e-06, | |
| "loss": 1.092850112915039, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.931181075187852, | |
| "grad_norm": 0.1978691816329956, | |
| "learning_rate": 3.558028205459956e-06, | |
| "loss": 1.068133544921875, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.9324360362056253, | |
| "grad_norm": 0.20446452498435974, | |
| "learning_rate": 3.4933367835425024e-06, | |
| "loss": 1.037320613861084, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.9336909972233988, | |
| "grad_norm": 0.17508593201637268, | |
| "learning_rate": 3.4286453616250483e-06, | |
| "loss": 1.141841220855713, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.9349459582411721, | |
| "grad_norm": 0.1751583069562912, | |
| "learning_rate": 3.3639539397075946e-06, | |
| "loss": 1.0543017387390137, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.9362009192589456, | |
| "grad_norm": 0.16998153924942017, | |
| "learning_rate": 3.2992625177901413e-06, | |
| "loss": 1.0321502685546875, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.9374558802767189, | |
| "grad_norm": 0.17314793169498444, | |
| "learning_rate": 3.2345710958726877e-06, | |
| "loss": 1.0988115310668944, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.9387108412944923, | |
| "grad_norm": 0.19269074499607086, | |
| "learning_rate": 3.169879673955234e-06, | |
| "loss": 1.070913314819336, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.9399658023122657, | |
| "grad_norm": 0.23561343550682068, | |
| "learning_rate": 3.10518825203778e-06, | |
| "loss": 1.049428939819336, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.941220763330039, | |
| "grad_norm": 0.2069864720106125, | |
| "learning_rate": 3.040496830120326e-06, | |
| "loss": 1.003286647796631, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9424757243478125, | |
| "grad_norm": 0.18430078029632568, | |
| "learning_rate": 2.9758054082028725e-06, | |
| "loss": 1.0501197814941405, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.9437306853655858, | |
| "grad_norm": 0.20570716261863708, | |
| "learning_rate": 2.911113986285419e-06, | |
| "loss": 1.0311017990112306, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.9449856463833592, | |
| "grad_norm": 0.23062951862812042, | |
| "learning_rate": 2.8464225643679647e-06, | |
| "loss": 1.0960111618041992, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.9462406074011326, | |
| "grad_norm": 0.20773741602897644, | |
| "learning_rate": 2.781731142450511e-06, | |
| "loss": 1.0412386894226073, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.947495568418906, | |
| "grad_norm": 0.24511989951133728, | |
| "learning_rate": 2.7170397205330574e-06, | |
| "loss": 1.1096014976501465, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.9487505294366794, | |
| "grad_norm": 0.20896856486797333, | |
| "learning_rate": 2.6523482986156037e-06, | |
| "loss": 1.0784428596496582, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.9500054904544527, | |
| "grad_norm": 0.19488734006881714, | |
| "learning_rate": 2.58765687669815e-06, | |
| "loss": 1.043881607055664, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.9512604514722262, | |
| "grad_norm": 0.19251015782356262, | |
| "learning_rate": 2.522965454780696e-06, | |
| "loss": 1.0701610565185546, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.9525154124899995, | |
| "grad_norm": 0.19297641515731812, | |
| "learning_rate": 2.4582740328632423e-06, | |
| "loss": 1.0647805213928223, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.9537703735077729, | |
| "grad_norm": 0.20945972204208374, | |
| "learning_rate": 2.393582610945789e-06, | |
| "loss": 1.036386489868164, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.9550253345255463, | |
| "grad_norm": 0.19576741755008698, | |
| "learning_rate": 2.328891189028335e-06, | |
| "loss": 1.0599888801574706, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.9562802955433197, | |
| "grad_norm": 0.21709538996219635, | |
| "learning_rate": 2.2641997671108812e-06, | |
| "loss": 1.0713414192199706, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.9575352565610931, | |
| "grad_norm": 0.2199336290359497, | |
| "learning_rate": 2.1995083451934276e-06, | |
| "loss": 1.071683406829834, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.9587902175788665, | |
| "grad_norm": 0.24035602807998657, | |
| "learning_rate": 2.1348169232759735e-06, | |
| "loss": 1.074808692932129, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.9600451785966398, | |
| "grad_norm": 0.22513709962368011, | |
| "learning_rate": 2.0701255013585198e-06, | |
| "loss": 1.080063247680664, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.9613001396144132, | |
| "grad_norm": 0.20860207080841064, | |
| "learning_rate": 2.0054340794410665e-06, | |
| "loss": 1.0631601333618164, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.9625551006321866, | |
| "grad_norm": 0.20075741410255432, | |
| "learning_rate": 1.9407426575236124e-06, | |
| "loss": 1.103189468383789, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.96381006164996, | |
| "grad_norm": 0.21978308260440826, | |
| "learning_rate": 1.8760512356061587e-06, | |
| "loss": 1.0389853477478028, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.9650650226677334, | |
| "grad_norm": 0.1870948076248169, | |
| "learning_rate": 1.8113598136887049e-06, | |
| "loss": 1.0636377334594727, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.9663199836855068, | |
| "grad_norm": 0.18612495064735413, | |
| "learning_rate": 1.7466683917712512e-06, | |
| "loss": 1.054550838470459, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.9675749447032802, | |
| "grad_norm": 0.1692449450492859, | |
| "learning_rate": 1.6819769698537973e-06, | |
| "loss": 1.0402187347412108, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.9688299057210535, | |
| "grad_norm": 0.17843835055828094, | |
| "learning_rate": 1.6172855479363438e-06, | |
| "loss": 1.0575499534606934, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.9700848667388269, | |
| "grad_norm": 0.22015692293643951, | |
| "learning_rate": 1.55259412601889e-06, | |
| "loss": 1.0864248275756836, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.9713398277566003, | |
| "grad_norm": 0.20895177125930786, | |
| "learning_rate": 1.4879027041014363e-06, | |
| "loss": 1.0197461128234864, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.9725947887743737, | |
| "grad_norm": 0.20695802569389343, | |
| "learning_rate": 1.4232112821839824e-06, | |
| "loss": 1.1009364128112793, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.9738497497921471, | |
| "grad_norm": 0.16815753281116486, | |
| "learning_rate": 1.3585198602665287e-06, | |
| "loss": 0.9824440956115723, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.9751047108099204, | |
| "grad_norm": 0.20957960188388824, | |
| "learning_rate": 1.293828438349075e-06, | |
| "loss": 1.1214850425720215, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.9763596718276939, | |
| "grad_norm": 0.2056475281715393, | |
| "learning_rate": 1.2291370164316211e-06, | |
| "loss": 1.0337078094482421, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.9776146328454672, | |
| "grad_norm": 0.20233125984668732, | |
| "learning_rate": 1.1644455945141675e-06, | |
| "loss": 1.1166194915771483, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.9788695938632406, | |
| "grad_norm": 0.19711875915527344, | |
| "learning_rate": 1.0997541725967138e-06, | |
| "loss": 1.0613553047180175, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.980124554881014, | |
| "grad_norm": 0.23265467584133148, | |
| "learning_rate": 1.0350627506792599e-06, | |
| "loss": 1.0709218978881836, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.9813795158987874, | |
| "grad_norm": 0.1931038498878479, | |
| "learning_rate": 9.703713287618062e-07, | |
| "loss": 1.0655381202697753, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.9826344769165608, | |
| "grad_norm": 0.19685102999210358, | |
| "learning_rate": 9.056799068443524e-07, | |
| "loss": 1.0683314323425293, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.9838894379343341, | |
| "grad_norm": 0.18740233778953552, | |
| "learning_rate": 8.409884849268986e-07, | |
| "loss": 1.0544939041137695, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.9851443989521076, | |
| "grad_norm": 0.2561696767807007, | |
| "learning_rate": 7.76297063009445e-07, | |
| "loss": 1.092136287689209, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.9863993599698809, | |
| "grad_norm": 0.20890219509601593, | |
| "learning_rate": 7.116056410919912e-07, | |
| "loss": 1.0438971519470215, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 0.1991521716117859, | |
| "learning_rate": 6.469142191745375e-07, | |
| "loss": 1.0979823112487792, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.9889092820054277, | |
| "grad_norm": 0.25123023986816406, | |
| "learning_rate": 5.822227972570837e-07, | |
| "loss": 1.0025765419006347, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.990164243023201, | |
| "grad_norm": 0.20250628888607025, | |
| "learning_rate": 5.175313753396299e-07, | |
| "loss": 1.092966651916504, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.9914192040409745, | |
| "grad_norm": 0.2285197377204895, | |
| "learning_rate": 4.528399534221762e-07, | |
| "loss": 1.0923130989074707, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.9926741650587478, | |
| "grad_norm": 0.1967306137084961, | |
| "learning_rate": 3.881485315047225e-07, | |
| "loss": 1.0842831611633301, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.9939291260765213, | |
| "grad_norm": 0.20673462748527527, | |
| "learning_rate": 3.2345710958726876e-07, | |
| "loss": 1.0047653198242188, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.9951840870942946, | |
| "grad_norm": 0.19918572902679443, | |
| "learning_rate": 2.5876568766981497e-07, | |
| "loss": 1.0727598190307617, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.9964390481120681, | |
| "grad_norm": 0.16394563019275665, | |
| "learning_rate": 1.9407426575236124e-07, | |
| "loss": 1.0194159507751466, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.9976940091298414, | |
| "grad_norm": 0.22014449536800385, | |
| "learning_rate": 1.2938284383490749e-07, | |
| "loss": 1.0323500633239746, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.9989489701476147, | |
| "grad_norm": 0.2236953228712082, | |
| "learning_rate": 6.469142191745374e-08, | |
| "loss": 1.030369472503662, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9335432648658752, | |
| "eval_runtime": 895.0319, | |
| "eval_samples_per_second": 14.994, | |
| "eval_steps_per_second": 7.497, | |
| "step": 7969 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7969, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.797602843269349e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |