| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9946007388462631, | |
| "eval_steps": 500, | |
| "global_step": 3500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002841716396703609, | |
| "grad_norm": 0.20229442417621613, | |
| "learning_rate": 0.00019948849104859336, | |
| "loss": 1.523, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005683432793407218, | |
| "grad_norm": 0.24781420826911926, | |
| "learning_rate": 0.00019892014776925262, | |
| "loss": 1.3539, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008525149190110827, | |
| "grad_norm": 0.21505358815193176, | |
| "learning_rate": 0.0001983518044899119, | |
| "loss": 1.3182, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011366865586814436, | |
| "grad_norm": 0.2120037078857422, | |
| "learning_rate": 0.00019778346121057119, | |
| "loss": 1.3647, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014208581983518044, | |
| "grad_norm": 0.19157598912715912, | |
| "learning_rate": 0.00019721511793123047, | |
| "loss": 1.4083, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.017050298380221655, | |
| "grad_norm": 0.2424042969942093, | |
| "learning_rate": 0.00019664677465188975, | |
| "loss": 1.2882, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.019892014776925263, | |
| "grad_norm": 0.25576573610305786, | |
| "learning_rate": 0.000196078431372549, | |
| "loss": 1.2469, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.022733731173628872, | |
| "grad_norm": 0.3163057267665863, | |
| "learning_rate": 0.0001955100880932083, | |
| "loss": 1.3234, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02557544757033248, | |
| "grad_norm": 0.20805688202381134, | |
| "learning_rate": 0.00019494174481386758, | |
| "loss": 1.2203, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02841716396703609, | |
| "grad_norm": 0.26067784428596497, | |
| "learning_rate": 0.00019437340153452686, | |
| "loss": 1.235, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0312588803637397, | |
| "grad_norm": 0.25863921642303467, | |
| "learning_rate": 0.00019380505825518612, | |
| "loss": 1.2209, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03410059676044331, | |
| "grad_norm": 0.2780992090702057, | |
| "learning_rate": 0.0001932367149758454, | |
| "loss": 1.2806, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03694231315714692, | |
| "grad_norm": 0.2662423253059387, | |
| "learning_rate": 0.0001926683716965047, | |
| "loss": 1.2075, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.039784029553850526, | |
| "grad_norm": 0.35407036542892456, | |
| "learning_rate": 0.00019210002841716397, | |
| "loss": 1.2721, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.042625745950554135, | |
| "grad_norm": 0.2910842001438141, | |
| "learning_rate": 0.00019153168513782326, | |
| "loss": 1.2716, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.045467462347257744, | |
| "grad_norm": 0.2619645893573761, | |
| "learning_rate": 0.00019096334185848252, | |
| "loss": 1.186, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04830917874396135, | |
| "grad_norm": 0.2996160686016083, | |
| "learning_rate": 0.0001903949985791418, | |
| "loss": 1.1886, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05115089514066496, | |
| "grad_norm": 0.227691650390625, | |
| "learning_rate": 0.00018982665529980108, | |
| "loss": 1.2613, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.05399261153736857, | |
| "grad_norm": 0.2601442039012909, | |
| "learning_rate": 0.00018925831202046037, | |
| "loss": 1.1767, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05683432793407218, | |
| "grad_norm": 0.28762301802635193, | |
| "learning_rate": 0.00018868996874111963, | |
| "loss": 1.2628, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.059676044330775786, | |
| "grad_norm": 0.23256859183311462, | |
| "learning_rate": 0.0001881216254617789, | |
| "loss": 1.1098, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.0625177607274794, | |
| "grad_norm": 0.2880021333694458, | |
| "learning_rate": 0.0001875532821824382, | |
| "loss": 1.1575, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06535947712418301, | |
| "grad_norm": 0.25147515535354614, | |
| "learning_rate": 0.00018698493890309748, | |
| "loss": 1.1868, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06820119352088662, | |
| "grad_norm": 0.29358601570129395, | |
| "learning_rate": 0.00018641659562375676, | |
| "loss": 1.241, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07104290991759023, | |
| "grad_norm": 0.24878720939159393, | |
| "learning_rate": 0.00018584825234441602, | |
| "loss": 1.1583, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07388462631429384, | |
| "grad_norm": 0.23219600319862366, | |
| "learning_rate": 0.0001852799090650753, | |
| "loss": 1.155, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07672634271099744, | |
| "grad_norm": 0.2404685616493225, | |
| "learning_rate": 0.0001847115657857346, | |
| "loss": 1.1896, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07956805910770105, | |
| "grad_norm": 0.21366341412067413, | |
| "learning_rate": 0.00018414322250639387, | |
| "loss": 1.1522, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08240977550440466, | |
| "grad_norm": 0.30190715193748474, | |
| "learning_rate": 0.00018357487922705313, | |
| "loss": 1.2053, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08525149190110827, | |
| "grad_norm": 0.253252238035202, | |
| "learning_rate": 0.00018300653594771241, | |
| "loss": 1.1812, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08809320829781188, | |
| "grad_norm": 0.2292664349079132, | |
| "learning_rate": 0.0001824381926683717, | |
| "loss": 1.112, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09093492469451549, | |
| "grad_norm": 0.28798526525497437, | |
| "learning_rate": 0.00018186984938903098, | |
| "loss": 1.2313, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.0937766410912191, | |
| "grad_norm": 0.28377199172973633, | |
| "learning_rate": 0.00018130150610969027, | |
| "loss": 1.108, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.0966183574879227, | |
| "grad_norm": 0.25983279943466187, | |
| "learning_rate": 0.00018073316283034952, | |
| "loss": 1.1511, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09946007388462631, | |
| "grad_norm": 0.25927045941352844, | |
| "learning_rate": 0.0001801648195510088, | |
| "loss": 1.2269, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10230179028132992, | |
| "grad_norm": 0.2704865634441376, | |
| "learning_rate": 0.0001795964762716681, | |
| "loss": 1.1827, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.10514350667803353, | |
| "grad_norm": 0.30205655097961426, | |
| "learning_rate": 0.00017902813299232738, | |
| "loss": 1.2028, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.10798522307473714, | |
| "grad_norm": 0.3334643244743347, | |
| "learning_rate": 0.00017845978971298663, | |
| "loss": 1.1631, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11082693947144075, | |
| "grad_norm": 0.25340893864631653, | |
| "learning_rate": 0.00017789144643364592, | |
| "loss": 1.1823, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.11366865586814436, | |
| "grad_norm": 0.2417430877685547, | |
| "learning_rate": 0.0001773231031543052, | |
| "loss": 1.1956, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.11651037226484796, | |
| "grad_norm": 0.238485187292099, | |
| "learning_rate": 0.0001767547598749645, | |
| "loss": 1.2436, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.11935208866155157, | |
| "grad_norm": 0.2162630409002304, | |
| "learning_rate": 0.00017618641659562377, | |
| "loss": 1.1831, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12219380505825518, | |
| "grad_norm": 0.25849658250808716, | |
| "learning_rate": 0.00017561807331628303, | |
| "loss": 1.2077, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1250355214549588, | |
| "grad_norm": 0.3160068392753601, | |
| "learning_rate": 0.0001750497300369423, | |
| "loss": 1.2443, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1278772378516624, | |
| "grad_norm": 0.25949159264564514, | |
| "learning_rate": 0.0001744813867576016, | |
| "loss": 1.2738, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.13071895424836602, | |
| "grad_norm": 0.2856585383415222, | |
| "learning_rate": 0.00017391304347826088, | |
| "loss": 1.1611, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.13356067064506963, | |
| "grad_norm": 0.27770936489105225, | |
| "learning_rate": 0.00017334470019892014, | |
| "loss": 1.1796, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.13640238704177324, | |
| "grad_norm": 0.2558460831642151, | |
| "learning_rate": 0.00017277635691957942, | |
| "loss": 1.1277, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.13924410343847685, | |
| "grad_norm": 0.23918330669403076, | |
| "learning_rate": 0.0001722080136402387, | |
| "loss": 1.1651, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.14208581983518045, | |
| "grad_norm": 0.24087974429130554, | |
| "learning_rate": 0.000171639670360898, | |
| "loss": 1.2795, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14492753623188406, | |
| "grad_norm": 0.22798433899879456, | |
| "learning_rate": 0.00017107132708155728, | |
| "loss": 1.1636, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.14776925262858767, | |
| "grad_norm": 0.25570231676101685, | |
| "learning_rate": 0.00017050298380221653, | |
| "loss": 1.0991, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.15061096902529128, | |
| "grad_norm": 0.3161047101020813, | |
| "learning_rate": 0.00016993464052287582, | |
| "loss": 1.1638, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1534526854219949, | |
| "grad_norm": 0.34027767181396484, | |
| "learning_rate": 0.0001693662972435351, | |
| "loss": 1.1913, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1562944018186985, | |
| "grad_norm": 0.5431545972824097, | |
| "learning_rate": 0.00016879795396419439, | |
| "loss": 1.2059, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1591361182154021, | |
| "grad_norm": 0.3261612057685852, | |
| "learning_rate": 0.00016822961068485364, | |
| "loss": 1.1779, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.16197783461210571, | |
| "grad_norm": 0.25675147771835327, | |
| "learning_rate": 0.00016766126740551293, | |
| "loss": 1.2524, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.16481955100880932, | |
| "grad_norm": 0.2560219168663025, | |
| "learning_rate": 0.0001670929241261722, | |
| "loss": 1.0579, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.16766126740551293, | |
| "grad_norm": 0.29484283924102783, | |
| "learning_rate": 0.0001665245808468315, | |
| "loss": 1.0802, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.17050298380221654, | |
| "grad_norm": 0.22986841201782227, | |
| "learning_rate": 0.00016595623756749078, | |
| "loss": 1.1268, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.17334470019892015, | |
| "grad_norm": 0.23908346891403198, | |
| "learning_rate": 0.00016538789428815004, | |
| "loss": 1.1326, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.17618641659562376, | |
| "grad_norm": 0.30050793290138245, | |
| "learning_rate": 0.00016481955100880932, | |
| "loss": 1.0501, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.17902813299232737, | |
| "grad_norm": 0.24418674409389496, | |
| "learning_rate": 0.0001642512077294686, | |
| "loss": 1.1718, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.18186984938903097, | |
| "grad_norm": 0.4306769073009491, | |
| "learning_rate": 0.0001636828644501279, | |
| "loss": 1.1343, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.18471156578573458, | |
| "grad_norm": 0.2366916537284851, | |
| "learning_rate": 0.00016311452117078715, | |
| "loss": 1.1766, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1875532821824382, | |
| "grad_norm": 0.281982958316803, | |
| "learning_rate": 0.00016254617789144643, | |
| "loss": 1.2057, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1903949985791418, | |
| "grad_norm": 0.2688102424144745, | |
| "learning_rate": 0.00016197783461210572, | |
| "loss": 1.1958, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.1932367149758454, | |
| "grad_norm": 0.28181755542755127, | |
| "learning_rate": 0.000161409491332765, | |
| "loss": 1.1662, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.19607843137254902, | |
| "grad_norm": 0.23462365567684174, | |
| "learning_rate": 0.00016084114805342428, | |
| "loss": 1.1193, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.19892014776925263, | |
| "grad_norm": 0.27968090772628784, | |
| "learning_rate": 0.00016027280477408354, | |
| "loss": 1.1678, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.20176186416595623, | |
| "grad_norm": 0.2571905851364136, | |
| "learning_rate": 0.00015970446149474283, | |
| "loss": 1.1775, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.20460358056265984, | |
| "grad_norm": 0.2821357250213623, | |
| "learning_rate": 0.0001591361182154021, | |
| "loss": 1.1649, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.20744529695936345, | |
| "grad_norm": 0.2606565058231354, | |
| "learning_rate": 0.0001585677749360614, | |
| "loss": 1.0562, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.21028701335606706, | |
| "grad_norm": 0.27794864773750305, | |
| "learning_rate": 0.00015799943165672065, | |
| "loss": 1.1366, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.21312872975277067, | |
| "grad_norm": 0.2107602059841156, | |
| "learning_rate": 0.00015743108837737994, | |
| "loss": 1.1106, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.21597044614947428, | |
| "grad_norm": 0.2524462640285492, | |
| "learning_rate": 0.00015686274509803922, | |
| "loss": 1.1777, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.21881216254617789, | |
| "grad_norm": 0.22253374755382538, | |
| "learning_rate": 0.0001562944018186985, | |
| "loss": 1.0856, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2216538789428815, | |
| "grad_norm": 0.2423143982887268, | |
| "learning_rate": 0.0001557260585393578, | |
| "loss": 1.1836, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2244955953395851, | |
| "grad_norm": 0.2595592737197876, | |
| "learning_rate": 0.00015515771526001705, | |
| "loss": 1.1698, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2273373117362887, | |
| "grad_norm": 0.2568744421005249, | |
| "learning_rate": 0.00015458937198067633, | |
| "loss": 1.1707, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.23017902813299232, | |
| "grad_norm": 0.30398836731910706, | |
| "learning_rate": 0.00015402102870133561, | |
| "loss": 1.1547, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.23302074452969593, | |
| "grad_norm": 0.300106406211853, | |
| "learning_rate": 0.0001534526854219949, | |
| "loss": 1.1925, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.23586246092639954, | |
| "grad_norm": 0.2774117588996887, | |
| "learning_rate": 0.00015288434214265416, | |
| "loss": 1.1404, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.23870417732310314, | |
| "grad_norm": 0.28219330310821533, | |
| "learning_rate": 0.00015231599886331344, | |
| "loss": 1.122, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.24154589371980675, | |
| "grad_norm": 0.3004832863807678, | |
| "learning_rate": 0.00015174765558397272, | |
| "loss": 1.2315, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.24438761011651036, | |
| "grad_norm": 0.24674588441848755, | |
| "learning_rate": 0.000151179312304632, | |
| "loss": 1.1663, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.24722932651321397, | |
| "grad_norm": 0.21483612060546875, | |
| "learning_rate": 0.0001506109690252913, | |
| "loss": 1.173, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2500710429099176, | |
| "grad_norm": 0.3079366683959961, | |
| "learning_rate": 0.00015004262574595055, | |
| "loss": 1.1695, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2529127593066212, | |
| "grad_norm": 0.2626102566719055, | |
| "learning_rate": 0.00014947428246660983, | |
| "loss": 1.1552, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2557544757033248, | |
| "grad_norm": 0.3606242537498474, | |
| "learning_rate": 0.00014890593918726912, | |
| "loss": 1.0921, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.25859619210002843, | |
| "grad_norm": 0.3001089096069336, | |
| "learning_rate": 0.0001483375959079284, | |
| "loss": 1.1911, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.26143790849673204, | |
| "grad_norm": 0.2784072458744049, | |
| "learning_rate": 0.00014776925262858766, | |
| "loss": 1.1979, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.26427962489343565, | |
| "grad_norm": 0.21513643860816956, | |
| "learning_rate": 0.00014720090934924694, | |
| "loss": 1.0723, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.26712134129013926, | |
| "grad_norm": 0.253627210855484, | |
| "learning_rate": 0.00014663256606990623, | |
| "loss": 1.1883, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.26996305768684287, | |
| "grad_norm": 0.2411937564611435, | |
| "learning_rate": 0.0001460642227905655, | |
| "loss": 1.1786, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2728047740835465, | |
| "grad_norm": 0.2327292114496231, | |
| "learning_rate": 0.0001454958795112248, | |
| "loss": 1.2151, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2756464904802501, | |
| "grad_norm": 0.26323702931404114, | |
| "learning_rate": 0.00014492753623188405, | |
| "loss": 1.1002, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.2784882068769537, | |
| "grad_norm": 0.2239420861005783, | |
| "learning_rate": 0.00014435919295254334, | |
| "loss": 1.0453, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2813299232736573, | |
| "grad_norm": 0.2786525785923004, | |
| "learning_rate": 0.00014379084967320262, | |
| "loss": 1.2195, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.2841716396703609, | |
| "grad_norm": 0.23764382302761078, | |
| "learning_rate": 0.0001432225063938619, | |
| "loss": 1.1263, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2870133560670645, | |
| "grad_norm": 0.2542721927165985, | |
| "learning_rate": 0.00014265416311452116, | |
| "loss": 1.1603, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.2898550724637681, | |
| "grad_norm": 0.26884064078330994, | |
| "learning_rate": 0.00014208581983518045, | |
| "loss": 1.1759, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.29269678886047173, | |
| "grad_norm": 0.2652948498725891, | |
| "learning_rate": 0.00014151747655583973, | |
| "loss": 1.0826, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.29553850525717534, | |
| "grad_norm": 0.3024510443210602, | |
| "learning_rate": 0.00014094913327649902, | |
| "loss": 1.1451, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.29838022165387895, | |
| "grad_norm": 0.22471967339515686, | |
| "learning_rate": 0.0001403807899971583, | |
| "loss": 1.1412, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.30122193805058256, | |
| "grad_norm": 0.32953527569770813, | |
| "learning_rate": 0.00013981244671781756, | |
| "loss": 1.1233, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.30406365444728617, | |
| "grad_norm": 0.3743230700492859, | |
| "learning_rate": 0.00013924410343847684, | |
| "loss": 1.1403, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.3069053708439898, | |
| "grad_norm": 0.36825114488601685, | |
| "learning_rate": 0.00013867576015913613, | |
| "loss": 1.2296, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3097470872406934, | |
| "grad_norm": 0.3129768669605255, | |
| "learning_rate": 0.0001381074168797954, | |
| "loss": 1.1697, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.312588803637397, | |
| "grad_norm": 0.31028246879577637, | |
| "learning_rate": 0.00013753907360045467, | |
| "loss": 1.096, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3154305200341006, | |
| "grad_norm": 0.2524968981742859, | |
| "learning_rate": 0.00013697073032111395, | |
| "loss": 1.1114, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.3182722364308042, | |
| "grad_norm": 0.23146887123584747, | |
| "learning_rate": 0.00013640238704177324, | |
| "loss": 1.0634, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3211139528275078, | |
| "grad_norm": 0.26541128754615784, | |
| "learning_rate": 0.00013583404376243252, | |
| "loss": 1.0631, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.32395566922421143, | |
| "grad_norm": 0.24775725603103638, | |
| "learning_rate": 0.0001352657004830918, | |
| "loss": 1.1456, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.32679738562091504, | |
| "grad_norm": 0.25310489535331726, | |
| "learning_rate": 0.00013469735720375106, | |
| "loss": 1.2405, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.32963910201761865, | |
| "grad_norm": 0.2598433792591095, | |
| "learning_rate": 0.00013412901392441035, | |
| "loss": 1.2455, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.33248081841432225, | |
| "grad_norm": 0.2735394835472107, | |
| "learning_rate": 0.00013356067064506963, | |
| "loss": 1.1014, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.33532253481102586, | |
| "grad_norm": 0.26913198828697205, | |
| "learning_rate": 0.00013299232736572892, | |
| "loss": 1.1476, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.33816425120772947, | |
| "grad_norm": 0.22991891205310822, | |
| "learning_rate": 0.00013242398408638817, | |
| "loss": 1.0964, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3410059676044331, | |
| "grad_norm": 0.2543002963066101, | |
| "learning_rate": 0.00013185564080704746, | |
| "loss": 1.1409, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3438476840011367, | |
| "grad_norm": 0.2660631537437439, | |
| "learning_rate": 0.00013128729752770674, | |
| "loss": 1.0375, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3466894003978403, | |
| "grad_norm": 0.25068119168281555, | |
| "learning_rate": 0.00013071895424836603, | |
| "loss": 1.1708, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.3495311167945439, | |
| "grad_norm": 0.27296605706214905, | |
| "learning_rate": 0.0001301506109690253, | |
| "loss": 1.1449, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3523728331912475, | |
| "grad_norm": 0.23561522364616394, | |
| "learning_rate": 0.00012958226768968457, | |
| "loss": 1.1954, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3552145495879511, | |
| "grad_norm": 0.2912009358406067, | |
| "learning_rate": 0.00012901392441034385, | |
| "loss": 1.1254, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.35805626598465473, | |
| "grad_norm": 0.24392102658748627, | |
| "learning_rate": 0.00012844558113100314, | |
| "loss": 1.1263, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.36089798238135834, | |
| "grad_norm": 0.30842769145965576, | |
| "learning_rate": 0.00012787723785166242, | |
| "loss": 1.0731, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.36373969877806195, | |
| "grad_norm": 0.2747196853160858, | |
| "learning_rate": 0.0001273088945723217, | |
| "loss": 1.1913, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.36658141517476556, | |
| "grad_norm": 0.31838688254356384, | |
| "learning_rate": 0.00012674055129298096, | |
| "loss": 1.222, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.36942313157146917, | |
| "grad_norm": 0.3127056062221527, | |
| "learning_rate": 0.00012617220801364025, | |
| "loss": 1.0872, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3722648479681728, | |
| "grad_norm": 0.2799491882324219, | |
| "learning_rate": 0.00012560386473429953, | |
| "loss": 1.1277, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.3751065643648764, | |
| "grad_norm": 0.2875117063522339, | |
| "learning_rate": 0.00012503552145495881, | |
| "loss": 1.153, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.37794828076158, | |
| "grad_norm": 0.3062177300453186, | |
| "learning_rate": 0.00012446717817561807, | |
| "loss": 1.1142, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.3807899971582836, | |
| "grad_norm": 0.23725247383117676, | |
| "learning_rate": 0.00012389883489627736, | |
| "loss": 1.0559, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.3836317135549872, | |
| "grad_norm": 0.28051912784576416, | |
| "learning_rate": 0.00012333049161693664, | |
| "loss": 1.0642, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.3864734299516908, | |
| "grad_norm": 0.2455301284790039, | |
| "learning_rate": 0.00012276214833759592, | |
| "loss": 1.175, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.3893151463483944, | |
| "grad_norm": 0.26085713505744934, | |
| "learning_rate": 0.0001221938050582552, | |
| "loss": 1.1368, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 0.4084455370903015, | |
| "learning_rate": 0.00012162546177891448, | |
| "loss": 1.1346, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.39499857914180164, | |
| "grad_norm": 0.28224310278892517, | |
| "learning_rate": 0.00012105711849957375, | |
| "loss": 1.0693, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.39784029553850525, | |
| "grad_norm": 0.3237653970718384, | |
| "learning_rate": 0.00012048877522023303, | |
| "loss": 1.0585, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.40068201193520886, | |
| "grad_norm": 0.26249366998672485, | |
| "learning_rate": 0.0001199204319408923, | |
| "loss": 1.1227, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.40352372833191247, | |
| "grad_norm": 0.29983624815940857, | |
| "learning_rate": 0.00011935208866155159, | |
| "loss": 1.1473, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.4063654447286161, | |
| "grad_norm": 0.26302003860473633, | |
| "learning_rate": 0.00011878374538221086, | |
| "loss": 1.2398, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.4092071611253197, | |
| "grad_norm": 0.2707955539226532, | |
| "learning_rate": 0.00011821540210287014, | |
| "loss": 1.1299, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.4120488775220233, | |
| "grad_norm": 0.47745946049690247, | |
| "learning_rate": 0.00011764705882352942, | |
| "loss": 1.138, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4148905939187269, | |
| "grad_norm": 0.24607343971729279, | |
| "learning_rate": 0.0001170787155441887, | |
| "loss": 1.1444, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.4177323103154305, | |
| "grad_norm": 0.2819903492927551, | |
| "learning_rate": 0.00011651037226484798, | |
| "loss": 1.0861, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.4205740267121341, | |
| "grad_norm": 0.2718110978603363, | |
| "learning_rate": 0.00011594202898550725, | |
| "loss": 1.1225, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.4234157431088377, | |
| "grad_norm": 0.2966226041316986, | |
| "learning_rate": 0.00011537368570616654, | |
| "loss": 1.1901, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.42625745950554134, | |
| "grad_norm": 0.38320621848106384, | |
| "learning_rate": 0.00011480534242682581, | |
| "loss": 1.134, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.42909917590224494, | |
| "grad_norm": 0.2895069718360901, | |
| "learning_rate": 0.0001142369991474851, | |
| "loss": 1.0505, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.43194089229894855, | |
| "grad_norm": 0.32522544264793396, | |
| "learning_rate": 0.00011366865586814436, | |
| "loss": 1.119, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 0.3680785298347473, | |
| "learning_rate": 0.00011310031258880365, | |
| "loss": 1.154, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.43762432509235577, | |
| "grad_norm": 0.24093805253505707, | |
| "learning_rate": 0.00011253196930946292, | |
| "loss": 1.1294, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.4404660414890594, | |
| "grad_norm": 0.3424024283885956, | |
| "learning_rate": 0.0001119636260301222, | |
| "loss": 1.1875, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.443307757885763, | |
| "grad_norm": 0.24656616151332855, | |
| "learning_rate": 0.00011139528275078149, | |
| "loss": 1.111, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4461494742824666, | |
| "grad_norm": 0.289628803730011, | |
| "learning_rate": 0.00011082693947144076, | |
| "loss": 1.1301, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4489911906791702, | |
| "grad_norm": 0.33433884382247925, | |
| "learning_rate": 0.00011025859619210004, | |
| "loss": 1.1408, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.4518329070758738, | |
| "grad_norm": 0.32477903366088867, | |
| "learning_rate": 0.00010969025291275931, | |
| "loss": 1.1735, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4546746234725774, | |
| "grad_norm": 0.2586929202079773, | |
| "learning_rate": 0.0001091219096334186, | |
| "loss": 1.1209, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.45751633986928103, | |
| "grad_norm": 0.3958762586116791, | |
| "learning_rate": 0.00010855356635407787, | |
| "loss": 1.1538, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.46035805626598464, | |
| "grad_norm": 0.2704383134841919, | |
| "learning_rate": 0.00010798522307473715, | |
| "loss": 1.1352, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.46319977266268825, | |
| "grad_norm": 0.3805047869682312, | |
| "learning_rate": 0.00010741687979539642, | |
| "loss": 1.0875, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.46604148905939186, | |
| "grad_norm": 0.2563640773296356, | |
| "learning_rate": 0.00010684853651605571, | |
| "loss": 1.0743, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.46888320545609546, | |
| "grad_norm": 0.2729780972003937, | |
| "learning_rate": 0.00010628019323671499, | |
| "loss": 1.082, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4717249218527991, | |
| "grad_norm": 0.31282857060432434, | |
| "learning_rate": 0.00010571184995737426, | |
| "loss": 1.1308, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.4745666382495027, | |
| "grad_norm": 0.30954697728157043, | |
| "learning_rate": 0.00010514350667803355, | |
| "loss": 1.1608, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.4774083546462063, | |
| "grad_norm": 0.2565356194972992, | |
| "learning_rate": 0.00010457516339869282, | |
| "loss": 1.0761, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.4802500710429099, | |
| "grad_norm": 0.25005489587783813, | |
| "learning_rate": 0.0001040068201193521, | |
| "loss": 1.129, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4830917874396135, | |
| "grad_norm": 0.2812393605709076, | |
| "learning_rate": 0.00010343847684001137, | |
| "loss": 1.1722, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4859335038363171, | |
| "grad_norm": 0.2592753469944, | |
| "learning_rate": 0.00010287013356067066, | |
| "loss": 1.0974, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4887752202330207, | |
| "grad_norm": 0.4299195408821106, | |
| "learning_rate": 0.00010230179028132993, | |
| "loss": 1.1636, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.49161693662972433, | |
| "grad_norm": 0.2507430613040924, | |
| "learning_rate": 0.00010173344700198921, | |
| "loss": 1.1259, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.49445865302642794, | |
| "grad_norm": 0.25915494561195374, | |
| "learning_rate": 0.0001011651037226485, | |
| "loss": 1.1513, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.49730036942313155, | |
| "grad_norm": 0.4046742022037506, | |
| "learning_rate": 0.00010059676044330777, | |
| "loss": 1.1431, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5001420858198352, | |
| "grad_norm": 0.269300639629364, | |
| "learning_rate": 0.00010002841716396705, | |
| "loss": 1.1176, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5029838022165388, | |
| "grad_norm": 0.27633029222488403, | |
| "learning_rate": 9.946007388462631e-05, | |
| "loss": 1.1394, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5058255186132424, | |
| "grad_norm": 0.2624037563800812, | |
| "learning_rate": 9.889173060528559e-05, | |
| "loss": 1.1789, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.508667235009946, | |
| "grad_norm": 0.2688696086406708, | |
| "learning_rate": 9.832338732594488e-05, | |
| "loss": 1.0863, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5115089514066496, | |
| "grad_norm": 0.41462844610214233, | |
| "learning_rate": 9.775504404660415e-05, | |
| "loss": 1.1191, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5143506678033533, | |
| "grad_norm": 0.26823553442955017, | |
| "learning_rate": 9.718670076726343e-05, | |
| "loss": 1.0279, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5171923842000569, | |
| "grad_norm": 0.37709948420524597, | |
| "learning_rate": 9.66183574879227e-05, | |
| "loss": 1.1478, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5200341005967605, | |
| "grad_norm": 0.3214150369167328, | |
| "learning_rate": 9.605001420858199e-05, | |
| "loss": 1.0844, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 0.27517786622047424, | |
| "learning_rate": 9.548167092924126e-05, | |
| "loss": 1.1664, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5257175333901677, | |
| "grad_norm": 0.2494751513004303, | |
| "learning_rate": 9.491332764990054e-05, | |
| "loss": 1.1159, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5285592497868713, | |
| "grad_norm": 0.3077758252620697, | |
| "learning_rate": 9.434498437055981e-05, | |
| "loss": 1.1658, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5314009661835749, | |
| "grad_norm": 0.34368380904197693, | |
| "learning_rate": 9.37766410912191e-05, | |
| "loss": 1.1377, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5342426825802785, | |
| "grad_norm": 0.2457958608865738, | |
| "learning_rate": 9.320829781187838e-05, | |
| "loss": 1.1323, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5370843989769821, | |
| "grad_norm": 0.2676566243171692, | |
| "learning_rate": 9.263995453253765e-05, | |
| "loss": 1.1968, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5399261153736857, | |
| "grad_norm": 0.23688159883022308, | |
| "learning_rate": 9.207161125319694e-05, | |
| "loss": 1.093, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5427678317703893, | |
| "grad_norm": 0.3742455542087555, | |
| "learning_rate": 9.150326797385621e-05, | |
| "loss": 1.1396, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.545609548167093, | |
| "grad_norm": 0.31831708550453186, | |
| "learning_rate": 9.093492469451549e-05, | |
| "loss": 1.169, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5484512645637966, | |
| "grad_norm": 0.2743741273880005, | |
| "learning_rate": 9.036658141517476e-05, | |
| "loss": 1.0958, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5512929809605002, | |
| "grad_norm": 0.3475467562675476, | |
| "learning_rate": 8.979823813583405e-05, | |
| "loss": 1.0955, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5541346973572038, | |
| "grad_norm": 0.245005264878273, | |
| "learning_rate": 8.922989485649332e-05, | |
| "loss": 1.1146, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5569764137539074, | |
| "grad_norm": 0.2580576241016388, | |
| "learning_rate": 8.86615515771526e-05, | |
| "loss": 1.1223, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.559818130150611, | |
| "grad_norm": 0.3004942536354065, | |
| "learning_rate": 8.809320829781189e-05, | |
| "loss": 1.0554, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5626598465473146, | |
| "grad_norm": 0.27015358209609985, | |
| "learning_rate": 8.752486501847116e-05, | |
| "loss": 1.1463, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5655015629440182, | |
| "grad_norm": 0.26687344908714294, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 1.1709, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5683432793407218, | |
| "grad_norm": 0.2721405625343323, | |
| "learning_rate": 8.638817845978971e-05, | |
| "loss": 1.1327, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5711849957374254, | |
| "grad_norm": 0.287565678358078, | |
| "learning_rate": 8.5819835180449e-05, | |
| "loss": 1.1123, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.574026712134129, | |
| "grad_norm": 0.3454604148864746, | |
| "learning_rate": 8.525149190110827e-05, | |
| "loss": 1.1508, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5768684285308326, | |
| "grad_norm": 0.26024872064590454, | |
| "learning_rate": 8.468314862176755e-05, | |
| "loss": 1.0406, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5797101449275363, | |
| "grad_norm": 0.2535635828971863, | |
| "learning_rate": 8.411480534242682e-05, | |
| "loss": 1.1191, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5825518613242399, | |
| "grad_norm": 0.334521621465683, | |
| "learning_rate": 8.35464620630861e-05, | |
| "loss": 1.1982, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5853935777209435, | |
| "grad_norm": 0.27677032351493835, | |
| "learning_rate": 8.297811878374539e-05, | |
| "loss": 1.0914, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.360412061214447, | |
| "learning_rate": 8.240977550440466e-05, | |
| "loss": 1.1377, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5910770105143507, | |
| "grad_norm": 0.28759074211120605, | |
| "learning_rate": 8.184143222506395e-05, | |
| "loss": 1.053, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5939187269110543, | |
| "grad_norm": 0.32831844687461853, | |
| "learning_rate": 8.127308894572322e-05, | |
| "loss": 1.1326, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5967604433077579, | |
| "grad_norm": 0.25032633543014526, | |
| "learning_rate": 8.07047456663825e-05, | |
| "loss": 1.1457, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5996021597044615, | |
| "grad_norm": 0.28084343671798706, | |
| "learning_rate": 8.013640238704177e-05, | |
| "loss": 1.1014, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6024438761011651, | |
| "grad_norm": 0.26768869161605835, | |
| "learning_rate": 7.956805910770106e-05, | |
| "loss": 1.1442, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6052855924978687, | |
| "grad_norm": 0.35225123167037964, | |
| "learning_rate": 7.899971582836033e-05, | |
| "loss": 1.1275, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6081273088945723, | |
| "grad_norm": 0.25125908851623535, | |
| "learning_rate": 7.843137254901961e-05, | |
| "loss": 1.1318, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.610969025291276, | |
| "grad_norm": 0.2658576965332031, | |
| "learning_rate": 7.78630292696789e-05, | |
| "loss": 1.127, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6138107416879796, | |
| "grad_norm": 0.27074316143989563, | |
| "learning_rate": 7.729468599033817e-05, | |
| "loss": 1.1609, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6166524580846832, | |
| "grad_norm": 0.4368594288825989, | |
| "learning_rate": 7.672634271099745e-05, | |
| "loss": 1.1488, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6194941744813868, | |
| "grad_norm": 0.3108392059803009, | |
| "learning_rate": 7.615799943165672e-05, | |
| "loss": 1.1158, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6223358908780904, | |
| "grad_norm": 0.3383192718029022, | |
| "learning_rate": 7.5589656152316e-05, | |
| "loss": 1.123, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.625177607274794, | |
| "grad_norm": 0.35067611932754517, | |
| "learning_rate": 7.502131287297528e-05, | |
| "loss": 1.1087, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6280193236714976, | |
| "grad_norm": 0.28962525725364685, | |
| "learning_rate": 7.445296959363456e-05, | |
| "loss": 1.0414, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6308610400682012, | |
| "grad_norm": 0.2729092538356781, | |
| "learning_rate": 7.388462631429383e-05, | |
| "loss": 1.1024, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6337027564649048, | |
| "grad_norm": 0.26230186223983765, | |
| "learning_rate": 7.331628303495311e-05, | |
| "loss": 1.1446, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6365444728616084, | |
| "grad_norm": 0.30937954783439636, | |
| "learning_rate": 7.27479397556124e-05, | |
| "loss": 1.1018, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.639386189258312, | |
| "grad_norm": 0.2578141987323761, | |
| "learning_rate": 7.217959647627167e-05, | |
| "loss": 1.124, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6422279056550156, | |
| "grad_norm": 0.2398582547903061, | |
| "learning_rate": 7.161125319693095e-05, | |
| "loss": 1.0647, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6450696220517192, | |
| "grad_norm": 0.2563438415527344, | |
| "learning_rate": 7.104290991759022e-05, | |
| "loss": 1.1878, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6479113384484229, | |
| "grad_norm": 0.3550574779510498, | |
| "learning_rate": 7.047456663824951e-05, | |
| "loss": 1.051, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6507530548451265, | |
| "grad_norm": 0.2969961166381836, | |
| "learning_rate": 6.990622335890878e-05, | |
| "loss": 1.1507, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6535947712418301, | |
| "grad_norm": 0.2654373049736023, | |
| "learning_rate": 6.933788007956806e-05, | |
| "loss": 1.0709, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6564364876385337, | |
| "grad_norm": 0.2643349766731262, | |
| "learning_rate": 6.876953680022733e-05, | |
| "loss": 1.0412, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6592782040352373, | |
| "grad_norm": 0.24034832417964935, | |
| "learning_rate": 6.820119352088662e-05, | |
| "loss": 1.1291, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6621199204319409, | |
| "grad_norm": 0.23572514951229095, | |
| "learning_rate": 6.76328502415459e-05, | |
| "loss": 1.0813, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6649616368286445, | |
| "grad_norm": 0.24992486834526062, | |
| "learning_rate": 6.706450696220517e-05, | |
| "loss": 1.0856, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6678033532253481, | |
| "grad_norm": 0.319242924451828, | |
| "learning_rate": 6.649616368286446e-05, | |
| "loss": 1.0326, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6706450696220517, | |
| "grad_norm": 0.2844800353050232, | |
| "learning_rate": 6.592782040352373e-05, | |
| "loss": 1.0593, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6734867860187553, | |
| "grad_norm": 0.3302006423473358, | |
| "learning_rate": 6.535947712418301e-05, | |
| "loss": 1.0814, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6763285024154589, | |
| "grad_norm": 0.25190767645835876, | |
| "learning_rate": 6.479113384484228e-05, | |
| "loss": 1.1088, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6791702188121626, | |
| "grad_norm": 0.35067909955978394, | |
| "learning_rate": 6.422279056550157e-05, | |
| "loss": 1.0752, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6820119352088662, | |
| "grad_norm": 0.3033742904663086, | |
| "learning_rate": 6.365444728616085e-05, | |
| "loss": 1.0715, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6848536516055698, | |
| "grad_norm": 0.2741170823574066, | |
| "learning_rate": 6.308610400682012e-05, | |
| "loss": 1.1067, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6876953680022734, | |
| "grad_norm": 0.32321301102638245, | |
| "learning_rate": 6.251776072747941e-05, | |
| "loss": 1.124, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.690537084398977, | |
| "grad_norm": 0.3299727737903595, | |
| "learning_rate": 6.194941744813868e-05, | |
| "loss": 1.0749, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6933788007956806, | |
| "grad_norm": 0.2597588002681732, | |
| "learning_rate": 6.138107416879796e-05, | |
| "loss": 1.056, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6962205171923842, | |
| "grad_norm": 0.23553162813186646, | |
| "learning_rate": 6.081273088945724e-05, | |
| "loss": 1.0891, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6990622335890878, | |
| "grad_norm": 0.2980464696884155, | |
| "learning_rate": 6.024438761011652e-05, | |
| "loss": 1.1324, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7019039499857914, | |
| "grad_norm": 0.2644715905189514, | |
| "learning_rate": 5.9676044330775795e-05, | |
| "loss": 1.0958, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.704745666382495, | |
| "grad_norm": 0.3175826668739319, | |
| "learning_rate": 5.910770105143507e-05, | |
| "loss": 1.1356, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7075873827791986, | |
| "grad_norm": 0.2976154685020447, | |
| "learning_rate": 5.853935777209435e-05, | |
| "loss": 1.0735, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7104290991759022, | |
| "grad_norm": 0.31366729736328125, | |
| "learning_rate": 5.797101449275363e-05, | |
| "loss": 1.1751, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7132708155726059, | |
| "grad_norm": 0.31203290820121765, | |
| "learning_rate": 5.7402671213412905e-05, | |
| "loss": 1.1212, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7161125319693095, | |
| "grad_norm": 0.2864065170288086, | |
| "learning_rate": 5.683432793407218e-05, | |
| "loss": 1.146, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7189542483660131, | |
| "grad_norm": 0.2840626835823059, | |
| "learning_rate": 5.626598465473146e-05, | |
| "loss": 1.0261, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7217959647627167, | |
| "grad_norm": 0.3476808965206146, | |
| "learning_rate": 5.5697641375390744e-05, | |
| "loss": 1.1344, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7246376811594203, | |
| "grad_norm": 0.3733687996864319, | |
| "learning_rate": 5.512929809605002e-05, | |
| "loss": 1.1298, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7274793975561239, | |
| "grad_norm": 0.30094149708747864, | |
| "learning_rate": 5.45609548167093e-05, | |
| "loss": 1.1255, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7303211139528275, | |
| "grad_norm": 0.32213765382766724, | |
| "learning_rate": 5.3992611537368576e-05, | |
| "loss": 1.1319, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7331628303495311, | |
| "grad_norm": 0.2626177668571472, | |
| "learning_rate": 5.3424268258027854e-05, | |
| "loss": 1.141, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7360045467462347, | |
| "grad_norm": 0.30769652128219604, | |
| "learning_rate": 5.285592497868713e-05, | |
| "loss": 1.0559, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7388462631429383, | |
| "grad_norm": 0.24354040622711182, | |
| "learning_rate": 5.228758169934641e-05, | |
| "loss": 1.1385, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7416879795396419, | |
| "grad_norm": 0.24648752808570862, | |
| "learning_rate": 5.1719238420005686e-05, | |
| "loss": 1.1231, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7445296959363455, | |
| "grad_norm": 0.30956050753593445, | |
| "learning_rate": 5.1150895140664964e-05, | |
| "loss": 1.104, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7473714123330492, | |
| "grad_norm": 0.27976328134536743, | |
| "learning_rate": 5.058255186132425e-05, | |
| "loss": 1.1321, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7502131287297528, | |
| "grad_norm": 0.2535441517829895, | |
| "learning_rate": 5.0014208581983526e-05, | |
| "loss": 1.1174, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7530548451264564, | |
| "grad_norm": 0.3341921865940094, | |
| "learning_rate": 4.9445865302642796e-05, | |
| "loss": 1.1038, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.75589656152316, | |
| "grad_norm": 0.4902798533439636, | |
| "learning_rate": 4.8877522023302074e-05, | |
| "loss": 1.1245, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7587382779198636, | |
| "grad_norm": 0.2470143884420395, | |
| "learning_rate": 4.830917874396135e-05, | |
| "loss": 1.1574, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7615799943165672, | |
| "grad_norm": 0.2521553635597229, | |
| "learning_rate": 4.774083546462063e-05, | |
| "loss": 1.1444, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7644217107132708, | |
| "grad_norm": 0.3512313961982727, | |
| "learning_rate": 4.7172492185279906e-05, | |
| "loss": 1.1339, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7672634271099744, | |
| "grad_norm": 0.25479793548583984, | |
| "learning_rate": 4.660414890593919e-05, | |
| "loss": 1.1517, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.770105143506678, | |
| "grad_norm": 0.2850602865219116, | |
| "learning_rate": 4.603580562659847e-05, | |
| "loss": 1.1212, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7729468599033816, | |
| "grad_norm": 0.3531084358692169, | |
| "learning_rate": 4.5467462347257746e-05, | |
| "loss": 1.0433, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7757885763000852, | |
| "grad_norm": 0.2699624001979828, | |
| "learning_rate": 4.489911906791702e-05, | |
| "loss": 1.0769, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.7786302926967889, | |
| "grad_norm": 0.3828187584877014, | |
| "learning_rate": 4.43307757885763e-05, | |
| "loss": 1.0826, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7814720090934925, | |
| "grad_norm": 0.36253124475479126, | |
| "learning_rate": 4.376243250923558e-05, | |
| "loss": 1.1098, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 0.26642584800720215, | |
| "learning_rate": 4.3194089229894856e-05, | |
| "loss": 1.084, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7871554418868997, | |
| "grad_norm": 0.33443573117256165, | |
| "learning_rate": 4.262574595055413e-05, | |
| "loss": 1.1113, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7899971582836033, | |
| "grad_norm": 0.3628551661968231, | |
| "learning_rate": 4.205740267121341e-05, | |
| "loss": 1.107, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.7928388746803069, | |
| "grad_norm": 0.4214700758457184, | |
| "learning_rate": 4.1489059391872695e-05, | |
| "loss": 1.1348, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.7956805910770105, | |
| "grad_norm": 0.2711296081542969, | |
| "learning_rate": 4.092071611253197e-05, | |
| "loss": 1.0151, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7985223074737141, | |
| "grad_norm": 0.25555798411369324, | |
| "learning_rate": 4.035237283319125e-05, | |
| "loss": 1.1388, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8013640238704177, | |
| "grad_norm": 0.2785557806491852, | |
| "learning_rate": 3.978402955385053e-05, | |
| "loss": 1.1212, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8042057402671213, | |
| "grad_norm": 0.2974455654621124, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 1.1068, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8070474566638249, | |
| "grad_norm": 0.2941993176937103, | |
| "learning_rate": 3.864734299516908e-05, | |
| "loss": 1.1138, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8098891730605285, | |
| "grad_norm": 0.26110532879829407, | |
| "learning_rate": 3.807899971582836e-05, | |
| "loss": 1.1223, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8127308894572322, | |
| "grad_norm": 0.3765209913253784, | |
| "learning_rate": 3.751065643648764e-05, | |
| "loss": 1.1082, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8155726058539358, | |
| "grad_norm": 0.25102654099464417, | |
| "learning_rate": 3.6942313157146915e-05, | |
| "loss": 1.0929, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8184143222506394, | |
| "grad_norm": 0.28592199087142944, | |
| "learning_rate": 3.63739698778062e-05, | |
| "loss": 1.1606, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.821256038647343, | |
| "grad_norm": 0.28945067524909973, | |
| "learning_rate": 3.580562659846548e-05, | |
| "loss": 1.0372, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8240977550440466, | |
| "grad_norm": 0.25189608335494995, | |
| "learning_rate": 3.5237283319124754e-05, | |
| "loss": 1.1106, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8269394714407502, | |
| "grad_norm": 0.25932416319847107, | |
| "learning_rate": 3.466894003978403e-05, | |
| "loss": 1.0905, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8297811878374538, | |
| "grad_norm": 0.384107768535614, | |
| "learning_rate": 3.410059676044331e-05, | |
| "loss": 1.0509, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8326229042341574, | |
| "grad_norm": 0.2776072919368744, | |
| "learning_rate": 3.353225348110259e-05, | |
| "loss": 1.0733, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.835464620630861, | |
| "grad_norm": 0.3497239351272583, | |
| "learning_rate": 3.2963910201761864e-05, | |
| "loss": 1.0315, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8383063370275646, | |
| "grad_norm": 0.3055514693260193, | |
| "learning_rate": 3.239556692242114e-05, | |
| "loss": 1.1525, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8411480534242682, | |
| "grad_norm": 0.34542274475097656, | |
| "learning_rate": 3.1827223643080426e-05, | |
| "loss": 1.1209, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8439897698209718, | |
| "grad_norm": 0.28153786063194275, | |
| "learning_rate": 3.1258880363739704e-05, | |
| "loss": 1.1896, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.8468314862176755, | |
| "grad_norm": 0.3401312232017517, | |
| "learning_rate": 3.069053708439898e-05, | |
| "loss": 1.1267, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.8496732026143791, | |
| "grad_norm": 0.275859534740448, | |
| "learning_rate": 3.012219380505826e-05, | |
| "loss": 1.1269, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8525149190110827, | |
| "grad_norm": 0.2636275589466095, | |
| "learning_rate": 2.9553850525717536e-05, | |
| "loss": 1.0916, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8553566354077863, | |
| "grad_norm": 0.2864493727684021, | |
| "learning_rate": 2.8985507246376814e-05, | |
| "loss": 1.1135, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8581983518044899, | |
| "grad_norm": 0.32858121395111084, | |
| "learning_rate": 2.841716396703609e-05, | |
| "loss": 1.138, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8610400682011935, | |
| "grad_norm": 0.2940399944782257, | |
| "learning_rate": 2.7848820687695372e-05, | |
| "loss": 1.1357, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8638817845978971, | |
| "grad_norm": 0.3530689477920532, | |
| "learning_rate": 2.728047740835465e-05, | |
| "loss": 1.0974, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8667235009946007, | |
| "grad_norm": 0.2814668118953705, | |
| "learning_rate": 2.6712134129013927e-05, | |
| "loss": 1.1401, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 0.31256726384162903, | |
| "learning_rate": 2.6143790849673204e-05, | |
| "loss": 1.0997, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8724069337880079, | |
| "grad_norm": 0.24463021755218506, | |
| "learning_rate": 2.5575447570332482e-05, | |
| "loss": 1.044, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8752486501847115, | |
| "grad_norm": 0.2700346112251282, | |
| "learning_rate": 2.5007104290991763e-05, | |
| "loss": 1.0897, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8780903665814151, | |
| "grad_norm": 0.36381930112838745, | |
| "learning_rate": 2.4438761011651037e-05, | |
| "loss": 1.0861, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.8809320829781188, | |
| "grad_norm": 0.37403604388237, | |
| "learning_rate": 2.3870417732310314e-05, | |
| "loss": 1.1199, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8837737993748224, | |
| "grad_norm": 0.3271077275276184, | |
| "learning_rate": 2.3302074452969595e-05, | |
| "loss": 1.0453, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.886615515771526, | |
| "grad_norm": 0.29940828680992126, | |
| "learning_rate": 2.2733731173628873e-05, | |
| "loss": 1.1525, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.8894572321682296, | |
| "grad_norm": 0.26956799626350403, | |
| "learning_rate": 2.216538789428815e-05, | |
| "loss": 1.1288, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.8922989485649332, | |
| "grad_norm": 0.28132757544517517, | |
| "learning_rate": 2.1597044614947428e-05, | |
| "loss": 1.137, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.8951406649616368, | |
| "grad_norm": 0.3393004834651947, | |
| "learning_rate": 2.1028701335606705e-05, | |
| "loss": 1.0603, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8979823813583404, | |
| "grad_norm": 0.29636818170547485, | |
| "learning_rate": 2.0460358056265986e-05, | |
| "loss": 1.0911, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.900824097755044, | |
| "grad_norm": 0.30555668473243713, | |
| "learning_rate": 1.9892014776925264e-05, | |
| "loss": 1.2047, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9036658141517476, | |
| "grad_norm": 0.31181901693344116, | |
| "learning_rate": 1.932367149758454e-05, | |
| "loss": 1.1506, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9065075305484512, | |
| "grad_norm": 0.3718467652797699, | |
| "learning_rate": 1.875532821824382e-05, | |
| "loss": 1.1278, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9093492469451548, | |
| "grad_norm": 0.2683012783527374, | |
| "learning_rate": 1.81869849389031e-05, | |
| "loss": 1.0927, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9121909633418585, | |
| "grad_norm": 0.3459053933620453, | |
| "learning_rate": 1.7618641659562377e-05, | |
| "loss": 0.9968, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9150326797385621, | |
| "grad_norm": 0.37453094124794006, | |
| "learning_rate": 1.7050298380221655e-05, | |
| "loss": 1.0649, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9178743961352657, | |
| "grad_norm": 0.2843706011772156, | |
| "learning_rate": 1.6481955100880932e-05, | |
| "loss": 1.0884, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9207161125319693, | |
| "grad_norm": 0.2847299575805664, | |
| "learning_rate": 1.5913611821540213e-05, | |
| "loss": 1.124, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9235578289286729, | |
| "grad_norm": 0.2724878191947937, | |
| "learning_rate": 1.534526854219949e-05, | |
| "loss": 1.1162, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9263995453253765, | |
| "grad_norm": 0.3032269775867462, | |
| "learning_rate": 1.4776925262858768e-05, | |
| "loss": 1.152, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9292412617220801, | |
| "grad_norm": 0.2314031720161438, | |
| "learning_rate": 1.4208581983518046e-05, | |
| "loss": 1.083, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9320829781187837, | |
| "grad_norm": 0.3560166656970978, | |
| "learning_rate": 1.3640238704177325e-05, | |
| "loss": 1.0657, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.9349246945154873, | |
| "grad_norm": 0.3593125343322754, | |
| "learning_rate": 1.3071895424836602e-05, | |
| "loss": 1.1199, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9377664109121909, | |
| "grad_norm": 0.2630976438522339, | |
| "learning_rate": 1.2503552145495881e-05, | |
| "loss": 1.0206, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9406081273088945, | |
| "grad_norm": 0.2689420282840729, | |
| "learning_rate": 1.1935208866155157e-05, | |
| "loss": 1.1464, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.9434498437055981, | |
| "grad_norm": 0.33892905712127686, | |
| "learning_rate": 1.1366865586814436e-05, | |
| "loss": 1.1148, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.9462915601023018, | |
| "grad_norm": 0.2977355420589447, | |
| "learning_rate": 1.0798522307473714e-05, | |
| "loss": 1.0744, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.9491332764990054, | |
| "grad_norm": 0.24483497440814972, | |
| "learning_rate": 1.0230179028132993e-05, | |
| "loss": 1.0821, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.951974992895709, | |
| "grad_norm": 0.31132972240448, | |
| "learning_rate": 9.66183574879227e-06, | |
| "loss": 1.1173, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9548167092924126, | |
| "grad_norm": 0.38017576932907104, | |
| "learning_rate": 9.09349246945155e-06, | |
| "loss": 1.0849, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.9576584256891162, | |
| "grad_norm": 0.3665505051612854, | |
| "learning_rate": 8.525149190110827e-06, | |
| "loss": 1.1455, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.9605001420858198, | |
| "grad_norm": 0.29008227586746216, | |
| "learning_rate": 7.956805910770107e-06, | |
| "loss": 1.073, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9633418584825234, | |
| "grad_norm": 0.3025209903717041, | |
| "learning_rate": 7.388462631429384e-06, | |
| "loss": 1.1217, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.966183574879227, | |
| "grad_norm": 0.28936612606048584, | |
| "learning_rate": 6.820119352088662e-06, | |
| "loss": 1.0274, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9690252912759306, | |
| "grad_norm": 0.3080444633960724, | |
| "learning_rate": 6.251776072747941e-06, | |
| "loss": 1.1762, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9718670076726342, | |
| "grad_norm": 0.3216501772403717, | |
| "learning_rate": 5.683432793407218e-06, | |
| "loss": 1.1382, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9747087240693378, | |
| "grad_norm": 0.2817239761352539, | |
| "learning_rate": 5.1150895140664966e-06, | |
| "loss": 1.0852, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.9775504404660414, | |
| "grad_norm": 0.33580419421195984, | |
| "learning_rate": 4.546746234725775e-06, | |
| "loss": 1.1122, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 0.27000322937965393, | |
| "learning_rate": 3.978402955385053e-06, | |
| "loss": 1.1576, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.9832338732594487, | |
| "grad_norm": 0.3857513666152954, | |
| "learning_rate": 3.410059676044331e-06, | |
| "loss": 1.0738, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.9860755896561523, | |
| "grad_norm": 0.2714364528656006, | |
| "learning_rate": 2.841716396703609e-06, | |
| "loss": 1.0978, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.9889173060528559, | |
| "grad_norm": 0.314001202583313, | |
| "learning_rate": 2.2733731173628875e-06, | |
| "loss": 1.0158, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.9917590224495595, | |
| "grad_norm": 0.3561016023159027, | |
| "learning_rate": 1.7050298380221656e-06, | |
| "loss": 1.1506, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.9946007388462631, | |
| "grad_norm": 0.33506929874420166, | |
| "learning_rate": 1.1366865586814437e-06, | |
| "loss": 1.0156, | |
| "step": 3500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3519, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.6906446012416e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |