| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 3519, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002841716396703609, |
| "grad_norm": 0.20229442417621613, |
| "learning_rate": 0.00019948849104859336, |
| "loss": 1.523, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005683432793407218, |
| "grad_norm": 0.24781420826911926, |
| "learning_rate": 0.00019892014776925262, |
| "loss": 1.3539, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008525149190110827, |
| "grad_norm": 0.21505358815193176, |
| "learning_rate": 0.0001983518044899119, |
| "loss": 1.3182, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011366865586814436, |
| "grad_norm": 0.2120037078857422, |
| "learning_rate": 0.00019778346121057119, |
| "loss": 1.3647, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.014208581983518044, |
| "grad_norm": 0.19157598912715912, |
| "learning_rate": 0.00019721511793123047, |
| "loss": 1.4083, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.017050298380221655, |
| "grad_norm": 0.2424042969942093, |
| "learning_rate": 0.00019664677465188975, |
| "loss": 1.2882, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.019892014776925263, |
| "grad_norm": 0.25576573610305786, |
| "learning_rate": 0.000196078431372549, |
| "loss": 1.2469, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.022733731173628872, |
| "grad_norm": 0.3163057267665863, |
| "learning_rate": 0.0001955100880932083, |
| "loss": 1.3234, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02557544757033248, |
| "grad_norm": 0.20805688202381134, |
| "learning_rate": 0.00019494174481386758, |
| "loss": 1.2203, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02841716396703609, |
| "grad_norm": 0.26067784428596497, |
| "learning_rate": 0.00019437340153452686, |
| "loss": 1.235, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0312588803637397, |
| "grad_norm": 0.25863921642303467, |
| "learning_rate": 0.00019380505825518612, |
| "loss": 1.2209, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03410059676044331, |
| "grad_norm": 0.2780992090702057, |
| "learning_rate": 0.0001932367149758454, |
| "loss": 1.2806, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03694231315714692, |
| "grad_norm": 0.2662423253059387, |
| "learning_rate": 0.0001926683716965047, |
| "loss": 1.2075, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.039784029553850526, |
| "grad_norm": 0.35407036542892456, |
| "learning_rate": 0.00019210002841716397, |
| "loss": 1.2721, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.042625745950554135, |
| "grad_norm": 0.2910842001438141, |
| "learning_rate": 0.00019153168513782326, |
| "loss": 1.2716, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.045467462347257744, |
| "grad_norm": 0.2619645893573761, |
| "learning_rate": 0.00019096334185848252, |
| "loss": 1.186, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04830917874396135, |
| "grad_norm": 0.2996160686016083, |
| "learning_rate": 0.0001903949985791418, |
| "loss": 1.1886, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05115089514066496, |
| "grad_norm": 0.227691650390625, |
| "learning_rate": 0.00018982665529980108, |
| "loss": 1.2613, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05399261153736857, |
| "grad_norm": 0.2601442039012909, |
| "learning_rate": 0.00018925831202046037, |
| "loss": 1.1767, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05683432793407218, |
| "grad_norm": 0.28762301802635193, |
| "learning_rate": 0.00018868996874111963, |
| "loss": 1.2628, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.059676044330775786, |
| "grad_norm": 0.23256859183311462, |
| "learning_rate": 0.0001881216254617789, |
| "loss": 1.1098, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0625177607274794, |
| "grad_norm": 0.2880021333694458, |
| "learning_rate": 0.0001875532821824382, |
| "loss": 1.1575, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.06535947712418301, |
| "grad_norm": 0.25147515535354614, |
| "learning_rate": 0.00018698493890309748, |
| "loss": 1.1868, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.06820119352088662, |
| "grad_norm": 0.29358601570129395, |
| "learning_rate": 0.00018641659562375676, |
| "loss": 1.241, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.07104290991759023, |
| "grad_norm": 0.24878720939159393, |
| "learning_rate": 0.00018584825234441602, |
| "loss": 1.1583, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07388462631429384, |
| "grad_norm": 0.23219600319862366, |
| "learning_rate": 0.0001852799090650753, |
| "loss": 1.155, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07672634271099744, |
| "grad_norm": 0.2404685616493225, |
| "learning_rate": 0.0001847115657857346, |
| "loss": 1.1896, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07956805910770105, |
| "grad_norm": 0.21366341412067413, |
| "learning_rate": 0.00018414322250639387, |
| "loss": 1.1522, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.08240977550440466, |
| "grad_norm": 0.30190715193748474, |
| "learning_rate": 0.00018357487922705313, |
| "loss": 1.2053, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.08525149190110827, |
| "grad_norm": 0.253252238035202, |
| "learning_rate": 0.00018300653594771241, |
| "loss": 1.1812, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08809320829781188, |
| "grad_norm": 0.2292664349079132, |
| "learning_rate": 0.0001824381926683717, |
| "loss": 1.112, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.09093492469451549, |
| "grad_norm": 0.28798526525497437, |
| "learning_rate": 0.00018186984938903098, |
| "loss": 1.2313, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.0937766410912191, |
| "grad_norm": 0.28377199172973633, |
| "learning_rate": 0.00018130150610969027, |
| "loss": 1.108, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0966183574879227, |
| "grad_norm": 0.25983279943466187, |
| "learning_rate": 0.00018073316283034952, |
| "loss": 1.1511, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.09946007388462631, |
| "grad_norm": 0.25927045941352844, |
| "learning_rate": 0.0001801648195510088, |
| "loss": 1.2269, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.10230179028132992, |
| "grad_norm": 0.2704865634441376, |
| "learning_rate": 0.0001795964762716681, |
| "loss": 1.1827, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.10514350667803353, |
| "grad_norm": 0.30205655097961426, |
| "learning_rate": 0.00017902813299232738, |
| "loss": 1.2028, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.10798522307473714, |
| "grad_norm": 0.3334643244743347, |
| "learning_rate": 0.00017845978971298663, |
| "loss": 1.1631, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.11082693947144075, |
| "grad_norm": 0.25340893864631653, |
| "learning_rate": 0.00017789144643364592, |
| "loss": 1.1823, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.11366865586814436, |
| "grad_norm": 0.2417430877685547, |
| "learning_rate": 0.0001773231031543052, |
| "loss": 1.1956, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11651037226484796, |
| "grad_norm": 0.238485187292099, |
| "learning_rate": 0.0001767547598749645, |
| "loss": 1.2436, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.11935208866155157, |
| "grad_norm": 0.2162630409002304, |
| "learning_rate": 0.00017618641659562377, |
| "loss": 1.1831, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.12219380505825518, |
| "grad_norm": 0.25849658250808716, |
| "learning_rate": 0.00017561807331628303, |
| "loss": 1.2077, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1250355214549588, |
| "grad_norm": 0.3160068392753601, |
| "learning_rate": 0.0001750497300369423, |
| "loss": 1.2443, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1278772378516624, |
| "grad_norm": 0.25949159264564514, |
| "learning_rate": 0.0001744813867576016, |
| "loss": 1.2738, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.13071895424836602, |
| "grad_norm": 0.2856585383415222, |
| "learning_rate": 0.00017391304347826088, |
| "loss": 1.1611, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.13356067064506963, |
| "grad_norm": 0.27770936489105225, |
| "learning_rate": 0.00017334470019892014, |
| "loss": 1.1796, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.13640238704177324, |
| "grad_norm": 0.2558460831642151, |
| "learning_rate": 0.00017277635691957942, |
| "loss": 1.1277, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.13924410343847685, |
| "grad_norm": 0.23918330669403076, |
| "learning_rate": 0.0001722080136402387, |
| "loss": 1.1651, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.14208581983518045, |
| "grad_norm": 0.24087974429130554, |
| "learning_rate": 0.000171639670360898, |
| "loss": 1.2795, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14492753623188406, |
| "grad_norm": 0.22798433899879456, |
| "learning_rate": 0.00017107132708155728, |
| "loss": 1.1636, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.14776925262858767, |
| "grad_norm": 0.25570231676101685, |
| "learning_rate": 0.00017050298380221653, |
| "loss": 1.0991, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.15061096902529128, |
| "grad_norm": 0.3161047101020813, |
| "learning_rate": 0.00016993464052287582, |
| "loss": 1.1638, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 0.34027767181396484, |
| "learning_rate": 0.0001693662972435351, |
| "loss": 1.1913, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1562944018186985, |
| "grad_norm": 0.5431545972824097, |
| "learning_rate": 0.00016879795396419439, |
| "loss": 1.2059, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1591361182154021, |
| "grad_norm": 0.3261612057685852, |
| "learning_rate": 0.00016822961068485364, |
| "loss": 1.1779, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.16197783461210571, |
| "grad_norm": 0.25675147771835327, |
| "learning_rate": 0.00016766126740551293, |
| "loss": 1.2524, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.16481955100880932, |
| "grad_norm": 0.2560219168663025, |
| "learning_rate": 0.0001670929241261722, |
| "loss": 1.0579, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.16766126740551293, |
| "grad_norm": 0.29484283924102783, |
| "learning_rate": 0.0001665245808468315, |
| "loss": 1.0802, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.17050298380221654, |
| "grad_norm": 0.22986841201782227, |
| "learning_rate": 0.00016595623756749078, |
| "loss": 1.1268, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.17334470019892015, |
| "grad_norm": 0.23908346891403198, |
| "learning_rate": 0.00016538789428815004, |
| "loss": 1.1326, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.17618641659562376, |
| "grad_norm": 0.30050793290138245, |
| "learning_rate": 0.00016481955100880932, |
| "loss": 1.0501, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.17902813299232737, |
| "grad_norm": 0.24418674409389496, |
| "learning_rate": 0.0001642512077294686, |
| "loss": 1.1718, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.18186984938903097, |
| "grad_norm": 0.4306769073009491, |
| "learning_rate": 0.0001636828644501279, |
| "loss": 1.1343, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.18471156578573458, |
| "grad_norm": 0.2366916537284851, |
| "learning_rate": 0.00016311452117078715, |
| "loss": 1.1766, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.1875532821824382, |
| "grad_norm": 0.281982958316803, |
| "learning_rate": 0.00016254617789144643, |
| "loss": 1.2057, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.1903949985791418, |
| "grad_norm": 0.2688102424144745, |
| "learning_rate": 0.00016197783461210572, |
| "loss": 1.1958, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.1932367149758454, |
| "grad_norm": 0.28181755542755127, |
| "learning_rate": 0.000161409491332765, |
| "loss": 1.1662, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 0.23462365567684174, |
| "learning_rate": 0.00016084114805342428, |
| "loss": 1.1193, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.19892014776925263, |
| "grad_norm": 0.27968090772628784, |
| "learning_rate": 0.00016027280477408354, |
| "loss": 1.1678, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.20176186416595623, |
| "grad_norm": 0.2571905851364136, |
| "learning_rate": 0.00015970446149474283, |
| "loss": 1.1775, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.20460358056265984, |
| "grad_norm": 0.2821357250213623, |
| "learning_rate": 0.0001591361182154021, |
| "loss": 1.1649, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.20744529695936345, |
| "grad_norm": 0.2606565058231354, |
| "learning_rate": 0.0001585677749360614, |
| "loss": 1.0562, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.21028701335606706, |
| "grad_norm": 0.27794864773750305, |
| "learning_rate": 0.00015799943165672065, |
| "loss": 1.1366, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.21312872975277067, |
| "grad_norm": 0.2107602059841156, |
| "learning_rate": 0.00015743108837737994, |
| "loss": 1.1106, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.21597044614947428, |
| "grad_norm": 0.2524462640285492, |
| "learning_rate": 0.00015686274509803922, |
| "loss": 1.1777, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.21881216254617789, |
| "grad_norm": 0.22253374755382538, |
| "learning_rate": 0.0001562944018186985, |
| "loss": 1.0856, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2216538789428815, |
| "grad_norm": 0.2423143982887268, |
| "learning_rate": 0.0001557260585393578, |
| "loss": 1.1836, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2244955953395851, |
| "grad_norm": 0.2595592737197876, |
| "learning_rate": 0.00015515771526001705, |
| "loss": 1.1698, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2273373117362887, |
| "grad_norm": 0.2568744421005249, |
| "learning_rate": 0.00015458937198067633, |
| "loss": 1.1707, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.23017902813299232, |
| "grad_norm": 0.30398836731910706, |
| "learning_rate": 0.00015402102870133561, |
| "loss": 1.1547, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.23302074452969593, |
| "grad_norm": 0.300106406211853, |
| "learning_rate": 0.0001534526854219949, |
| "loss": 1.1925, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.23586246092639954, |
| "grad_norm": 0.2774117588996887, |
| "learning_rate": 0.00015288434214265416, |
| "loss": 1.1404, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.23870417732310314, |
| "grad_norm": 0.28219330310821533, |
| "learning_rate": 0.00015231599886331344, |
| "loss": 1.122, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.24154589371980675, |
| "grad_norm": 0.3004832863807678, |
| "learning_rate": 0.00015174765558397272, |
| "loss": 1.2315, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.24438761011651036, |
| "grad_norm": 0.24674588441848755, |
| "learning_rate": 0.000151179312304632, |
| "loss": 1.1663, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.24722932651321397, |
| "grad_norm": 0.21483612060546875, |
| "learning_rate": 0.0001506109690252913, |
| "loss": 1.173, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2500710429099176, |
| "grad_norm": 0.3079366683959961, |
| "learning_rate": 0.00015004262574595055, |
| "loss": 1.1695, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.2529127593066212, |
| "grad_norm": 0.2626102566719055, |
| "learning_rate": 0.00014947428246660983, |
| "loss": 1.1552, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2557544757033248, |
| "grad_norm": 0.3606242537498474, |
| "learning_rate": 0.00014890593918726912, |
| "loss": 1.0921, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.25859619210002843, |
| "grad_norm": 0.3001089096069336, |
| "learning_rate": 0.0001483375959079284, |
| "loss": 1.1911, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.26143790849673204, |
| "grad_norm": 0.2784072458744049, |
| "learning_rate": 0.00014776925262858766, |
| "loss": 1.1979, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.26427962489343565, |
| "grad_norm": 0.21513643860816956, |
| "learning_rate": 0.00014720090934924694, |
| "loss": 1.0723, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.26712134129013926, |
| "grad_norm": 0.253627210855484, |
| "learning_rate": 0.00014663256606990623, |
| "loss": 1.1883, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.26996305768684287, |
| "grad_norm": 0.2411937564611435, |
| "learning_rate": 0.0001460642227905655, |
| "loss": 1.1786, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2728047740835465, |
| "grad_norm": 0.2327292114496231, |
| "learning_rate": 0.0001454958795112248, |
| "loss": 1.2151, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2756464904802501, |
| "grad_norm": 0.26323702931404114, |
| "learning_rate": 0.00014492753623188405, |
| "loss": 1.1002, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.2784882068769537, |
| "grad_norm": 0.2239420861005783, |
| "learning_rate": 0.00014435919295254334, |
| "loss": 1.0453, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.2813299232736573, |
| "grad_norm": 0.2786525785923004, |
| "learning_rate": 0.00014379084967320262, |
| "loss": 1.2195, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2841716396703609, |
| "grad_norm": 0.23764382302761078, |
| "learning_rate": 0.0001432225063938619, |
| "loss": 1.1263, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2870133560670645, |
| "grad_norm": 0.2542721927165985, |
| "learning_rate": 0.00014265416311452116, |
| "loss": 1.1603, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.2898550724637681, |
| "grad_norm": 0.26884064078330994, |
| "learning_rate": 0.00014208581983518045, |
| "loss": 1.1759, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.29269678886047173, |
| "grad_norm": 0.2652948498725891, |
| "learning_rate": 0.00014151747655583973, |
| "loss": 1.0826, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.29553850525717534, |
| "grad_norm": 0.3024510443210602, |
| "learning_rate": 0.00014094913327649902, |
| "loss": 1.1451, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.29838022165387895, |
| "grad_norm": 0.22471967339515686, |
| "learning_rate": 0.0001403807899971583, |
| "loss": 1.1412, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.30122193805058256, |
| "grad_norm": 0.32953527569770813, |
| "learning_rate": 0.00013981244671781756, |
| "loss": 1.1233, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.30406365444728617, |
| "grad_norm": 0.3743230700492859, |
| "learning_rate": 0.00013924410343847684, |
| "loss": 1.1403, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 0.36825114488601685, |
| "learning_rate": 0.00013867576015913613, |
| "loss": 1.2296, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3097470872406934, |
| "grad_norm": 0.3129768669605255, |
| "learning_rate": 0.0001381074168797954, |
| "loss": 1.1697, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.312588803637397, |
| "grad_norm": 0.31028246879577637, |
| "learning_rate": 0.00013753907360045467, |
| "loss": 1.096, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3154305200341006, |
| "grad_norm": 0.2524968981742859, |
| "learning_rate": 0.00013697073032111395, |
| "loss": 1.1114, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.3182722364308042, |
| "grad_norm": 0.23146887123584747, |
| "learning_rate": 0.00013640238704177324, |
| "loss": 1.0634, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3211139528275078, |
| "grad_norm": 0.26541128754615784, |
| "learning_rate": 0.00013583404376243252, |
| "loss": 1.0631, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.32395566922421143, |
| "grad_norm": 0.24775725603103638, |
| "learning_rate": 0.0001352657004830918, |
| "loss": 1.1456, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.32679738562091504, |
| "grad_norm": 0.25310489535331726, |
| "learning_rate": 0.00013469735720375106, |
| "loss": 1.2405, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.32963910201761865, |
| "grad_norm": 0.2598433792591095, |
| "learning_rate": 0.00013412901392441035, |
| "loss": 1.2455, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.33248081841432225, |
| "grad_norm": 0.2735394835472107, |
| "learning_rate": 0.00013356067064506963, |
| "loss": 1.1014, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.33532253481102586, |
| "grad_norm": 0.26913198828697205, |
| "learning_rate": 0.00013299232736572892, |
| "loss": 1.1476, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.33816425120772947, |
| "grad_norm": 0.22991891205310822, |
| "learning_rate": 0.00013242398408638817, |
| "loss": 1.0964, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3410059676044331, |
| "grad_norm": 0.2543002963066101, |
| "learning_rate": 0.00013185564080704746, |
| "loss": 1.1409, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3438476840011367, |
| "grad_norm": 0.2660631537437439, |
| "learning_rate": 0.00013128729752770674, |
| "loss": 1.0375, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.3466894003978403, |
| "grad_norm": 0.25068119168281555, |
| "learning_rate": 0.00013071895424836603, |
| "loss": 1.1708, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3495311167945439, |
| "grad_norm": 0.27296605706214905, |
| "learning_rate": 0.0001301506109690253, |
| "loss": 1.1449, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3523728331912475, |
| "grad_norm": 0.23561522364616394, |
| "learning_rate": 0.00012958226768968457, |
| "loss": 1.1954, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.3552145495879511, |
| "grad_norm": 0.2912009358406067, |
| "learning_rate": 0.00012901392441034385, |
| "loss": 1.1254, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.35805626598465473, |
| "grad_norm": 0.24392102658748627, |
| "learning_rate": 0.00012844558113100314, |
| "loss": 1.1263, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.36089798238135834, |
| "grad_norm": 0.30842769145965576, |
| "learning_rate": 0.00012787723785166242, |
| "loss": 1.0731, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.36373969877806195, |
| "grad_norm": 0.2747196853160858, |
| "learning_rate": 0.0001273088945723217, |
| "loss": 1.1913, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.36658141517476556, |
| "grad_norm": 0.31838688254356384, |
| "learning_rate": 0.00012674055129298096, |
| "loss": 1.222, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.36942313157146917, |
| "grad_norm": 0.3127056062221527, |
| "learning_rate": 0.00012617220801364025, |
| "loss": 1.0872, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3722648479681728, |
| "grad_norm": 0.2799491882324219, |
| "learning_rate": 0.00012560386473429953, |
| "loss": 1.1277, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.3751065643648764, |
| "grad_norm": 0.2875117063522339, |
| "learning_rate": 0.00012503552145495881, |
| "loss": 1.153, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.37794828076158, |
| "grad_norm": 0.3062177300453186, |
| "learning_rate": 0.00012446717817561807, |
| "loss": 1.1142, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.3807899971582836, |
| "grad_norm": 0.23725247383117676, |
| "learning_rate": 0.00012389883489627736, |
| "loss": 1.0559, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3836317135549872, |
| "grad_norm": 0.28051912784576416, |
| "learning_rate": 0.00012333049161693664, |
| "loss": 1.0642, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.3864734299516908, |
| "grad_norm": 0.2455301284790039, |
| "learning_rate": 0.00012276214833759592, |
| "loss": 1.175, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3893151463483944, |
| "grad_norm": 0.26085713505744934, |
| "learning_rate": 0.0001221938050582552, |
| "loss": 1.1368, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 0.4084455370903015, |
| "learning_rate": 0.00012162546177891448, |
| "loss": 1.1346, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.39499857914180164, |
| "grad_norm": 0.28224310278892517, |
| "learning_rate": 0.00012105711849957375, |
| "loss": 1.0693, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.39784029553850525, |
| "grad_norm": 0.3237653970718384, |
| "learning_rate": 0.00012048877522023303, |
| "loss": 1.0585, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.40068201193520886, |
| "grad_norm": 0.26249366998672485, |
| "learning_rate": 0.0001199204319408923, |
| "loss": 1.1227, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.40352372833191247, |
| "grad_norm": 0.29983624815940857, |
| "learning_rate": 0.00011935208866155159, |
| "loss": 1.1473, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.4063654447286161, |
| "grad_norm": 0.26302003860473633, |
| "learning_rate": 0.00011878374538221086, |
| "loss": 1.2398, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.4092071611253197, |
| "grad_norm": 0.2707955539226532, |
| "learning_rate": 0.00011821540210287014, |
| "loss": 1.1299, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.4120488775220233, |
| "grad_norm": 0.47745946049690247, |
| "learning_rate": 0.00011764705882352942, |
| "loss": 1.138, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4148905939187269, |
| "grad_norm": 0.24607343971729279, |
| "learning_rate": 0.0001170787155441887, |
| "loss": 1.1444, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4177323103154305, |
| "grad_norm": 0.2819903492927551, |
| "learning_rate": 0.00011651037226484798, |
| "loss": 1.0861, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.4205740267121341, |
| "grad_norm": 0.2718110978603363, |
| "learning_rate": 0.00011594202898550725, |
| "loss": 1.1225, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.4234157431088377, |
| "grad_norm": 0.2966226041316986, |
| "learning_rate": 0.00011537368570616654, |
| "loss": 1.1901, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.42625745950554134, |
| "grad_norm": 0.38320621848106384, |
| "learning_rate": 0.00011480534242682581, |
| "loss": 1.134, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.42909917590224494, |
| "grad_norm": 0.2895069718360901, |
| "learning_rate": 0.0001142369991474851, |
| "loss": 1.0505, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.43194089229894855, |
| "grad_norm": 0.32522544264793396, |
| "learning_rate": 0.00011366865586814436, |
| "loss": 1.119, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.3680785298347473, |
| "learning_rate": 0.00011310031258880365, |
| "loss": 1.154, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.43762432509235577, |
| "grad_norm": 0.24093805253505707, |
| "learning_rate": 0.00011253196930946292, |
| "loss": 1.1294, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4404660414890594, |
| "grad_norm": 0.3424024283885956, |
| "learning_rate": 0.0001119636260301222, |
| "loss": 1.1875, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.443307757885763, |
| "grad_norm": 0.24656616151332855, |
| "learning_rate": 0.00011139528275078149, |
| "loss": 1.111, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.4461494742824666, |
| "grad_norm": 0.289628803730011, |
| "learning_rate": 0.00011082693947144076, |
| "loss": 1.1301, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.4489911906791702, |
| "grad_norm": 0.33433884382247925, |
| "learning_rate": 0.00011025859619210004, |
| "loss": 1.1408, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.4518329070758738, |
| "grad_norm": 0.32477903366088867, |
| "learning_rate": 0.00010969025291275931, |
| "loss": 1.1735, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.4546746234725774, |
| "grad_norm": 0.2586929202079773, |
| "learning_rate": 0.0001091219096334186, |
| "loss": 1.1209, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.45751633986928103, |
| "grad_norm": 0.3958762586116791, |
| "learning_rate": 0.00010855356635407787, |
| "loss": 1.1538, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 0.2704383134841919, |
| "learning_rate": 0.00010798522307473715, |
| "loss": 1.1352, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.46319977266268825, |
| "grad_norm": 0.3805047869682312, |
| "learning_rate": 0.00010741687979539642, |
| "loss": 1.0875, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.46604148905939186, |
| "grad_norm": 0.2563640773296356, |
| "learning_rate": 0.00010684853651605571, |
| "loss": 1.0743, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.46888320545609546, |
| "grad_norm": 0.2729780972003937, |
| "learning_rate": 0.00010628019323671499, |
| "loss": 1.082, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.4717249218527991, |
| "grad_norm": 0.31282857060432434, |
| "learning_rate": 0.00010571184995737426, |
| "loss": 1.1308, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.4745666382495027, |
| "grad_norm": 0.30954697728157043, |
| "learning_rate": 0.00010514350667803355, |
| "loss": 1.1608, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.4774083546462063, |
| "grad_norm": 0.2565356194972992, |
| "learning_rate": 0.00010457516339869282, |
| "loss": 1.0761, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.4802500710429099, |
| "grad_norm": 0.25005489587783813, |
| "learning_rate": 0.0001040068201193521, |
| "loss": 1.129, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4830917874396135, |
| "grad_norm": 0.2812393605709076, |
| "learning_rate": 0.00010343847684001137, |
| "loss": 1.1722, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.4859335038363171, |
| "grad_norm": 0.2592753469944, |
| "learning_rate": 0.00010287013356067066, |
| "loss": 1.0974, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.4887752202330207, |
| "grad_norm": 0.4299195408821106, |
| "learning_rate": 0.00010230179028132993, |
| "loss": 1.1636, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.49161693662972433, |
| "grad_norm": 0.2507430613040924, |
| "learning_rate": 0.00010173344700198921, |
| "loss": 1.1259, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.49445865302642794, |
| "grad_norm": 0.25915494561195374, |
| "learning_rate": 0.0001011651037226485, |
| "loss": 1.1513, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.49730036942313155, |
| "grad_norm": 0.4046742022037506, |
| "learning_rate": 0.00010059676044330777, |
| "loss": 1.1431, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5001420858198352, |
| "grad_norm": 0.269300639629364, |
| "learning_rate": 0.00010002841716396705, |
| "loss": 1.1176, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5029838022165388, |
| "grad_norm": 0.27633029222488403, |
| "learning_rate": 9.946007388462631e-05, |
| "loss": 1.1394, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5058255186132424, |
| "grad_norm": 0.2624037563800812, |
| "learning_rate": 9.889173060528559e-05, |
| "loss": 1.1789, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.508667235009946, |
| "grad_norm": 0.2688696086406708, |
| "learning_rate": 9.832338732594488e-05, |
| "loss": 1.0863, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5115089514066496, |
| "grad_norm": 0.41462844610214233, |
| "learning_rate": 9.775504404660415e-05, |
| "loss": 1.1191, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5143506678033533, |
| "grad_norm": 0.26823553442955017, |
| "learning_rate": 9.718670076726343e-05, |
| "loss": 1.0279, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5171923842000569, |
| "grad_norm": 0.37709948420524597, |
| "learning_rate": 9.66183574879227e-05, |
| "loss": 1.1478, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.5200341005967605, |
| "grad_norm": 0.3214150369167328, |
| "learning_rate": 9.605001420858199e-05, |
| "loss": 1.0844, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5228758169934641, |
| "grad_norm": 0.27517786622047424, |
| "learning_rate": 9.548167092924126e-05, |
| "loss": 1.1664, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.5257175333901677, |
| "grad_norm": 0.2494751513004303, |
| "learning_rate": 9.491332764990054e-05, |
| "loss": 1.1159, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5285592497868713, |
| "grad_norm": 0.3077758252620697, |
| "learning_rate": 9.434498437055981e-05, |
| "loss": 1.1658, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5314009661835749, |
| "grad_norm": 0.34368380904197693, |
| "learning_rate": 9.37766410912191e-05, |
| "loss": 1.1377, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.5342426825802785, |
| "grad_norm": 0.2457958608865738, |
| "learning_rate": 9.320829781187838e-05, |
| "loss": 1.1323, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.5370843989769821, |
| "grad_norm": 0.2676566243171692, |
| "learning_rate": 9.263995453253765e-05, |
| "loss": 1.1968, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.5399261153736857, |
| "grad_norm": 0.23688159883022308, |
| "learning_rate": 9.207161125319694e-05, |
| "loss": 1.093, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5427678317703893, |
| "grad_norm": 0.3742455542087555, |
| "learning_rate": 9.150326797385621e-05, |
| "loss": 1.1396, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.545609548167093, |
| "grad_norm": 0.31831708550453186, |
| "learning_rate": 9.093492469451549e-05, |
| "loss": 1.169, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.5484512645637966, |
| "grad_norm": 0.2743741273880005, |
| "learning_rate": 9.036658141517476e-05, |
| "loss": 1.0958, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.5512929809605002, |
| "grad_norm": 0.3475467562675476, |
| "learning_rate": 8.979823813583405e-05, |
| "loss": 1.0955, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.5541346973572038, |
| "grad_norm": 0.245005264878273, |
| "learning_rate": 8.922989485649332e-05, |
| "loss": 1.1146, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5569764137539074, |
| "grad_norm": 0.2580576241016388, |
| "learning_rate": 8.86615515771526e-05, |
| "loss": 1.1223, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.559818130150611, |
| "grad_norm": 0.3004942536354065, |
| "learning_rate": 8.809320829781189e-05, |
| "loss": 1.0554, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.5626598465473146, |
| "grad_norm": 0.27015358209609985, |
| "learning_rate": 8.752486501847116e-05, |
| "loss": 1.1463, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.5655015629440182, |
| "grad_norm": 0.26687344908714294, |
| "learning_rate": 8.695652173913044e-05, |
| "loss": 1.1709, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5683432793407218, |
| "grad_norm": 0.2721405625343323, |
| "learning_rate": 8.638817845978971e-05, |
| "loss": 1.1327, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5711849957374254, |
| "grad_norm": 0.287565678358078, |
| "learning_rate": 8.5819835180449e-05, |
| "loss": 1.1123, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.574026712134129, |
| "grad_norm": 0.3454604148864746, |
| "learning_rate": 8.525149190110827e-05, |
| "loss": 1.1508, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.5768684285308326, |
| "grad_norm": 0.26024872064590454, |
| "learning_rate": 8.468314862176755e-05, |
| "loss": 1.0406, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.5797101449275363, |
| "grad_norm": 0.2535635828971863, |
| "learning_rate": 8.411480534242682e-05, |
| "loss": 1.1191, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.5825518613242399, |
| "grad_norm": 0.334521621465683, |
| "learning_rate": 8.35464620630861e-05, |
| "loss": 1.1982, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.5853935777209435, |
| "grad_norm": 0.27677032351493835, |
| "learning_rate": 8.297811878374539e-05, |
| "loss": 1.0914, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.360412061214447, |
| "learning_rate": 8.240977550440466e-05, |
| "loss": 1.1377, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.5910770105143507, |
| "grad_norm": 0.28759074211120605, |
| "learning_rate": 8.184143222506395e-05, |
| "loss": 1.053, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.5939187269110543, |
| "grad_norm": 0.32831844687461853, |
| "learning_rate": 8.127308894572322e-05, |
| "loss": 1.1326, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.5967604433077579, |
| "grad_norm": 0.25032633543014526, |
| "learning_rate": 8.07047456663825e-05, |
| "loss": 1.1457, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5996021597044615, |
| "grad_norm": 0.28084343671798706, |
| "learning_rate": 8.013640238704177e-05, |
| "loss": 1.1014, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.6024438761011651, |
| "grad_norm": 0.26768869161605835, |
| "learning_rate": 7.956805910770106e-05, |
| "loss": 1.1442, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.6052855924978687, |
| "grad_norm": 0.35225123167037964, |
| "learning_rate": 7.899971582836033e-05, |
| "loss": 1.1275, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6081273088945723, |
| "grad_norm": 0.25125908851623535, |
| "learning_rate": 7.843137254901961e-05, |
| "loss": 1.1318, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.610969025291276, |
| "grad_norm": 0.2658576965332031, |
| "learning_rate": 7.78630292696789e-05, |
| "loss": 1.127, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 0.27074316143989563, |
| "learning_rate": 7.729468599033817e-05, |
| "loss": 1.1609, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.6166524580846832, |
| "grad_norm": 0.4368594288825989, |
| "learning_rate": 7.672634271099745e-05, |
| "loss": 1.1488, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.6194941744813868, |
| "grad_norm": 0.3108392059803009, |
| "learning_rate": 7.615799943165672e-05, |
| "loss": 1.1158, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.6223358908780904, |
| "grad_norm": 0.3383192718029022, |
| "learning_rate": 7.5589656152316e-05, |
| "loss": 1.123, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.625177607274794, |
| "grad_norm": 0.35067611932754517, |
| "learning_rate": 7.502131287297528e-05, |
| "loss": 1.1087, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6280193236714976, |
| "grad_norm": 0.28962525725364685, |
| "learning_rate": 7.445296959363456e-05, |
| "loss": 1.0414, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.6308610400682012, |
| "grad_norm": 0.2729092538356781, |
| "learning_rate": 7.388462631429383e-05, |
| "loss": 1.1024, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.6337027564649048, |
| "grad_norm": 0.26230186223983765, |
| "learning_rate": 7.331628303495311e-05, |
| "loss": 1.1446, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.6365444728616084, |
| "grad_norm": 0.30937954783439636, |
| "learning_rate": 7.27479397556124e-05, |
| "loss": 1.1018, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.639386189258312, |
| "grad_norm": 0.2578141987323761, |
| "learning_rate": 7.217959647627167e-05, |
| "loss": 1.124, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.6422279056550156, |
| "grad_norm": 0.2398582547903061, |
| "learning_rate": 7.161125319693095e-05, |
| "loss": 1.0647, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.6450696220517192, |
| "grad_norm": 0.2563438415527344, |
| "learning_rate": 7.104290991759022e-05, |
| "loss": 1.1878, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.6479113384484229, |
| "grad_norm": 0.3550574779510498, |
| "learning_rate": 7.047456663824951e-05, |
| "loss": 1.051, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.6507530548451265, |
| "grad_norm": 0.2969961166381836, |
| "learning_rate": 6.990622335890878e-05, |
| "loss": 1.1507, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.6535947712418301, |
| "grad_norm": 0.2654373049736023, |
| "learning_rate": 6.933788007956806e-05, |
| "loss": 1.0709, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6564364876385337, |
| "grad_norm": 0.2643349766731262, |
| "learning_rate": 6.876953680022733e-05, |
| "loss": 1.0412, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.6592782040352373, |
| "grad_norm": 0.24034832417964935, |
| "learning_rate": 6.820119352088662e-05, |
| "loss": 1.1291, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.6621199204319409, |
| "grad_norm": 0.23572514951229095, |
| "learning_rate": 6.76328502415459e-05, |
| "loss": 1.0813, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.6649616368286445, |
| "grad_norm": 0.24992486834526062, |
| "learning_rate": 6.706450696220517e-05, |
| "loss": 1.0856, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.6678033532253481, |
| "grad_norm": 0.319242924451828, |
| "learning_rate": 6.649616368286446e-05, |
| "loss": 1.0326, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.6706450696220517, |
| "grad_norm": 0.2844800353050232, |
| "learning_rate": 6.592782040352373e-05, |
| "loss": 1.0593, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.6734867860187553, |
| "grad_norm": 0.3302006423473358, |
| "learning_rate": 6.535947712418301e-05, |
| "loss": 1.0814, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.6763285024154589, |
| "grad_norm": 0.25190767645835876, |
| "learning_rate": 6.479113384484228e-05, |
| "loss": 1.1088, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.6791702188121626, |
| "grad_norm": 0.35067909955978394, |
| "learning_rate": 6.422279056550157e-05, |
| "loss": 1.0752, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.6820119352088662, |
| "grad_norm": 0.3033742904663086, |
| "learning_rate": 6.365444728616085e-05, |
| "loss": 1.0715, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6848536516055698, |
| "grad_norm": 0.2741170823574066, |
| "learning_rate": 6.308610400682012e-05, |
| "loss": 1.1067, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.6876953680022734, |
| "grad_norm": 0.32321301102638245, |
| "learning_rate": 6.251776072747941e-05, |
| "loss": 1.124, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.690537084398977, |
| "grad_norm": 0.3299727737903595, |
| "learning_rate": 6.194941744813868e-05, |
| "loss": 1.0749, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.6933788007956806, |
| "grad_norm": 0.2597588002681732, |
| "learning_rate": 6.138107416879796e-05, |
| "loss": 1.056, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.6962205171923842, |
| "grad_norm": 0.23553162813186646, |
| "learning_rate": 6.081273088945724e-05, |
| "loss": 1.0891, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.6990622335890878, |
| "grad_norm": 0.2980464696884155, |
| "learning_rate": 6.024438761011652e-05, |
| "loss": 1.1324, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.7019039499857914, |
| "grad_norm": 0.2644715905189514, |
| "learning_rate": 5.9676044330775795e-05, |
| "loss": 1.0958, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.704745666382495, |
| "grad_norm": 0.3175826668739319, |
| "learning_rate": 5.910770105143507e-05, |
| "loss": 1.1356, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.7075873827791986, |
| "grad_norm": 0.2976154685020447, |
| "learning_rate": 5.853935777209435e-05, |
| "loss": 1.0735, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.7104290991759022, |
| "grad_norm": 0.31366729736328125, |
| "learning_rate": 5.797101449275363e-05, |
| "loss": 1.1751, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7132708155726059, |
| "grad_norm": 0.31203290820121765, |
| "learning_rate": 5.7402671213412905e-05, |
| "loss": 1.1212, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.7161125319693095, |
| "grad_norm": 0.2864065170288086, |
| "learning_rate": 5.683432793407218e-05, |
| "loss": 1.146, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.7189542483660131, |
| "grad_norm": 0.2840626835823059, |
| "learning_rate": 5.626598465473146e-05, |
| "loss": 1.0261, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.7217959647627167, |
| "grad_norm": 0.3476808965206146, |
| "learning_rate": 5.5697641375390744e-05, |
| "loss": 1.1344, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.7246376811594203, |
| "grad_norm": 0.3733687996864319, |
| "learning_rate": 5.512929809605002e-05, |
| "loss": 1.1298, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.7274793975561239, |
| "grad_norm": 0.30094149708747864, |
| "learning_rate": 5.45609548167093e-05, |
| "loss": 1.1255, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.7303211139528275, |
| "grad_norm": 0.32213765382766724, |
| "learning_rate": 5.3992611537368576e-05, |
| "loss": 1.1319, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.7331628303495311, |
| "grad_norm": 0.2626177668571472, |
| "learning_rate": 5.3424268258027854e-05, |
| "loss": 1.141, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.7360045467462347, |
| "grad_norm": 0.30769652128219604, |
| "learning_rate": 5.285592497868713e-05, |
| "loss": 1.0559, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.7388462631429383, |
| "grad_norm": 0.24354040622711182, |
| "learning_rate": 5.228758169934641e-05, |
| "loss": 1.1385, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7416879795396419, |
| "grad_norm": 0.24648752808570862, |
| "learning_rate": 5.1719238420005686e-05, |
| "loss": 1.1231, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.7445296959363455, |
| "grad_norm": 0.30956050753593445, |
| "learning_rate": 5.1150895140664964e-05, |
| "loss": 1.104, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.7473714123330492, |
| "grad_norm": 0.27976328134536743, |
| "learning_rate": 5.058255186132425e-05, |
| "loss": 1.1321, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.7502131287297528, |
| "grad_norm": 0.2535441517829895, |
| "learning_rate": 5.0014208581983526e-05, |
| "loss": 1.1174, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.7530548451264564, |
| "grad_norm": 0.3341921865940094, |
| "learning_rate": 4.9445865302642796e-05, |
| "loss": 1.1038, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.75589656152316, |
| "grad_norm": 0.4902798533439636, |
| "learning_rate": 4.8877522023302074e-05, |
| "loss": 1.1245, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.7587382779198636, |
| "grad_norm": 0.2470143884420395, |
| "learning_rate": 4.830917874396135e-05, |
| "loss": 1.1574, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.7615799943165672, |
| "grad_norm": 0.2521553635597229, |
| "learning_rate": 4.774083546462063e-05, |
| "loss": 1.1444, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.7644217107132708, |
| "grad_norm": 0.3512313961982727, |
| "learning_rate": 4.7172492185279906e-05, |
| "loss": 1.1339, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 0.25479793548583984, |
| "learning_rate": 4.660414890593919e-05, |
| "loss": 1.1517, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.770105143506678, |
| "grad_norm": 0.2850602865219116, |
| "learning_rate": 4.603580562659847e-05, |
| "loss": 1.1212, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.7729468599033816, |
| "grad_norm": 0.3531084358692169, |
| "learning_rate": 4.5467462347257746e-05, |
| "loss": 1.0433, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.7757885763000852, |
| "grad_norm": 0.2699624001979828, |
| "learning_rate": 4.489911906791702e-05, |
| "loss": 1.0769, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.7786302926967889, |
| "grad_norm": 0.3828187584877014, |
| "learning_rate": 4.43307757885763e-05, |
| "loss": 1.0826, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.7814720090934925, |
| "grad_norm": 0.36253124475479126, |
| "learning_rate": 4.376243250923558e-05, |
| "loss": 1.1098, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 0.26642584800720215, |
| "learning_rate": 4.3194089229894856e-05, |
| "loss": 1.084, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.7871554418868997, |
| "grad_norm": 0.33443573117256165, |
| "learning_rate": 4.262574595055413e-05, |
| "loss": 1.1113, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.7899971582836033, |
| "grad_norm": 0.3628551661968231, |
| "learning_rate": 4.205740267121341e-05, |
| "loss": 1.107, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.7928388746803069, |
| "grad_norm": 0.4214700758457184, |
| "learning_rate": 4.1489059391872695e-05, |
| "loss": 1.1348, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.7956805910770105, |
| "grad_norm": 0.2711296081542969, |
| "learning_rate": 4.092071611253197e-05, |
| "loss": 1.0151, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7985223074737141, |
| "grad_norm": 0.25555798411369324, |
| "learning_rate": 4.035237283319125e-05, |
| "loss": 1.1388, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.8013640238704177, |
| "grad_norm": 0.2785557806491852, |
| "learning_rate": 3.978402955385053e-05, |
| "loss": 1.1212, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.8042057402671213, |
| "grad_norm": 0.2974455654621124, |
| "learning_rate": 3.9215686274509805e-05, |
| "loss": 1.1068, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.8070474566638249, |
| "grad_norm": 0.2941993176937103, |
| "learning_rate": 3.864734299516908e-05, |
| "loss": 1.1138, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.8098891730605285, |
| "grad_norm": 0.26110532879829407, |
| "learning_rate": 3.807899971582836e-05, |
| "loss": 1.1223, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.8127308894572322, |
| "grad_norm": 0.3765209913253784, |
| "learning_rate": 3.751065643648764e-05, |
| "loss": 1.1082, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.8155726058539358, |
| "grad_norm": 0.25102654099464417, |
| "learning_rate": 3.6942313157146915e-05, |
| "loss": 1.0929, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.8184143222506394, |
| "grad_norm": 0.28592199087142944, |
| "learning_rate": 3.63739698778062e-05, |
| "loss": 1.1606, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.821256038647343, |
| "grad_norm": 0.28945067524909973, |
| "learning_rate": 3.580562659846548e-05, |
| "loss": 1.0372, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.8240977550440466, |
| "grad_norm": 0.25189608335494995, |
| "learning_rate": 3.5237283319124754e-05, |
| "loss": 1.1106, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.8269394714407502, |
| "grad_norm": 0.25932416319847107, |
| "learning_rate": 3.466894003978403e-05, |
| "loss": 1.0905, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.8297811878374538, |
| "grad_norm": 0.384107768535614, |
| "learning_rate": 3.410059676044331e-05, |
| "loss": 1.0509, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.8326229042341574, |
| "grad_norm": 0.2776072919368744, |
| "learning_rate": 3.353225348110259e-05, |
| "loss": 1.0733, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.835464620630861, |
| "grad_norm": 0.3497239351272583, |
| "learning_rate": 3.2963910201761864e-05, |
| "loss": 1.0315, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.8383063370275646, |
| "grad_norm": 0.3055514693260193, |
| "learning_rate": 3.239556692242114e-05, |
| "loss": 1.1525, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.8411480534242682, |
| "grad_norm": 0.34542274475097656, |
| "learning_rate": 3.1827223643080426e-05, |
| "loss": 1.1209, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.8439897698209718, |
| "grad_norm": 0.28153786063194275, |
| "learning_rate": 3.1258880363739704e-05, |
| "loss": 1.1896, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.8468314862176755, |
| "grad_norm": 0.3401312232017517, |
| "learning_rate": 3.069053708439898e-05, |
| "loss": 1.1267, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.8496732026143791, |
| "grad_norm": 0.275859534740448, |
| "learning_rate": 3.012219380505826e-05, |
| "loss": 1.1269, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.8525149190110827, |
| "grad_norm": 0.2636275589466095, |
| "learning_rate": 2.9553850525717536e-05, |
| "loss": 1.0916, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8553566354077863, |
| "grad_norm": 0.2864493727684021, |
| "learning_rate": 2.8985507246376814e-05, |
| "loss": 1.1135, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.8581983518044899, |
| "grad_norm": 0.32858121395111084, |
| "learning_rate": 2.841716396703609e-05, |
| "loss": 1.138, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.8610400682011935, |
| "grad_norm": 0.2940399944782257, |
| "learning_rate": 2.7848820687695372e-05, |
| "loss": 1.1357, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.8638817845978971, |
| "grad_norm": 0.3530689477920532, |
| "learning_rate": 2.728047740835465e-05, |
| "loss": 1.0974, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.8667235009946007, |
| "grad_norm": 0.2814668118953705, |
| "learning_rate": 2.6712134129013927e-05, |
| "loss": 1.1401, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.31256726384162903, |
| "learning_rate": 2.6143790849673204e-05, |
| "loss": 1.0997, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.8724069337880079, |
| "grad_norm": 0.24463021755218506, |
| "learning_rate": 2.5575447570332482e-05, |
| "loss": 1.044, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.8752486501847115, |
| "grad_norm": 0.2700346112251282, |
| "learning_rate": 2.5007104290991763e-05, |
| "loss": 1.0897, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.8780903665814151, |
| "grad_norm": 0.36381930112838745, |
| "learning_rate": 2.4438761011651037e-05, |
| "loss": 1.0861, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.8809320829781188, |
| "grad_norm": 0.37403604388237, |
| "learning_rate": 2.3870417732310314e-05, |
| "loss": 1.1199, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8837737993748224, |
| "grad_norm": 0.3271077275276184, |
| "learning_rate": 2.3302074452969595e-05, |
| "loss": 1.0453, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.886615515771526, |
| "grad_norm": 0.29940828680992126, |
| "learning_rate": 2.2733731173628873e-05, |
| "loss": 1.1525, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.8894572321682296, |
| "grad_norm": 0.26956799626350403, |
| "learning_rate": 2.216538789428815e-05, |
| "loss": 1.1288, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.8922989485649332, |
| "grad_norm": 0.28132757544517517, |
| "learning_rate": 2.1597044614947428e-05, |
| "loss": 1.137, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.8951406649616368, |
| "grad_norm": 0.3393004834651947, |
| "learning_rate": 2.1028701335606705e-05, |
| "loss": 1.0603, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.8979823813583404, |
| "grad_norm": 0.29636818170547485, |
| "learning_rate": 2.0460358056265986e-05, |
| "loss": 1.0911, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.900824097755044, |
| "grad_norm": 0.30555668473243713, |
| "learning_rate": 1.9892014776925264e-05, |
| "loss": 1.2047, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.9036658141517476, |
| "grad_norm": 0.31181901693344116, |
| "learning_rate": 1.932367149758454e-05, |
| "loss": 1.1506, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.9065075305484512, |
| "grad_norm": 0.3718467652797699, |
| "learning_rate": 1.875532821824382e-05, |
| "loss": 1.1278, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.9093492469451548, |
| "grad_norm": 0.2683012783527374, |
| "learning_rate": 1.81869849389031e-05, |
| "loss": 1.0927, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.9121909633418585, |
| "grad_norm": 0.3459053933620453, |
| "learning_rate": 1.7618641659562377e-05, |
| "loss": 0.9968, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.9150326797385621, |
| "grad_norm": 0.37453094124794006, |
| "learning_rate": 1.7050298380221655e-05, |
| "loss": 1.0649, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.9178743961352657, |
| "grad_norm": 0.2843706011772156, |
| "learning_rate": 1.6481955100880932e-05, |
| "loss": 1.0884, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 0.2847299575805664, |
| "learning_rate": 1.5913611821540213e-05, |
| "loss": 1.124, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.9235578289286729, |
| "grad_norm": 0.2724878191947937, |
| "learning_rate": 1.534526854219949e-05, |
| "loss": 1.1162, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.9263995453253765, |
| "grad_norm": 0.3032269775867462, |
| "learning_rate": 1.4776925262858768e-05, |
| "loss": 1.152, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.9292412617220801, |
| "grad_norm": 0.2314031720161438, |
| "learning_rate": 1.4208581983518046e-05, |
| "loss": 1.083, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.9320829781187837, |
| "grad_norm": 0.3560166656970978, |
| "learning_rate": 1.3640238704177325e-05, |
| "loss": 1.0657, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.9349246945154873, |
| "grad_norm": 0.3593125343322754, |
| "learning_rate": 1.3071895424836602e-05, |
| "loss": 1.1199, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.9377664109121909, |
| "grad_norm": 0.2630976438522339, |
| "learning_rate": 1.2503552145495881e-05, |
| "loss": 1.0206, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9406081273088945, |
| "grad_norm": 0.2689420282840729, |
| "learning_rate": 1.1935208866155157e-05, |
| "loss": 1.1464, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.9434498437055981, |
| "grad_norm": 0.33892905712127686, |
| "learning_rate": 1.1366865586814436e-05, |
| "loss": 1.1148, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.9462915601023018, |
| "grad_norm": 0.2977355420589447, |
| "learning_rate": 1.0798522307473714e-05, |
| "loss": 1.0744, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.9491332764990054, |
| "grad_norm": 0.24483497440814972, |
| "learning_rate": 1.0230179028132993e-05, |
| "loss": 1.0821, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.951974992895709, |
| "grad_norm": 0.31132972240448, |
| "learning_rate": 9.66183574879227e-06, |
| "loss": 1.1173, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.9548167092924126, |
| "grad_norm": 0.38017576932907104, |
| "learning_rate": 9.09349246945155e-06, |
| "loss": 1.0849, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.9576584256891162, |
| "grad_norm": 0.3665505051612854, |
| "learning_rate": 8.525149190110827e-06, |
| "loss": 1.1455, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.9605001420858198, |
| "grad_norm": 0.29008227586746216, |
| "learning_rate": 7.956805910770107e-06, |
| "loss": 1.073, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.9633418584825234, |
| "grad_norm": 0.3025209903717041, |
| "learning_rate": 7.388462631429384e-06, |
| "loss": 1.1217, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.966183574879227, |
| "grad_norm": 0.28936612606048584, |
| "learning_rate": 6.820119352088662e-06, |
| "loss": 1.0274, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.9690252912759306, |
| "grad_norm": 0.3080444633960724, |
| "learning_rate": 6.251776072747941e-06, |
| "loss": 1.1762, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.9718670076726342, |
| "grad_norm": 0.3216501772403717, |
| "learning_rate": 5.683432793407218e-06, |
| "loss": 1.1382, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.9747087240693378, |
| "grad_norm": 0.2817239761352539, |
| "learning_rate": 5.1150895140664966e-06, |
| "loss": 1.0852, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.9775504404660414, |
| "grad_norm": 0.33580419421195984, |
| "learning_rate": 4.546746234725775e-06, |
| "loss": 1.1122, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 0.27000322937965393, |
| "learning_rate": 3.978402955385053e-06, |
| "loss": 1.1576, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.9832338732594487, |
| "grad_norm": 0.3857513666152954, |
| "learning_rate": 3.410059676044331e-06, |
| "loss": 1.0738, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.9860755896561523, |
| "grad_norm": 0.2714364528656006, |
| "learning_rate": 2.841716396703609e-06, |
| "loss": 1.0978, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.9889173060528559, |
| "grad_norm": 0.314001202583313, |
| "learning_rate": 2.2733731173628875e-06, |
| "loss": 1.0158, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.9917590224495595, |
| "grad_norm": 0.3561016023159027, |
| "learning_rate": 1.7050298380221656e-06, |
| "loss": 1.1506, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.9946007388462631, |
| "grad_norm": 0.33506929874420166, |
| "learning_rate": 1.1366865586814437e-06, |
| "loss": 1.0156, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9974424552429667, |
| "grad_norm": 0.3048253059387207, |
| "learning_rate": 5.683432793407219e-07, |
| "loss": 1.084, |
| "step": 3510 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3519, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.7205204853981184e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|