| {"current_steps": 5, "total_steps": 1800, "loss": 5.0358, "lr": 2.2222222222222223e-05, "epoch": 0.05555555555555555, "percentage": 0.28, "elapsed_time": "0:00:01", "remaining_time": "0:07:34", "throughput": 1238.15, "total_tokens": 1568} |
| {"current_steps": 10, "total_steps": 1800, "loss": 1.6677, "lr": 5e-05, "epoch": 0.1111111111111111, "percentage": 0.56, "elapsed_time": "0:00:02", "remaining_time": "0:06:07", "throughput": 1513.8, "total_tokens": 3104} |
| {"current_steps": 15, "total_steps": 1800, "loss": 0.5514, "lr": 7.777777777777778e-05, "epoch": 0.16666666666666666, "percentage": 0.83, "elapsed_time": "0:00:02", "remaining_time": "0:05:38", "throughput": 1655.77, "total_tokens": 4704} |
| {"current_steps": 20, "total_steps": 1800, "loss": 0.278, "lr": 0.00010555555555555555, "epoch": 0.2222222222222222, "percentage": 1.11, "elapsed_time": "0:00:03", "remaining_time": "0:05:23", "throughput": 1736.19, "total_tokens": 6304} |
| {"current_steps": 25, "total_steps": 1800, "loss": 0.5795, "lr": 0.00013333333333333334, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:04", "remaining_time": "0:05:13", "throughput": 1767.61, "total_tokens": 7808} |
| {"current_steps": 30, "total_steps": 1800, "loss": 0.3714, "lr": 0.0001611111111111111, "epoch": 0.3333333333333333, "percentage": 1.67, "elapsed_time": "0:00:05", "remaining_time": "0:05:07", "throughput": 1793.99, "total_tokens": 9344} |
| {"current_steps": 35, "total_steps": 1800, "loss": 0.2761, "lr": 0.00018888888888888888, "epoch": 0.3888888888888889, "percentage": 1.94, "elapsed_time": "0:00:05", "remaining_time": "0:05:02", "throughput": 1819.59, "total_tokens": 10912} |
| {"current_steps": 40, "total_steps": 1800, "loss": 0.3341, "lr": 0.00021666666666666668, "epoch": 0.4444444444444444, "percentage": 2.22, "elapsed_time": "0:00:06", "remaining_time": "0:04:58", "throughput": 1843.63, "total_tokens": 12512} |
| {"current_steps": 45, "total_steps": 1800, "loss": 0.9829, "lr": 0.00024444444444444443, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:07", "remaining_time": "0:04:55", "throughput": 1851.43, "total_tokens": 14016} |
| {"current_steps": 50, "total_steps": 1800, "loss": 10.264, "lr": 0.0002722222222222222, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:00:08", "remaining_time": "0:04:52", "throughput": 1861.06, "total_tokens": 15552} |
| {"current_steps": 55, "total_steps": 1800, "loss": 6.6835, "lr": 0.0003, "epoch": 0.6111111111111112, "percentage": 3.06, "elapsed_time": "0:00:09", "remaining_time": "0:04:50", "throughput": 1872.25, "total_tokens": 17120} |
| {"current_steps": 60, "total_steps": 1800, "loss": 3.0323, "lr": 0.0003277777777777778, "epoch": 0.6666666666666666, "percentage": 3.33, "elapsed_time": "0:00:09", "remaining_time": "0:04:48", "throughput": 1884.45, "total_tokens": 18720} |
| {"current_steps": 65, "total_steps": 1800, "loss": 0.7167, "lr": 0.00035555555555555557, "epoch": 0.7222222222222222, "percentage": 3.61, "elapsed_time": "0:00:10", "remaining_time": "0:04:46", "throughput": 1892.12, "total_tokens": 20288} |
| {"current_steps": 70, "total_steps": 1800, "loss": 0.4521, "lr": 0.00038333333333333334, "epoch": 0.7777777777777778, "percentage": 3.89, "elapsed_time": "0:00:11", "remaining_time": "0:04:44", "throughput": 1898.27, "total_tokens": 21856} |
| {"current_steps": 75, "total_steps": 1800, "loss": 0.5278, "lr": 0.0004111111111111111, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:00:12", "remaining_time": "0:04:43", "throughput": 1906.06, "total_tokens": 23456} |
| {"current_steps": 80, "total_steps": 1800, "loss": 0.3483, "lr": 0.0004388888888888889, "epoch": 0.8888888888888888, "percentage": 4.44, "elapsed_time": "0:00:13", "remaining_time": "0:04:41", "throughput": 1908.2, "total_tokens": 24992} |
| {"current_steps": 85, "total_steps": 1800, "loss": 0.254, "lr": 0.00046666666666666666, "epoch": 0.9444444444444444, "percentage": 4.72, "elapsed_time": "0:00:13", "remaining_time": "0:04:40", "throughput": 1910.39, "total_tokens": 26528} |
| {"current_steps": 90, "total_steps": 1800, "loss": 0.2658, "lr": 0.0004944444444444445, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:14", "remaining_time": "0:04:39", "throughput": 1911.86, "total_tokens": 28096} |
| {"current_steps": 90, "total_steps": 1800, "eval_loss": 0.2337619960308075, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:15", "remaining_time": "0:04:54", "throughput": 1812.27, "total_tokens": 28096} |
| {"current_steps": 95, "total_steps": 1800, "loss": 0.2372, "lr": 0.0005222222222222223, "epoch": 1.0555555555555556, "percentage": 5.28, "elapsed_time": "0:00:17", "remaining_time": "0:05:07", "throughput": 1727.34, "total_tokens": 29632} |
| {"current_steps": 100, "total_steps": 1800, "loss": 0.2304, "lr": 0.00055, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:00:17", "remaining_time": "0:05:05", "throughput": 1735.56, "total_tokens": 31200} |
| {"current_steps": 105, "total_steps": 1800, "loss": 0.2383, "lr": 0.0005777777777777778, "epoch": 1.1666666666666667, "percentage": 5.83, "elapsed_time": "0:00:18", "remaining_time": "0:05:04", "throughput": 1737.01, "total_tokens": 32768} |
| {"current_steps": 110, "total_steps": 1800, "loss": 0.2395, "lr": 0.0006055555555555556, "epoch": 1.2222222222222223, "percentage": 6.11, "elapsed_time": "0:00:19", "remaining_time": "0:05:01", "throughput": 1747.03, "total_tokens": 34336} |
| {"current_steps": 115, "total_steps": 1800, "loss": 0.2242, "lr": 0.0006333333333333333, "epoch": 1.2777777777777777, "percentage": 6.39, "elapsed_time": "0:00:20", "remaining_time": "0:04:59", "throughput": 1754.55, "total_tokens": 35872} |
| {"current_steps": 120, "total_steps": 1800, "loss": 0.2926, "lr": 0.0006611111111111111, "epoch": 1.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:21", "remaining_time": "0:04:57", "throughput": 1763.27, "total_tokens": 37440} |
| {"current_steps": 125, "total_steps": 1800, "loss": 0.2411, "lr": 0.000688888888888889, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:00:22", "remaining_time": "0:04:55", "throughput": 1769.99, "total_tokens": 38976} |
| {"current_steps": 130, "total_steps": 1800, "loss": 0.2359, "lr": 0.0007166666666666667, "epoch": 1.4444444444444444, "percentage": 7.22, "elapsed_time": "0:00:22", "remaining_time": "0:04:53", "throughput": 1777.59, "total_tokens": 40544} |
| {"current_steps": 135, "total_steps": 1800, "loss": 0.2433, "lr": 0.0007444444444444445, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:23", "remaining_time": "0:04:51", "throughput": 1785.89, "total_tokens": 42144} |
| {"current_steps": 140, "total_steps": 1800, "loss": 0.2923, "lr": 0.0007722222222222223, "epoch": 1.5555555555555556, "percentage": 7.78, "elapsed_time": "0:00:24", "remaining_time": "0:04:49", "throughput": 1793.47, "total_tokens": 43744} |
| {"current_steps": 145, "total_steps": 1800, "loss": 0.2501, "lr": 0.0008, "epoch": 1.6111111111111112, "percentage": 8.06, "elapsed_time": "0:00:25", "remaining_time": "0:04:47", "throughput": 1798.49, "total_tokens": 45280} |
| {"current_steps": 150, "total_steps": 1800, "loss": 0.244, "lr": 0.0008277777777777778, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:25", "remaining_time": "0:04:45", "throughput": 1805.4, "total_tokens": 46880} |
| {"current_steps": 155, "total_steps": 1800, "loss": 0.2374, "lr": 0.0008555555555555556, "epoch": 1.7222222222222223, "percentage": 8.61, "elapsed_time": "0:00:26", "remaining_time": "0:04:43", "throughput": 1811.82, "total_tokens": 48480} |
| {"current_steps": 160, "total_steps": 1800, "loss": 0.2398, "lr": 0.0008833333333333333, "epoch": 1.7777777777777777, "percentage": 8.89, "elapsed_time": "0:00:27", "remaining_time": "0:04:42", "throughput": 1817.78, "total_tokens": 50080} |
| {"current_steps": 165, "total_steps": 1800, "loss": 0.2385, "lr": 0.0009111111111111111, "epoch": 1.8333333333333335, "percentage": 9.17, "elapsed_time": "0:00:28", "remaining_time": "0:04:40", "throughput": 1822.37, "total_tokens": 51648} |
| {"current_steps": 170, "total_steps": 1800, "loss": 0.2268, "lr": 0.000938888888888889, "epoch": 1.8888888888888888, "percentage": 9.44, "elapsed_time": "0:00:29", "remaining_time": "0:04:39", "throughput": 1826.62, "total_tokens": 53216} |
| {"current_steps": 175, "total_steps": 1800, "loss": 0.2437, "lr": 0.0009666666666666667, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:00:29", "remaining_time": "0:04:37", "throughput": 1829.78, "total_tokens": 54752} |
| {"current_steps": 180, "total_steps": 1800, "loss": 0.2448, "lr": 0.0009944444444444445, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:30", "remaining_time": "0:04:36", "throughput": 1832.99, "total_tokens": 56352} |
| {"current_steps": 180, "total_steps": 1800, "eval_loss": 0.23352833092212677, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:31", "remaining_time": "0:04:43", "throughput": 1785.98, "total_tokens": 56352} |
| {"current_steps": 185, "total_steps": 1800, "loss": 0.2458, "lr": 0.000999984957239884, "epoch": 2.0555555555555554, "percentage": 10.28, "elapsed_time": "0:00:33", "remaining_time": "0:04:50", "throughput": 1741.36, "total_tokens": 57888} |
| {"current_steps": 190, "total_steps": 1800, "loss": 0.2359, "lr": 0.0009999238475781956, "epoch": 2.111111111111111, "percentage": 10.56, "elapsed_time": "0:00:34", "remaining_time": "0:04:48", "throughput": 1747.66, "total_tokens": 59488} |
| {"current_steps": 195, "total_steps": 1800, "loss": 0.2319, "lr": 0.000999815736583355, "epoch": 2.1666666666666665, "percentage": 10.83, "elapsed_time": "0:00:34", "remaining_time": "0:04:46", "throughput": 1752.29, "total_tokens": 61024} |
| {"current_steps": 200, "total_steps": 1800, "loss": 0.2367, "lr": 0.000999660634419631, "epoch": 2.2222222222222223, "percentage": 11.11, "elapsed_time": "0:00:35", "remaining_time": "0:04:44", "throughput": 1755.89, "total_tokens": 62528} |
| {"current_steps": 205, "total_steps": 1800, "loss": 0.234, "lr": 0.0009994585556692623, "epoch": 2.2777777777777777, "percentage": 11.39, "elapsed_time": "0:00:36", "remaining_time": "0:04:43", "throughput": 1760.13, "total_tokens": 64064} |
| {"current_steps": 210, "total_steps": 1800, "loss": 0.2351, "lr": 0.0009992095193310836, "epoch": 2.3333333333333335, "percentage": 11.67, "elapsed_time": "0:00:37", "remaining_time": "0:04:41", "throughput": 1766.54, "total_tokens": 65696} |
| {"current_steps": 215, "total_steps": 1800, "loss": 0.2384, "lr": 0.0009989135488187406, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:37", "remaining_time": "0:04:39", "throughput": 1770.41, "total_tokens": 67232} |
| {"current_steps": 220, "total_steps": 1800, "loss": 0.2308, "lr": 0.0009985706719584887, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:38", "remaining_time": "0:04:38", "throughput": 1775.55, "total_tokens": 68832} |
| {"current_steps": 225, "total_steps": 1800, "loss": 0.2296, "lr": 0.000998180920986577, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:39", "remaining_time": "0:04:36", "throughput": 1780.52, "total_tokens": 70432} |
| {"current_steps": 230, "total_steps": 1800, "loss": 0.243, "lr": 0.0009977443325462165, "epoch": 2.5555555555555554, "percentage": 12.78, "elapsed_time": "0:00:40", "remaining_time": "0:04:35", "throughput": 1785.27, "total_tokens": 72032} |
| {"current_steps": 235, "total_steps": 1800, "loss": 0.2275, "lr": 0.0009972609476841367, "epoch": 2.611111111111111, "percentage": 13.06, "elapsed_time": "0:00:41", "remaining_time": "0:04:33", "throughput": 1789.89, "total_tokens": 73632} |
| {"current_steps": 240, "total_steps": 1800, "loss": 0.2462, "lr": 0.0009967308118467252, "epoch": 2.6666666666666665, "percentage": 13.33, "elapsed_time": "0:00:41", "remaining_time": "0:04:32", "throughput": 1792.98, "total_tokens": 75168} |
| {"current_steps": 245, "total_steps": 1800, "loss": 0.2286, "lr": 0.0009961539748757548, "epoch": 2.7222222222222223, "percentage": 13.61, "elapsed_time": "0:00:42", "remaining_time": "0:04:31", "throughput": 1797.27, "total_tokens": 76768} |
| {"current_steps": 250, "total_steps": 1800, "loss": 0.2263, "lr": 0.0009955304910036994, "epoch": 2.7777777777777777, "percentage": 13.89, "elapsed_time": "0:00:43", "remaining_time": "0:04:29", "throughput": 1800.62, "total_tokens": 78336} |
| {"current_steps": 255, "total_steps": 1800, "loss": 0.2373, "lr": 0.0009948604188486328, "epoch": 2.8333333333333335, "percentage": 14.17, "elapsed_time": "0:00:44", "remaining_time": "0:04:28", "throughput": 1803.12, "total_tokens": 79872} |
| {"current_steps": 260, "total_steps": 1800, "loss": 0.2242, "lr": 0.000994143821408719, "epoch": 2.888888888888889, "percentage": 14.44, "elapsed_time": "0:00:45", "remaining_time": "0:04:27", "throughput": 1806.23, "total_tokens": 81440} |
| {"current_steps": 265, "total_steps": 1800, "loss": 0.2227, "lr": 0.0009933807660562897, "epoch": 2.9444444444444446, "percentage": 14.72, "elapsed_time": "0:00:45", "remaining_time": "0:04:25", "throughput": 1809.3, "total_tokens": 83008} |
| {"current_steps": 270, "total_steps": 1800, "loss": 0.2379, "lr": 0.0009925713245315083, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:46", "remaining_time": "0:04:24", "throughput": 1810.56, "total_tokens": 84544} |
| {"current_steps": 270, "total_steps": 1800, "eval_loss": 0.23681053519248962, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:47", "remaining_time": "0:04:29", "throughput": 1779.68, "total_tokens": 84544} |
| {"current_steps": 275, "total_steps": 1800, "loss": 0.2435, "lr": 0.0009917155729356273, "epoch": 3.0555555555555554, "percentage": 15.28, "elapsed_time": "0:00:49", "remaining_time": "0:04:32", "throughput": 1749.36, "total_tokens": 86112} |
| {"current_steps": 280, "total_steps": 1800, "loss": 0.2295, "lr": 0.000990813591723832, "epoch": 3.111111111111111, "percentage": 15.56, "elapsed_time": "0:00:50", "remaining_time": "0:04:31", "throughput": 1753.12, "total_tokens": 87680} |
| {"current_steps": 285, "total_steps": 1800, "loss": 0.2372, "lr": 0.000989865465697677, "epoch": 3.1666666666666665, "percentage": 15.83, "elapsed_time": "0:00:50", "remaining_time": "0:04:30", "throughput": 1756.2, "total_tokens": 89216} |
| {"current_steps": 290, "total_steps": 1800, "loss": 0.2306, "lr": 0.0009888712839971133, "epoch": 3.2222222222222223, "percentage": 16.11, "elapsed_time": "0:00:51", "remaining_time": "0:04:28", "throughput": 1760.25, "total_tokens": 90816} |
| {"current_steps": 295, "total_steps": 1800, "loss": 0.2307, "lr": 0.0009878311400921072, "epoch": 3.2777777777777777, "percentage": 16.39, "elapsed_time": "0:00:52", "remaining_time": "0:04:27", "throughput": 1764.8, "total_tokens": 92448} |
| {"current_steps": 300, "total_steps": 1800, "loss": 0.2298, "lr": 0.0009867451317738534, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:53", "remaining_time": "0:04:25", "throughput": 1767.59, "total_tokens": 93984} |
| {"current_steps": 305, "total_steps": 1800, "loss": 0.236, "lr": 0.0009856133611455802, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:00:53", "remaining_time": "0:04:24", "throughput": 1771.37, "total_tokens": 95584} |
| {"current_steps": 310, "total_steps": 1800, "loss": 0.2356, "lr": 0.0009844359346129503, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:00:54", "remaining_time": "0:04:23", "throughput": 1773.45, "total_tokens": 97088} |
| {"current_steps": 315, "total_steps": 1800, "loss": 0.2307, "lr": 0.0009832129628740574, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:00:55", "remaining_time": "0:04:21", "throughput": 1777.01, "total_tokens": 98688} |
| {"current_steps": 320, "total_steps": 1800, "loss": 0.2377, "lr": 0.0009819445609090174, "epoch": 3.5555555555555554, "percentage": 17.78, "elapsed_time": "0:00:56", "remaining_time": "0:04:20", "throughput": 1780.51, "total_tokens": 100288} |
| {"current_steps": 325, "total_steps": 1800, "loss": 0.2348, "lr": 0.0009806308479691594, "epoch": 3.611111111111111, "percentage": 18.06, "elapsed_time": "0:00:57", "remaining_time": "0:04:19", "throughput": 1782.88, "total_tokens": 101824} |
| {"current_steps": 330, "total_steps": 1800, "loss": 0.2297, "lr": 0.0009792719475658143, "epoch": 3.6666666666666665, "percentage": 18.33, "elapsed_time": "0:00:57", "remaining_time": "0:04:17", "throughput": 1785.68, "total_tokens": 103392} |
| {"current_steps": 335, "total_steps": 1800, "loss": 0.2338, "lr": 0.0009778679874587015, "epoch": 3.7222222222222223, "percentage": 18.61, "elapsed_time": "0:00:58", "remaining_time": "0:04:16", "throughput": 1787.9, "total_tokens": 104928} |
| {"current_steps": 340, "total_steps": 1800, "loss": 0.234, "lr": 0.0009764190996439181, "epoch": 3.7777777777777777, "percentage": 18.89, "elapsed_time": "0:00:59", "remaining_time": "0:04:15", "throughput": 1791.42, "total_tokens": 106560} |
| {"current_steps": 345, "total_steps": 1800, "loss": 0.232, "lr": 0.0009749254203415288, "epoch": 3.8333333333333335, "percentage": 19.17, "elapsed_time": "0:01:00", "remaining_time": "0:04:14", "throughput": 1793.36, "total_tokens": 108096} |
| {"current_steps": 350, "total_steps": 1800, "loss": 0.2287, "lr": 0.000973387089982759, "epoch": 3.888888888888889, "percentage": 19.44, "elapsed_time": "0:01:01", "remaining_time": "0:04:12", "throughput": 1795.81, "total_tokens": 109664} |
| {"current_steps": 355, "total_steps": 1800, "loss": 0.2336, "lr": 0.0009718042531967918, "epoch": 3.9444444444444446, "percentage": 19.72, "elapsed_time": "0:01:01", "remaining_time": "0:04:11", "throughput": 1798.22, "total_tokens": 111232} |
| {"current_steps": 360, "total_steps": 1800, "loss": 0.223, "lr": 0.0009701770587971706, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:02", "remaining_time": "0:04:10", "throughput": 1799.71, "total_tokens": 112800} |
| {"current_steps": 360, "total_steps": 1800, "eval_loss": 0.24439206719398499, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:03", "remaining_time": "0:04:13", "throughput": 1776.72, "total_tokens": 112800} |
| {"current_steps": 365, "total_steps": 1800, "loss": 0.2636, "lr": 0.0009685056597678075, "epoch": 4.055555555555555, "percentage": 20.28, "elapsed_time": "0:01:05", "remaining_time": "0:04:16", "throughput": 1753.69, "total_tokens": 114400} |
| {"current_steps": 370, "total_steps": 1800, "loss": 0.2268, "lr": 0.0009667902132486009, "epoch": 4.111111111111111, "percentage": 20.56, "elapsed_time": "0:01:06", "remaining_time": "0:04:15", "throughput": 1755.99, "total_tokens": 115936} |
| {"current_steps": 375, "total_steps": 1800, "loss": 0.2314, "lr": 0.0009650308805206616, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:01:06", "remaining_time": "0:04:13", "throughput": 1758.28, "total_tokens": 117472} |
| {"current_steps": 380, "total_steps": 1800, "loss": 0.2235, "lr": 0.0009632278269911492, "epoch": 4.222222222222222, "percentage": 21.11, "elapsed_time": "0:01:07", "remaining_time": "0:04:12", "throughput": 1760.93, "total_tokens": 119040} |
| {"current_steps": 385, "total_steps": 1800, "loss": 0.2426, "lr": 0.0009613812221777212, "epoch": 4.277777777777778, "percentage": 21.39, "elapsed_time": "0:01:08", "remaining_time": "0:04:11", "throughput": 1763.97, "total_tokens": 120640} |
| {"current_steps": 390, "total_steps": 1800, "loss": 0.2342, "lr": 0.0009594912396925958, "epoch": 4.333333333333333, "percentage": 21.67, "elapsed_time": "0:01:09", "remaining_time": "0:04:10", "throughput": 1766.14, "total_tokens": 122176} |
| {"current_steps": 395, "total_steps": 1800, "loss": 0.2414, "lr": 0.0009575580572262289, "epoch": 4.388888888888889, "percentage": 21.94, "elapsed_time": "0:01:09", "remaining_time": "0:04:08", "throughput": 1768.66, "total_tokens": 123744} |
| {"current_steps": 400, "total_steps": 1800, "loss": 0.2329, "lr": 0.0009555818565306084, "epoch": 4.444444444444445, "percentage": 22.22, "elapsed_time": "0:01:10", "remaining_time": "0:04:07", "throughput": 1770.72, "total_tokens": 125280} |
| {"current_steps": 405, "total_steps": 1800, "loss": 0.2479, "lr": 0.0009535628234021669, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:01:11", "remaining_time": "0:04:06", "throughput": 1772.72, "total_tokens": 126816} |
| {"current_steps": 410, "total_steps": 1800, "loss": 0.2604, "lr": 0.0009515011476643126, "epoch": 4.555555555555555, "percentage": 22.78, "elapsed_time": "0:01:12", "remaining_time": "0:04:05", "throughput": 1774.67, "total_tokens": 128352} |
| {"current_steps": 415, "total_steps": 1800, "loss": 0.2329, "lr": 0.0009493970231495835, "epoch": 4.611111111111111, "percentage": 23.06, "elapsed_time": "0:01:13", "remaining_time": "0:04:04", "throughput": 1776.98, "total_tokens": 129920} |
| {"current_steps": 420, "total_steps": 1800, "loss": 0.2337, "lr": 0.0009472506476814238, "epoch": 4.666666666666667, "percentage": 23.33, "elapsed_time": "0:01:13", "remaining_time": "0:04:02", "throughput": 1778.46, "total_tokens": 131424} |
| {"current_steps": 425, "total_steps": 1800, "loss": 0.2363, "lr": 0.0009450622230555847, "epoch": 4.722222222222222, "percentage": 23.61, "elapsed_time": "0:01:14", "remaining_time": "0:04:01", "throughput": 1780.27, "total_tokens": 132960} |
| {"current_steps": 430, "total_steps": 1800, "loss": 0.226, "lr": 0.0009428319550211531, "epoch": 4.777777777777778, "percentage": 23.89, "elapsed_time": "0:01:15", "remaining_time": "0:04:00", "throughput": 1782.41, "total_tokens": 134528} |
| {"current_steps": 435, "total_steps": 1800, "loss": 0.2309, "lr": 0.000940560053261206, "epoch": 4.833333333333333, "percentage": 24.17, "elapsed_time": "0:01:16", "remaining_time": "0:03:59", "throughput": 1784.44, "total_tokens": 136096} |
| {"current_steps": 440, "total_steps": 1800, "loss": 0.2332, "lr": 0.0009382467313730985, "epoch": 4.888888888888889, "percentage": 24.44, "elapsed_time": "0:01:17", "remaining_time": "0:03:58", "throughput": 1787.2, "total_tokens": 137728} |
| {"current_steps": 445, "total_steps": 1800, "loss": 0.2259, "lr": 0.0009358922068483812, "epoch": 4.944444444444445, "percentage": 24.72, "elapsed_time": "0:01:17", "remaining_time": "0:03:57", "throughput": 1788.85, "total_tokens": 139264} |
| {"current_steps": 450, "total_steps": 1800, "loss": 0.2277, "lr": 0.0009334967010523523, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:18", "remaining_time": "0:03:56", "throughput": 1789.78, "total_tokens": 140800} |
| {"current_steps": 450, "total_steps": 1800, "eval_loss": 0.23905985057353973, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:19", "remaining_time": "0:03:58", "throughput": 1771.49, "total_tokens": 140800} |
| {"current_steps": 455, "total_steps": 1800, "loss": 0.2608, "lr": 0.0009310604392032455, "epoch": 5.055555555555555, "percentage": 25.28, "elapsed_time": "0:01:21", "remaining_time": "0:03:59", "throughput": 1754.51, "total_tokens": 142400} |
| {"current_steps": 460, "total_steps": 1800, "loss": 0.2342, "lr": 0.0009285836503510562, "epoch": 5.111111111111111, "percentage": 25.56, "elapsed_time": "0:01:21", "remaining_time": "0:03:58", "throughput": 1755.7, "total_tokens": 143872} |
| {"current_steps": 465, "total_steps": 1800, "loss": 0.2325, "lr": 0.0009260665673560057, "epoch": 5.166666666666667, "percentage": 25.83, "elapsed_time": "0:01:22", "remaining_time": "0:03:57", "throughput": 1757.9, "total_tokens": 145440} |
| {"current_steps": 470, "total_steps": 1800, "loss": 0.2348, "lr": 0.0009235094268666498, "epoch": 5.222222222222222, "percentage": 26.11, "elapsed_time": "0:01:23", "remaining_time": "0:03:56", "throughput": 1760.42, "total_tokens": 147040} |
| {"current_steps": 475, "total_steps": 1800, "loss": 0.2353, "lr": 0.0009209124692976287, "epoch": 5.277777777777778, "percentage": 26.39, "elapsed_time": "0:01:24", "remaining_time": "0:03:55", "throughput": 1762.22, "total_tokens": 148576} |
| {"current_steps": 480, "total_steps": 1800, "loss": 0.2285, "lr": 0.0009182759388070649, "epoch": 5.333333333333333, "percentage": 26.67, "elapsed_time": "0:01:25", "remaining_time": "0:03:54", "throughput": 1763.97, "total_tokens": 150112} |
| {"current_steps": 485, "total_steps": 1800, "loss": 0.2405, "lr": 0.0009156000832736073, "epoch": 5.388888888888889, "percentage": 26.94, "elapsed_time": "0:01:25", "remaining_time": "0:03:52", "throughput": 1766.37, "total_tokens": 151712} |
| {"current_steps": 490, "total_steps": 1800, "loss": 0.2288, "lr": 0.0009128851542731271, "epoch": 5.444444444444445, "percentage": 27.22, "elapsed_time": "0:01:26", "remaining_time": "0:03:51", "throughput": 1768.39, "total_tokens": 153280} |
| {"current_steps": 495, "total_steps": 1800, "loss": 0.2349, "lr": 0.0009101314070550646, "epoch": 5.5, "percentage": 27.5, "elapsed_time": "0:01:27", "remaining_time": "0:03:50", "throughput": 1770.4, "total_tokens": 154848} |
| {"current_steps": 500, "total_steps": 1800, "loss": 0.2272, "lr": 0.0009073391005184324, "epoch": 5.555555555555555, "percentage": 27.78, "elapsed_time": "0:01:28", "remaining_time": "0:03:49", "throughput": 1771.69, "total_tokens": 156352} |
| {"current_steps": 505, "total_steps": 1800, "loss": 0.2436, "lr": 0.0009045084971874737, "epoch": 5.611111111111111, "percentage": 28.06, "elapsed_time": "0:01:29", "remaining_time": "0:03:48", "throughput": 1773.62, "total_tokens": 157920} |
| {"current_steps": 510, "total_steps": 1800, "loss": 0.2235, "lr": 0.0009016398631869811, "epoch": 5.666666666666667, "percentage": 28.33, "elapsed_time": "0:01:29", "remaining_time": "0:03:47", "throughput": 1775.52, "total_tokens": 159488} |
| {"current_steps": 515, "total_steps": 1800, "loss": 0.2307, "lr": 0.0008987334682172759, "epoch": 5.722222222222222, "percentage": 28.61, "elapsed_time": "0:01:30", "remaining_time": "0:03:46", "throughput": 1777.37, "total_tokens": 161056} |
| {"current_steps": 520, "total_steps": 1800, "loss": 0.2326, "lr": 0.0008957895855288517, "epoch": 5.777777777777778, "percentage": 28.89, "elapsed_time": "0:01:31", "remaining_time": "0:03:45", "throughput": 1779.12, "total_tokens": 162624} |
| {"current_steps": 525, "total_steps": 1800, "loss": 0.2316, "lr": 0.000892808491896685, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:01:32", "remaining_time": "0:03:43", "throughput": 1780.21, "total_tokens": 164128} |
| {"current_steps": 530, "total_steps": 1800, "loss": 0.2337, "lr": 0.0008897904675942128, "epoch": 5.888888888888889, "percentage": 29.44, "elapsed_time": "0:01:32", "remaining_time": "0:03:42", "throughput": 1781.63, "total_tokens": 165664} |
| {"current_steps": 535, "total_steps": 1800, "loss": 0.2294, "lr": 0.000886735796366982, "epoch": 5.944444444444445, "percentage": 29.72, "elapsed_time": "0:01:33", "remaining_time": "0:03:41", "throughput": 1783.06, "total_tokens": 167200} |
| {"current_steps": 540, "total_steps": 1800, "loss": 0.2325, "lr": 0.0008836447654059734, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:34", "remaining_time": "0:03:40", "throughput": 1783.9, "total_tokens": 168736} |
| {"current_steps": 540, "total_steps": 1800, "eval_loss": 0.23540043830871582, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:35", "remaining_time": "0:03:42", "throughput": 1768.7, "total_tokens": 168736} |
| {"current_steps": 545, "total_steps": 1800, "loss": 0.2351, "lr": 0.0008805176653206003, "epoch": 6.055555555555555, "percentage": 30.28, "elapsed_time": "0:01:37", "remaining_time": "0:03:43", "throughput": 1753.68, "total_tokens": 170304} |
| {"current_steps": 550, "total_steps": 1800, "loss": 0.2259, "lr": 0.000877354790111386, "epoch": 6.111111111111111, "percentage": 30.56, "elapsed_time": "0:01:37", "remaining_time": "0:03:42", "throughput": 1755.27, "total_tokens": 171840} |
| {"current_steps": 555, "total_steps": 1800, "loss": 0.2309, "lr": 0.0008741564371423235, "epoch": 6.166666666666667, "percentage": 30.83, "elapsed_time": "0:01:38", "remaining_time": "0:03:41", "throughput": 1757.13, "total_tokens": 173408} |
| {"current_steps": 560, "total_steps": 1800, "loss": 0.2348, "lr": 0.0008709229071129177, "epoch": 6.222222222222222, "percentage": 31.11, "elapsed_time": "0:01:39", "remaining_time": "0:03:40", "throughput": 1758.97, "total_tokens": 174976} |
| {"current_steps": 565, "total_steps": 1800, "loss": 0.2309, "lr": 0.0008676545040299144, "epoch": 6.277777777777778, "percentage": 31.39, "elapsed_time": "0:01:40", "remaining_time": "0:03:39", "throughput": 1760.79, "total_tokens": 176544} |
| {"current_steps": 570, "total_steps": 1800, "loss": 0.2311, "lr": 0.0008643515351787192, "epoch": 6.333333333333333, "percentage": 31.67, "elapsed_time": "0:01:41", "remaining_time": "0:03:38", "throughput": 1762.57, "total_tokens": 178112} |
| {"current_steps": 575, "total_steps": 1800, "loss": 0.224, "lr": 0.0008610143110945068, "epoch": 6.388888888888889, "percentage": 31.94, "elapsed_time": "0:01:41", "remaining_time": "0:03:36", "throughput": 1763.5, "total_tokens": 179584} |
| {"current_steps": 580, "total_steps": 1800, "loss": 0.2255, "lr": 0.0008576431455330258, "epoch": 6.444444444444445, "percentage": 32.22, "elapsed_time": "0:01:42", "remaining_time": "0:03:35", "throughput": 1765.23, "total_tokens": 181152} |
| {"current_steps": 585, "total_steps": 1800, "loss": 0.2436, "lr": 0.0008542383554411, "epoch": 6.5, "percentage": 32.5, "elapsed_time": "0:01:43", "remaining_time": "0:03:34", "throughput": 1766.66, "total_tokens": 182688} |
| {"current_steps": 590, "total_steps": 1800, "loss": 0.2213, "lr": 0.0008508002609268301, "epoch": 6.555555555555555, "percentage": 32.78, "elapsed_time": "0:01:44", "remaining_time": "0:03:33", "throughput": 1768.06, "total_tokens": 184224} |
| {"current_steps": 595, "total_steps": 1800, "loss": 0.2417, "lr": 0.0008473291852294987, "epoch": 6.611111111111111, "percentage": 33.06, "elapsed_time": "0:01:44", "remaining_time": "0:03:32", "throughput": 1769.72, "total_tokens": 185792} |
| {"current_steps": 600, "total_steps": 1800, "loss": 0.2242, "lr": 0.0008438254546891792, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:45", "remaining_time": "0:03:31", "throughput": 1771.61, "total_tokens": 187392} |
| {"current_steps": 605, "total_steps": 1800, "loss": 0.226, "lr": 0.0008402893987160552, "epoch": 6.722222222222222, "percentage": 33.61, "elapsed_time": "0:01:46", "remaining_time": "0:03:30", "throughput": 1773.46, "total_tokens": 188992} |
| {"current_steps": 610, "total_steps": 1800, "loss": 0.2421, "lr": 0.0008367213497594501, "epoch": 6.777777777777778, "percentage": 33.89, "elapsed_time": "0:01:47", "remaining_time": "0:03:29", "throughput": 1775.02, "total_tokens": 190560} |
| {"current_steps": 615, "total_steps": 1800, "loss": 0.2386, "lr": 0.0008331216432765713, "epoch": 6.833333333333333, "percentage": 34.17, "elapsed_time": "0:01:48", "remaining_time": "0:03:28", "throughput": 1776.48, "total_tokens": 192128} |
| {"current_steps": 620, "total_steps": 1800, "loss": 0.2297, "lr": 0.0008294906177009707, "epoch": 6.888888888888889, "percentage": 34.44, "elapsed_time": "0:01:48", "remaining_time": "0:03:27", "throughput": 1777.72, "total_tokens": 193664} |
| {"current_steps": 625, "total_steps": 1800, "loss": 0.2265, "lr": 0.0008258286144107276, "epoch": 6.944444444444445, "percentage": 34.72, "elapsed_time": "0:01:49", "remaining_time": "0:03:26", "throughput": 1779.75, "total_tokens": 195296} |
| {"current_steps": 630, "total_steps": 1800, "loss": 0.2296, "lr": 0.0008221359776963525, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:50", "remaining_time": "0:03:25", "throughput": 1780.99, "total_tokens": 196896} |
| {"current_steps": 630, "total_steps": 1800, "eval_loss": 0.23590680956840515, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:51", "remaining_time": "0:03:26", "throughput": 1768.0, "total_tokens": 196896} |
| {"current_steps": 635, "total_steps": 1800, "loss": 0.2512, "lr": 0.000818413054728418, "epoch": 7.055555555555555, "percentage": 35.28, "elapsed_time": "0:01:53", "remaining_time": "0:03:27", "throughput": 1756.16, "total_tokens": 198528} |
| {"current_steps": 640, "total_steps": 1800, "loss": 0.2285, "lr": 0.0008146601955249188, "epoch": 7.111111111111111, "percentage": 35.56, "elapsed_time": "0:01:53", "remaining_time": "0:03:26", "throughput": 1757.72, "total_tokens": 200096} |
| {"current_steps": 645, "total_steps": 1800, "loss": 0.2366, "lr": 0.0008108777529183644, "epoch": 7.166666666666667, "percentage": 35.83, "elapsed_time": "0:01:54", "remaining_time": "0:03:25", "throughput": 1759.56, "total_tokens": 201696} |
| {"current_steps": 650, "total_steps": 1800, "loss": 0.2374, "lr": 0.000807066082522607, "epoch": 7.222222222222222, "percentage": 36.11, "elapsed_time": "0:01:55", "remaining_time": "0:03:24", "throughput": 1760.87, "total_tokens": 203232} |
| {"current_steps": 655, "total_steps": 1800, "loss": 0.2287, "lr": 0.0008032255426994069, "epoch": 7.277777777777778, "percentage": 36.39, "elapsed_time": "0:01:56", "remaining_time": "0:03:23", "throughput": 1762.17, "total_tokens": 204768} |
| {"current_steps": 660, "total_steps": 1800, "loss": 0.2361, "lr": 0.0007993564945247409, "epoch": 7.333333333333333, "percentage": 36.67, "elapsed_time": "0:01:56", "remaining_time": "0:03:22", "throughput": 1763.94, "total_tokens": 206368} |
| {"current_steps": 665, "total_steps": 1800, "loss": 0.2285, "lr": 0.0007954593017548556, "epoch": 7.388888888888889, "percentage": 36.94, "elapsed_time": "0:01:57", "remaining_time": "0:03:21", "throughput": 1765.2, "total_tokens": 207904} |
| {"current_steps": 670, "total_steps": 1800, "loss": 0.2272, "lr": 0.0007915343307920673, "epoch": 7.444444444444445, "percentage": 37.22, "elapsed_time": "0:01:58", "remaining_time": "0:03:19", "throughput": 1766.69, "total_tokens": 209472} |
| {"current_steps": 675, "total_steps": 1800, "loss": 0.2423, "lr": 0.0007875819506503144, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:01:59", "remaining_time": "0:03:18", "throughput": 1768.37, "total_tokens": 211072} |
| {"current_steps": 680, "total_steps": 1800, "loss": 0.2354, "lr": 0.0007836025329204635, "epoch": 7.555555555555555, "percentage": 37.78, "elapsed_time": "0:02:00", "remaining_time": "0:03:17", "throughput": 1769.57, "total_tokens": 212608} |
| {"current_steps": 685, "total_steps": 1800, "loss": 0.2304, "lr": 0.0007795964517353734, "epoch": 7.611111111111111, "percentage": 38.06, "elapsed_time": "0:02:00", "remaining_time": "0:03:16", "throughput": 1771.0, "total_tokens": 214176} |
| {"current_steps": 690, "total_steps": 1800, "loss": 0.2325, "lr": 0.0007755640837347215, "epoch": 7.666666666666667, "percentage": 38.33, "elapsed_time": "0:02:01", "remaining_time": "0:03:15", "throughput": 1772.41, "total_tokens": 215744} |
| {"current_steps": 695, "total_steps": 1800, "loss": 0.2296, "lr": 0.0007715058080295917, "epoch": 7.722222222222222, "percentage": 38.61, "elapsed_time": "0:02:02", "remaining_time": "0:03:14", "throughput": 1773.3, "total_tokens": 217248} |
| {"current_steps": 700, "total_steps": 1800, "loss": 0.2306, "lr": 0.0007674220061668323, "epoch": 7.777777777777778, "percentage": 38.89, "elapsed_time": "0:02:03", "remaining_time": "0:03:13", "throughput": 1774.66, "total_tokens": 218816} |
| {"current_steps": 705, "total_steps": 1800, "loss": 0.2318, "lr": 0.0007633130620931837, "epoch": 7.833333333333333, "percentage": 39.17, "elapsed_time": "0:02:04", "remaining_time": "0:03:12", "throughput": 1775.24, "total_tokens": 220288} |
| {"current_steps": 710, "total_steps": 1800, "loss": 0.2307, "lr": 0.0007591793621191819, "epoch": 7.888888888888889, "percentage": 39.44, "elapsed_time": "0:02:04", "remaining_time": "0:03:11", "throughput": 1776.55, "total_tokens": 221856} |
| {"current_steps": 715, "total_steps": 1800, "loss": 0.236, "lr": 0.0007550212948828377, "epoch": 7.944444444444445, "percentage": 39.72, "elapsed_time": "0:02:05", "remaining_time": "0:03:10", "throughput": 1777.87, "total_tokens": 223424} |
| {"current_steps": 720, "total_steps": 1800, "loss": 0.2306, "lr": 0.0007508392513130979, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:06", "remaining_time": "0:03:09", "throughput": 1779.2, "total_tokens": 225056} |
| {"current_steps": 720, "total_steps": 1800, "eval_loss": 0.23065951466560364, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:07", "remaining_time": "0:03:10", "throughput": 1767.87, "total_tokens": 225056} |
| {"current_steps": 725, "total_steps": 1800, "loss": 0.2306, "lr": 0.0007466336245930927, "epoch": 8.055555555555555, "percentage": 40.28, "elapsed_time": "0:02:09", "remaining_time": "0:03:11", "throughput": 1754.94, "total_tokens": 226656} |
| {"current_steps": 730, "total_steps": 1800, "loss": 0.2357, "lr": 0.0007424048101231686, "epoch": 8.11111111111111, "percentage": 40.56, "elapsed_time": "0:02:09", "remaining_time": "0:03:10", "throughput": 1755.71, "total_tokens": 228160} |
| {"current_steps": 735, "total_steps": 1800, "loss": 0.2263, "lr": 0.0007381532054837144, "epoch": 8.166666666666666, "percentage": 40.83, "elapsed_time": "0:02:10", "remaining_time": "0:03:09", "throughput": 1757.11, "total_tokens": 229728} |
| {"current_steps": 740, "total_steps": 1800, "loss": 0.2296, "lr": 0.0007338792103977821, "epoch": 8.222222222222221, "percentage": 41.11, "elapsed_time": "0:02:11", "remaining_time": "0:03:08", "throughput": 1758.72, "total_tokens": 231328} |
| {"current_steps": 745, "total_steps": 1800, "loss": 0.2318, "lr": 0.0007295832266935059, "epoch": 8.277777777777779, "percentage": 41.39, "elapsed_time": "0:02:12", "remaining_time": "0:03:07", "throughput": 1759.67, "total_tokens": 232832} |
| {"current_steps": 750, "total_steps": 1800, "loss": 0.228, "lr": 0.0007252656582663236, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:02:13", "remaining_time": "0:03:06", "throughput": 1761.26, "total_tokens": 234432} |
| {"current_steps": 755, "total_steps": 1800, "loss": 0.2336, "lr": 0.0007209269110410039, "epoch": 8.38888888888889, "percentage": 41.94, "elapsed_time": "0:02:13", "remaining_time": "0:03:05", "throughput": 1762.4, "total_tokens": 235968} |
| {"current_steps": 760, "total_steps": 1800, "loss": 0.2291, "lr": 0.0007165673929334815, "epoch": 8.444444444444445, "percentage": 42.22, "elapsed_time": "0:02:14", "remaining_time": "0:03:04", "throughput": 1763.94, "total_tokens": 237568} |
| {"current_steps": 765, "total_steps": 1800, "loss": 0.2277, "lr": 0.0007121875138125077, "epoch": 8.5, "percentage": 42.5, "elapsed_time": "0:02:15", "remaining_time": "0:03:03", "throughput": 1765.47, "total_tokens": 239168} |
| {"current_steps": 770, "total_steps": 1800, "loss": 0.2328, "lr": 0.0007077876854611145, "epoch": 8.555555555555555, "percentage": 42.78, "elapsed_time": "0:02:16", "remaining_time": "0:03:02", "throughput": 1767.18, "total_tokens": 240800} |
| {"current_steps": 775, "total_steps": 1800, "loss": 0.2373, "lr": 0.0007033683215379002, "epoch": 8.61111111111111, "percentage": 43.06, "elapsed_time": "0:02:17", "remaining_time": "0:03:01", "throughput": 1768.46, "total_tokens": 242368} |
| {"current_steps": 780, "total_steps": 1800, "loss": 0.2295, "lr": 0.000698929837538139, "epoch": 8.666666666666666, "percentage": 43.33, "elapsed_time": "0:02:17", "remaining_time": "0:03:00", "throughput": 1769.3, "total_tokens": 243872} |
| {"current_steps": 785, "total_steps": 1800, "loss": 0.2352, "lr": 0.0006944726507547168, "epoch": 8.722222222222221, "percentage": 43.61, "elapsed_time": "0:02:18", "remaining_time": "0:02:59", "throughput": 1770.72, "total_tokens": 245472} |
| {"current_steps": 790, "total_steps": 1800, "loss": 0.237, "lr": 0.0006899971802388996, "epoch": 8.777777777777779, "percentage": 43.89, "elapsed_time": "0:02:19", "remaining_time": "0:02:58", "throughput": 1772.13, "total_tokens": 247072} |
| {"current_steps": 795, "total_steps": 1800, "loss": 0.2265, "lr": 0.0006855038467609335, "epoch": 8.833333333333334, "percentage": 44.17, "elapsed_time": "0:02:20", "remaining_time": "0:02:57", "throughput": 1773.28, "total_tokens": 248640} |
| {"current_steps": 800, "total_steps": 1800, "loss": 0.2305, "lr": 0.0006809930727704874, "epoch": 8.88888888888889, "percentage": 44.44, "elapsed_time": "0:02:21", "remaining_time": "0:02:56", "throughput": 1774.43, "total_tokens": 250208} |
| {"current_steps": 805, "total_steps": 1800, "loss": 0.2254, "lr": 0.0006764652823569344, "epoch": 8.944444444444445, "percentage": 44.72, "elapsed_time": "0:02:21", "remaining_time": "0:02:55", "throughput": 1775.42, "total_tokens": 251744} |
| {"current_steps": 810, "total_steps": 1800, "loss": 0.2271, "lr": 0.0006719209012094805, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:22", "remaining_time": "0:02:54", "throughput": 1776.22, "total_tokens": 253312} |
| {"current_steps": 810, "total_steps": 1800, "eval_loss": 0.2339496612548828, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:23", "remaining_time": "0:02:55", "throughput": 1766.16, "total_tokens": 253312} |
| {"current_steps": 815, "total_steps": 1800, "loss": 0.2324, "lr": 0.0006673603565771424, "epoch": 9.055555555555555, "percentage": 45.28, "elapsed_time": "0:02:25", "remaining_time": "0:02:55", "throughput": 1756.61, "total_tokens": 254944} |
| {"current_steps": 820, "total_steps": 1800, "loss": 0.235, "lr": 0.0006627840772285784, "epoch": 9.11111111111111, "percentage": 45.56, "elapsed_time": "0:02:25", "remaining_time": "0:02:54", "throughput": 1757.85, "total_tokens": 256512} |
| {"current_steps": 825, "total_steps": 1800, "loss": 0.2275, "lr": 0.0006581924934117783, "epoch": 9.166666666666666, "percentage": 45.83, "elapsed_time": "0:02:26", "remaining_time": "0:02:53", "throughput": 1758.7, "total_tokens": 258016} |
| {"current_steps": 830, "total_steps": 1800, "loss": 0.2159, "lr": 0.0006535860368136113, "epoch": 9.222222222222221, "percentage": 46.11, "elapsed_time": "0:02:27", "remaining_time": "0:02:52", "throughput": 1759.72, "total_tokens": 259552} |
| {"current_steps": 835, "total_steps": 1800, "loss": 0.2203, "lr": 0.0006489651405192409, "epoch": 9.277777777777779, "percentage": 46.39, "elapsed_time": "0:02:28", "remaining_time": "0:02:51", "throughput": 1761.13, "total_tokens": 261152} |
| {"current_steps": 840, "total_steps": 1800, "loss": 0.232, "lr": 0.0006443302389714074, "epoch": 9.333333333333334, "percentage": 46.67, "elapsed_time": "0:02:29", "remaining_time": "0:02:50", "throughput": 1762.34, "total_tokens": 262720} |
| {"current_steps": 845, "total_steps": 1800, "loss": 0.2329, "lr": 0.0006396817679295822, "epoch": 9.38888888888889, "percentage": 46.94, "elapsed_time": "0:02:29", "remaining_time": "0:02:49", "throughput": 1763.15, "total_tokens": 264224} |
| {"current_steps": 850, "total_steps": 1800, "loss": 0.2728, "lr": 0.0006350201644290005, "epoch": 9.444444444444445, "percentage": 47.22, "elapsed_time": "0:02:30", "remaining_time": "0:02:48", "throughput": 1764.32, "total_tokens": 265792} |
| {"current_steps": 855, "total_steps": 1800, "loss": 0.2346, "lr": 0.0006303458667395708, "epoch": 9.5, "percentage": 47.5, "elapsed_time": "0:02:31", "remaining_time": "0:02:47", "throughput": 1765.68, "total_tokens": 267392} |
| {"current_steps": 860, "total_steps": 1800, "loss": 0.242, "lr": 0.0006256593143246718, "epoch": 9.555555555555555, "percentage": 47.78, "elapsed_time": "0:02:32", "remaining_time": "0:02:46", "throughput": 1766.45, "total_tokens": 268896} |
| {"current_steps": 865, "total_steps": 1800, "loss": 0.2311, "lr": 0.0006209609477998338, "epoch": 9.61111111111111, "percentage": 48.06, "elapsed_time": "0:02:33", "remaining_time": "0:02:45", "throughput": 1767.41, "total_tokens": 270432} |
| {"current_steps": 870, "total_steps": 1800, "loss": 0.2314, "lr": 0.0006162512088913149, "epoch": 9.666666666666666, "percentage": 48.33, "elapsed_time": "0:02:33", "remaining_time": "0:02:44", "throughput": 1768.35, "total_tokens": 271968} |
| {"current_steps": 875, "total_steps": 1800, "loss": 0.2158, "lr": 0.0006115305403945697, "epoch": 9.722222222222221, "percentage": 48.61, "elapsed_time": "0:02:34", "remaining_time": "0:02:43", "throughput": 1769.27, "total_tokens": 273504} |
| {"current_steps": 880, "total_steps": 1800, "loss": 0.2641, "lr": 0.0006067993861326201, "epoch": 9.777777777777779, "percentage": 48.89, "elapsed_time": "0:02:35", "remaining_time": "0:02:42", "throughput": 1770.54, "total_tokens": 275104} |
| {"current_steps": 885, "total_steps": 1800, "loss": 0.2338, "lr": 0.0006020581909143279, "epoch": 9.833333333333334, "percentage": 49.17, "elapsed_time": "0:02:36", "remaining_time": "0:02:41", "throughput": 1771.6, "total_tokens": 276672} |
| {"current_steps": 890, "total_steps": 1800, "loss": 0.2548, "lr": 0.0005973074004925755, "epoch": 9.88888888888889, "percentage": 49.44, "elapsed_time": "0:02:36", "remaining_time": "0:02:40", "throughput": 1772.86, "total_tokens": 278272} |
| {"current_steps": 895, "total_steps": 1800, "loss": 0.2242, "lr": 0.0005925474615223572, "epoch": 9.944444444444445, "percentage": 49.72, "elapsed_time": "0:02:37", "remaining_time": "0:02:39", "throughput": 1773.76, "total_tokens": 279808} |
| {"current_steps": 900, "total_steps": 1800, "loss": 0.2352, "lr": 0.0005877788215187867, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:38", "remaining_time": "0:02:38", "throughput": 1774.64, "total_tokens": 281408} |
| {"current_steps": 900, "total_steps": 1800, "eval_loss": 0.23249304294586182, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:39", "remaining_time": "0:02:39", "throughput": 1764.08, "total_tokens": 281408} |
| {"current_steps": 905, "total_steps": 1800, "loss": 0.2309, "lr": 0.0005830019288150222, "epoch": 10.055555555555555, "percentage": 50.28, "elapsed_time": "0:02:41", "remaining_time": "0:02:39", "throughput": 1755.59, "total_tokens": 282944} |
| {"current_steps": 910, "total_steps": 1800, "loss": 0.2335, "lr": 0.0005782172325201155, "epoch": 10.11111111111111, "percentage": 50.56, "elapsed_time": "0:02:42", "remaining_time": "0:02:38", "throughput": 1755.94, "total_tokens": 284480} |
| {"current_steps": 915, "total_steps": 1800, "loss": 0.2332, "lr": 0.0005734251824767894, "epoch": 10.166666666666666, "percentage": 50.83, "elapsed_time": "0:02:42", "remaining_time": "0:02:37", "throughput": 1757.24, "total_tokens": 286080} |
| {"current_steps": 920, "total_steps": 1800, "loss": 0.2291, "lr": 0.0005686262292191438, "epoch": 10.222222222222221, "percentage": 51.11, "elapsed_time": "0:02:43", "remaining_time": "0:02:36", "throughput": 1758.54, "total_tokens": 287680} |
| {"current_steps": 925, "total_steps": 1800, "loss": 0.2366, "lr": 0.0005638208239302974, "epoch": 10.277777777777779, "percentage": 51.39, "elapsed_time": "0:02:44", "remaining_time": "0:02:35", "throughput": 1759.29, "total_tokens": 289184} |
| {"current_steps": 930, "total_steps": 1800, "loss": 0.2381, "lr": 0.0005590094183999698, "epoch": 10.333333333333334, "percentage": 51.67, "elapsed_time": "0:02:45", "remaining_time": "0:02:34", "throughput": 1760.58, "total_tokens": 290784} |
| {"current_steps": 935, "total_steps": 1800, "loss": 0.2282, "lr": 0.0005541924649820054, "epoch": 10.38888888888889, "percentage": 51.94, "elapsed_time": "0:02:45", "remaining_time": "0:02:33", "throughput": 1761.83, "total_tokens": 292384} |
| {"current_steps": 940, "total_steps": 1800, "loss": 0.2216, "lr": 0.000549370416551844, "epoch": 10.444444444444445, "percentage": 52.22, "elapsed_time": "0:02:46", "remaining_time": "0:02:32", "throughput": 1762.91, "total_tokens": 293952} |
| {"current_steps": 945, "total_steps": 1800, "loss": 0.2479, "lr": 0.0005445437264639432, "epoch": 10.5, "percentage": 52.5, "elapsed_time": "0:02:47", "remaining_time": "0:02:31", "throughput": 1763.81, "total_tokens": 295488} |
| {"current_steps": 950, "total_steps": 1800, "loss": 0.2327, "lr": 0.0005397128485091551, "epoch": 10.555555555555555, "percentage": 52.78, "elapsed_time": "0:02:48", "remaining_time": "0:02:30", "throughput": 1765.04, "total_tokens": 297088} |
| {"current_steps": 955, "total_steps": 1800, "loss": 0.2319, "lr": 0.0005348782368720626, "epoch": 10.61111111111111, "percentage": 53.06, "elapsed_time": "0:02:49", "remaining_time": "0:02:29", "throughput": 1766.24, "total_tokens": 298688} |
| {"current_steps": 960, "total_steps": 1800, "loss": 0.2255, "lr": 0.0005300403460882783, "epoch": 10.666666666666666, "percentage": 53.33, "elapsed_time": "0:02:49", "remaining_time": "0:02:28", "throughput": 1767.1, "total_tokens": 300224} |
| {"current_steps": 965, "total_steps": 1800, "loss": 0.2341, "lr": 0.00052519963100171, "epoch": 10.722222222222221, "percentage": 53.61, "elapsed_time": "0:02:50", "remaining_time": "0:02:27", "throughput": 1767.77, "total_tokens": 301728} |
| {"current_steps": 970, "total_steps": 1800, "loss": 0.2302, "lr": 0.000520356546721798, "epoch": 10.777777777777779, "percentage": 53.89, "elapsed_time": "0:02:51", "remaining_time": "0:02:26", "throughput": 1768.6, "total_tokens": 303264} |
| {"current_steps": 975, "total_steps": 1800, "loss": 0.228, "lr": 0.0005155115485807269, "epoch": 10.833333333333334, "percentage": 54.17, "elapsed_time": "0:02:52", "remaining_time": "0:02:25", "throughput": 1769.74, "total_tokens": 304864} |
| {"current_steps": 980, "total_steps": 1800, "loss": 0.2435, "lr": 0.0005106650920906171, "epoch": 10.88888888888889, "percentage": 54.44, "elapsed_time": "0:02:53", "remaining_time": "0:02:24", "throughput": 1770.24, "total_tokens": 306336} |
| {"current_steps": 985, "total_steps": 1800, "loss": 0.2242, "lr": 0.0005058176329006986, "epoch": 10.944444444444445, "percentage": 54.72, "elapsed_time": "0:02:53", "remaining_time": "0:02:23", "throughput": 1771.07, "total_tokens": 307872} |
| {"current_steps": 990, "total_steps": 1800, "loss": 0.2299, "lr": 0.0005009696267544715, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:54", "remaining_time": "0:02:22", "throughput": 1771.73, "total_tokens": 309440} |
| {"current_steps": 990, "total_steps": 1800, "eval_loss": 0.2366514652967453, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:55", "remaining_time": "0:02:23", "throughput": 1763.54, "total_tokens": 309440} |
| {"current_steps": 995, "total_steps": 1800, "loss": 0.2246, "lr": 0.0004961215294468599, "epoch": 11.055555555555555, "percentage": 55.28, "elapsed_time": "0:02:57", "remaining_time": "0:02:23", "throughput": 1752.59, "total_tokens": 310976} |
| {"current_steps": 1000, "total_steps": 1800, "loss": 0.2192, "lr": 0.0004912737967813582, "epoch": 11.11111111111111, "percentage": 55.56, "elapsed_time": "0:02:58", "remaining_time": "0:02:22", "throughput": 1753.62, "total_tokens": 312544} |
| {"current_steps": 1005, "total_steps": 1800, "loss": 0.2376, "lr": 0.0004864268845271786, "epoch": 11.166666666666666, "percentage": 55.83, "elapsed_time": "0:02:59", "remaining_time": "0:02:21", "throughput": 1754.49, "total_tokens": 314080} |
| {"current_steps": 1010, "total_steps": 1800, "loss": 0.2127, "lr": 0.0004815812483764, "epoch": 11.222222222222221, "percentage": 56.11, "elapsed_time": "0:02:59", "remaining_time": "0:02:20", "throughput": 1755.68, "total_tokens": 315680} |
| {"current_steps": 1015, "total_steps": 1800, "loss": 0.2301, "lr": 0.0004767373439011267, "epoch": 11.277777777777779, "percentage": 56.39, "elapsed_time": "0:03:00", "remaining_time": "0:02:19", "throughput": 1756.69, "total_tokens": 317248} |
| {"current_steps": 1020, "total_steps": 1800, "loss": 0.2318, "lr": 0.00047189562651065565, "epoch": 11.333333333333334, "percentage": 56.67, "elapsed_time": "0:03:01", "remaining_time": "0:02:18", "throughput": 1757.38, "total_tokens": 318752} |
| {"current_steps": 1025, "total_steps": 1800, "loss": 2.7226, "lr": 0.00046705655140866074, "epoch": 11.38888888888889, "percentage": 56.94, "elapsed_time": "0:03:02", "remaining_time": "0:02:17", "throughput": 1758.23, "total_tokens": 320288} |
| {"current_steps": 1030, "total_steps": 1800, "loss": 4.9885, "lr": 0.0004622205735503961, "epoch": 11.444444444444445, "percentage": 57.22, "elapsed_time": "0:03:02", "remaining_time": "0:02:16", "throughput": 1759.39, "total_tokens": 321888} |
| {"current_steps": 1035, "total_steps": 1800, "loss": 0.2258, "lr": 0.00045738814759992174, "epoch": 11.5, "percentage": 57.5, "elapsed_time": "0:03:03", "remaining_time": "0:02:15", "throughput": 1760.21, "total_tokens": 323424} |
| {"current_steps": 1040, "total_steps": 1800, "loss": 0.2283, "lr": 0.00045255972788735873, "epoch": 11.555555555555555, "percentage": 57.78, "elapsed_time": "0:03:04", "remaining_time": "0:02:14", "throughput": 1761.03, "total_tokens": 324960} |
| {"current_steps": 1045, "total_steps": 1800, "loss": 0.2263, "lr": 0.00044773576836617336, "epoch": 11.61111111111111, "percentage": 58.06, "elapsed_time": "0:03:05", "remaining_time": "0:02:13", "throughput": 1762.15, "total_tokens": 326560} |
| {"current_steps": 1050, "total_steps": 1800, "loss": 0.2267, "lr": 0.000442916722570498, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:03:06", "remaining_time": "0:02:12", "throughput": 1763.09, "total_tokens": 328128} |
| {"current_steps": 1055, "total_steps": 1800, "loss": 0.2337, "lr": 0.0004381030435724919, "epoch": 11.722222222222221, "percentage": 58.61, "elapsed_time": "0:03:06", "remaining_time": "0:02:11", "throughput": 1764.02, "total_tokens": 329696} |
| {"current_steps": 1060, "total_steps": 1800, "loss": 0.2236, "lr": 0.00043329518393974364, "epoch": 11.777777777777779, "percentage": 58.89, "elapsed_time": "0:03:07", "remaining_time": "0:02:11", "throughput": 1764.91, "total_tokens": 331264} |
| {"current_steps": 1065, "total_steps": 1800, "loss": 0.2353, "lr": 0.0004284935956927229, "epoch": 11.833333333333334, "percentage": 59.17, "elapsed_time": "0:03:08", "remaining_time": "0:02:10", "throughput": 1765.98, "total_tokens": 332864} |
| {"current_steps": 1070, "total_steps": 1800, "loss": 0.2308, "lr": 0.00042369873026228263, "epoch": 11.88888888888889, "percentage": 59.44, "elapsed_time": "0:03:09", "remaining_time": "0:02:09", "throughput": 1766.9, "total_tokens": 334432} |
| {"current_steps": 1075, "total_steps": 1800, "loss": 0.2291, "lr": 0.00041891103844721633, "epoch": 11.944444444444445, "percentage": 59.72, "elapsed_time": "0:03:10", "remaining_time": "0:02:08", "throughput": 1767.53, "total_tokens": 335936} |
| {"current_steps": 1080, "total_steps": 1800, "loss": 0.2327, "lr": 0.00041413097037187657, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:10", "remaining_time": "0:02:07", "throughput": 1768.3, "total_tokens": 337536} |
| {"current_steps": 1080, "total_steps": 1800, "eval_loss": 0.23371091485023499, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:11", "remaining_time": "0:02:07", "throughput": 1760.81, "total_tokens": 337536} |
| {"current_steps": 1085, "total_steps": 1800, "loss": 0.2355, "lr": 0.00040935897544385424, "epoch": 12.055555555555555, "percentage": 60.28, "elapsed_time": "0:03:13", "remaining_time": "0:02:07", "throughput": 1754.02, "total_tokens": 339168} |
| {"current_steps": 1090, "total_steps": 1800, "loss": 0.2323, "lr": 0.0004045955023117276, "epoch": 12.11111111111111, "percentage": 60.56, "elapsed_time": "0:03:14", "remaining_time": "0:02:06", "throughput": 1755.11, "total_tokens": 340768} |
| {"current_steps": 1095, "total_steps": 1800, "loss": 0.2219, "lr": 0.00039984099882288133, "epoch": 12.166666666666666, "percentage": 60.83, "elapsed_time": "0:03:14", "remaining_time": "0:02:05", "throughput": 1756.05, "total_tokens": 342336} |
| {"current_steps": 1100, "total_steps": 1800, "loss": 0.2242, "lr": 0.0003950959119814013, "epoch": 12.222222222222221, "percentage": 61.11, "elapsed_time": "0:03:15", "remaining_time": "0:02:04", "throughput": 1756.99, "total_tokens": 343904} |
| {"current_steps": 1105, "total_steps": 1800, "loss": 0.2147, "lr": 0.0003903606879060483, "epoch": 12.277777777777779, "percentage": 61.39, "elapsed_time": "0:03:16", "remaining_time": "0:02:03", "throughput": 1757.78, "total_tokens": 345440} |
| {"current_steps": 1110, "total_steps": 1800, "loss": 0.2338, "lr": 0.0003856357717883161, "epoch": 12.333333333333334, "percentage": 61.67, "elapsed_time": "0:03:17", "remaining_time": "0:02:02", "throughput": 1758.57, "total_tokens": 346976} |
| {"current_steps": 1115, "total_steps": 1800, "loss": 0.192, "lr": 0.00038092160785057466, "epoch": 12.38888888888889, "percentage": 61.94, "elapsed_time": "0:03:18", "remaining_time": "0:02:01", "throughput": 1759.34, "total_tokens": 348512} |
| {"current_steps": 1120, "total_steps": 1800, "loss": 0.2222, "lr": 0.00037621863930430713, "epoch": 12.444444444444445, "percentage": 62.22, "elapsed_time": "0:03:18", "remaining_time": "0:02:00", "throughput": 1759.96, "total_tokens": 350016} |
| {"current_steps": 1125, "total_steps": 1800, "loss": 0.2135, "lr": 0.000371527308308439, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:03:19", "remaining_time": "0:01:59", "throughput": 1760.72, "total_tokens": 351552} |
| {"current_steps": 1130, "total_steps": 1800, "loss": 0.1966, "lr": 0.00036684805592776895, "epoch": 12.555555555555555, "percentage": 62.78, "elapsed_time": "0:03:20", "remaining_time": "0:01:58", "throughput": 1761.62, "total_tokens": 353120} |
| {"current_steps": 1135, "total_steps": 1800, "loss": 0.2514, "lr": 0.00036218132209150044, "epoch": 12.61111111111111, "percentage": 63.06, "elapsed_time": "0:03:21", "remaining_time": "0:01:57", "throughput": 1762.37, "total_tokens": 354656} |
| {"current_steps": 1140, "total_steps": 1800, "loss": 0.2228, "lr": 0.0003575275455518811, "epoch": 12.666666666666666, "percentage": 63.33, "elapsed_time": "0:03:22", "remaining_time": "0:01:56", "throughput": 1763.23, "total_tokens": 356224} |
| {"current_steps": 1145, "total_steps": 1800, "loss": 0.226, "lr": 0.00035288716384295236, "epoch": 12.722222222222221, "percentage": 63.61, "elapsed_time": "0:03:22", "remaining_time": "0:01:56", "throughput": 1763.95, "total_tokens": 357760} |
| {"current_steps": 1150, "total_steps": 1800, "loss": 0.2353, "lr": 0.00034826061323941484, "epoch": 12.777777777777779, "percentage": 63.89, "elapsed_time": "0:03:23", "remaining_time": "0:01:55", "throughput": 1765.08, "total_tokens": 359392} |
| {"current_steps": 1155, "total_steps": 1800, "loss": 0.2289, "lr": 0.0003436483287156091, "epoch": 12.833333333333334, "percentage": 64.17, "elapsed_time": "0:03:24", "remaining_time": "0:01:54", "throughput": 1766.08, "total_tokens": 360992} |
| {"current_steps": 1160, "total_steps": 1800, "loss": 0.241, "lr": 0.000339050743904623, "epoch": 12.88888888888889, "percentage": 64.44, "elapsed_time": "0:03:25", "remaining_time": "0:01:53", "throughput": 1766.95, "total_tokens": 362560} |
| {"current_steps": 1165, "total_steps": 1800, "loss": 0.2318, "lr": 0.000334468291057521, "epoch": 12.944444444444445, "percentage": 64.72, "elapsed_time": "0:03:25", "remaining_time": "0:01:52", "throughput": 1767.95, "total_tokens": 364160} |
| {"current_steps": 1170, "total_steps": 1800, "loss": 0.2405, "lr": 0.00032990140100270637, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:26", "remaining_time": "0:01:51", "throughput": 1768.66, "total_tokens": 365760} |
| {"current_steps": 1170, "total_steps": 1800, "eval_loss": 0.23510269820690155, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:27", "remaining_time": "0:01:51", "throughput": 1761.74, "total_tokens": 365760} |
| {"current_steps": 1175, "total_steps": 1800, "loss": 0.2285, "lr": 0.0003253505031054155, "epoch": 13.055555555555555, "percentage": 65.28, "elapsed_time": "0:03:29", "remaining_time": "0:01:51", "throughput": 1755.55, "total_tokens": 367392} |
| {"current_steps": 1180, "total_steps": 1800, "loss": 0.2061, "lr": 0.00032081602522734986, "epoch": 13.11111111111111, "percentage": 65.56, "elapsed_time": "0:03:30", "remaining_time": "0:01:50", "throughput": 1756.42, "total_tokens": 368960} |
| {"current_steps": 1185, "total_steps": 1800, "loss": 0.2391, "lr": 0.00031629839368645086, "epoch": 13.166666666666666, "percentage": 65.83, "elapsed_time": "0:03:30", "remaining_time": "0:01:49", "throughput": 1757.0, "total_tokens": 370464} |
| {"current_steps": 1190, "total_steps": 1800, "loss": 0.2258, "lr": 0.0003117980332168179, "epoch": 13.222222222222221, "percentage": 66.11, "elapsed_time": "0:03:31", "remaining_time": "0:01:48", "throughput": 1757.87, "total_tokens": 372032} |
| {"current_steps": 1195, "total_steps": 1800, "loss": 0.2414, "lr": 0.00030731536692877595, "epoch": 13.277777777777779, "percentage": 66.39, "elapsed_time": "0:03:32", "remaining_time": "0:01:47", "throughput": 1758.83, "total_tokens": 373632} |
| {"current_steps": 1200, "total_steps": 1800, "loss": 0.2149, "lr": 0.0003028508162690967, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:33", "remaining_time": "0:01:46", "throughput": 1759.82, "total_tokens": 375232} |
| {"current_steps": 1205, "total_steps": 1800, "loss": 0.2023, "lr": 0.000298404800981375, "epoch": 13.38888888888889, "percentage": 66.94, "elapsed_time": "0:03:34", "remaining_time": "0:01:45", "throughput": 1760.65, "total_tokens": 376800} |
| {"current_steps": 1210, "total_steps": 1800, "loss": 0.2082, "lr": 0.0002939777390665658, "epoch": 13.444444444444445, "percentage": 67.22, "elapsed_time": "0:03:34", "remaining_time": "0:01:44", "throughput": 1761.36, "total_tokens": 378336} |
| {"current_steps": 1215, "total_steps": 1800, "loss": 0.2202, "lr": 0.0002895700467436855, "epoch": 13.5, "percentage": 67.5, "elapsed_time": "0:03:35", "remaining_time": "0:01:43", "throughput": 1762.46, "total_tokens": 379968} |
| {"current_steps": 1220, "total_steps": 1800, "loss": 0.2422, "lr": 0.00028518213841067906, "epoch": 13.555555555555555, "percentage": 67.78, "elapsed_time": "0:03:36", "remaining_time": "0:01:42", "throughput": 1762.97, "total_tokens": 381472} |
| {"current_steps": 1225, "total_steps": 1800, "loss": 0.2148, "lr": 0.00028081442660546124, "epoch": 13.61111111111111, "percentage": 68.06, "elapsed_time": "0:03:37", "remaining_time": "0:01:41", "throughput": 1763.59, "total_tokens": 383008} |
| {"current_steps": 1230, "total_steps": 1800, "loss": 0.2203, "lr": 0.00027646732196712974, "epoch": 13.666666666666666, "percentage": 68.33, "elapsed_time": "0:03:37", "remaining_time": "0:01:41", "throughput": 1764.22, "total_tokens": 384544} |
| {"current_steps": 1235, "total_steps": 1800, "loss": 0.2006, "lr": 0.00027214123319735785, "epoch": 13.722222222222221, "percentage": 68.61, "elapsed_time": "0:03:38", "remaining_time": "0:01:40", "throughput": 1764.87, "total_tokens": 386080} |
| {"current_steps": 1240, "total_steps": 1800, "loss": 0.2133, "lr": 0.00026783656702197156, "epoch": 13.777777777777779, "percentage": 68.89, "elapsed_time": "0:03:39", "remaining_time": "0:01:39", "throughput": 1765.39, "total_tokens": 387584} |
| {"current_steps": 1245, "total_steps": 1800, "loss": 0.1693, "lr": 0.00026355372815270835, "epoch": 13.833333333333334, "percentage": 69.17, "elapsed_time": "0:03:40", "remaining_time": "0:01:38", "throughput": 1766.17, "total_tokens": 389152} |
| {"current_steps": 1250, "total_steps": 1800, "loss": 0.1638, "lr": 0.000259293119249168, "epoch": 13.88888888888889, "percentage": 69.44, "elapsed_time": "0:03:41", "remaining_time": "0:01:37", "throughput": 1766.8, "total_tokens": 390688} |
| {"current_steps": 1255, "total_steps": 1800, "loss": 0.1893, "lr": 0.00025505514088095655, "epoch": 13.944444444444445, "percentage": 69.72, "elapsed_time": "0:03:41", "remaining_time": "0:01:36", "throughput": 1767.68, "total_tokens": 392288} |
| {"current_steps": 1260, "total_steps": 1800, "loss": 0.1816, "lr": 0.0002508401914900249, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:42", "remaining_time": "0:01:35", "throughput": 1768.18, "total_tokens": 393856} |
| {"current_steps": 1260, "total_steps": 1800, "eval_loss": 0.24969133734703064, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:43", "remaining_time": "0:01:35", "throughput": 1761.71, "total_tokens": 393856} |
| {"current_steps": 1265, "total_steps": 1800, "loss": 0.2196, "lr": 0.00024664866735320885, "epoch": 14.055555555555555, "percentage": 70.28, "elapsed_time": "0:03:45", "remaining_time": "0:01:35", "throughput": 1755.3, "total_tokens": 395392} |
| {"current_steps": 1270, "total_steps": 1800, "loss": 0.2137, "lr": 0.00024248096254497287, "epoch": 14.11111111111111, "percentage": 70.56, "elapsed_time": "0:03:46", "remaining_time": "0:01:34", "throughput": 1756.19, "total_tokens": 396992} |
| {"current_steps": 1275, "total_steps": 1800, "loss": 0.2051, "lr": 0.00023833746890035963, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "0:03:46", "remaining_time": "0:01:33", "throughput": 1756.84, "total_tokens": 398528} |
| {"current_steps": 1280, "total_steps": 1800, "loss": 0.2416, "lr": 0.0002342185759781511, "epoch": 14.222222222222221, "percentage": 71.11, "elapsed_time": "0:03:47", "remaining_time": "0:01:32", "throughput": 1757.48, "total_tokens": 400064} |
| {"current_steps": 1285, "total_steps": 1800, "loss": 0.2105, "lr": 0.00023012467102424372, "epoch": 14.277777777777779, "percentage": 71.39, "elapsed_time": "0:03:48", "remaining_time": "0:01:31", "throughput": 1758.16, "total_tokens": 401600} |
| {"current_steps": 1290, "total_steps": 1800, "loss": 0.1923, "lr": 0.00022605613893524008, "epoch": 14.333333333333334, "percentage": 71.67, "elapsed_time": "0:03:49", "remaining_time": "0:01:30", "throughput": 1758.71, "total_tokens": 403104} |
| {"current_steps": 1295, "total_steps": 1800, "loss": 0.2129, "lr": 0.00022201336222226332, "epoch": 14.38888888888889, "percentage": 71.94, "elapsed_time": "0:03:49", "remaining_time": "0:01:29", "throughput": 1759.37, "total_tokens": 404640} |
| {"current_steps": 1300, "total_steps": 1800, "loss": 0.1868, "lr": 0.0002179967209749929, "epoch": 14.444444444444445, "percentage": 72.22, "elapsed_time": "0:03:50", "remaining_time": "0:01:28", "throughput": 1760.09, "total_tokens": 406208} |
| {"current_steps": 1305, "total_steps": 1800, "loss": 0.2073, "lr": 0.00021400659282593083, "epoch": 14.5, "percentage": 72.5, "elapsed_time": "0:03:51", "remaining_time": "0:01:27", "throughput": 1760.93, "total_tokens": 407808} |
| {"current_steps": 1310, "total_steps": 1800, "loss": 0.1882, "lr": 0.0002100433529148979, "epoch": 14.555555555555555, "percentage": 72.78, "elapsed_time": "0:03:52", "remaining_time": "0:01:26", "throughput": 1761.88, "total_tokens": 409440} |
| {"current_steps": 1315, "total_steps": 1800, "loss": 0.2037, "lr": 0.00020610737385376348, "epoch": 14.61111111111111, "percentage": 73.06, "elapsed_time": "0:03:53", "remaining_time": "0:01:26", "throughput": 1762.46, "total_tokens": 410976} |
| {"current_steps": 1320, "total_steps": 1800, "loss": 0.1598, "lr": 0.00020219902569141402, "epoch": 14.666666666666666, "percentage": 73.33, "elapsed_time": "0:03:53", "remaining_time": "0:01:25", "throughput": 1763.28, "total_tokens": 412576} |
| {"current_steps": 1325, "total_steps": 1800, "loss": 0.2255, "lr": 0.00019831867587896218, "epoch": 14.722222222222221, "percentage": 73.61, "elapsed_time": "0:03:54", "remaining_time": "0:01:24", "throughput": 1764.1, "total_tokens": 414176} |
| {"current_steps": 1330, "total_steps": 1800, "loss": 0.224, "lr": 0.0001944666892352001, "epoch": 14.777777777777779, "percentage": 73.89, "elapsed_time": "0:03:55", "remaining_time": "0:01:23", "throughput": 1764.78, "total_tokens": 415744} |
| {"current_steps": 1335, "total_steps": 1800, "loss": 0.2247, "lr": 0.00019064342791230072, "epoch": 14.833333333333334, "percentage": 74.17, "elapsed_time": "0:03:56", "remaining_time": "0:01:22", "throughput": 1765.48, "total_tokens": 417312} |
| {"current_steps": 1340, "total_steps": 1800, "loss": 0.2287, "lr": 0.00018684925136176834, "epoch": 14.88888888888889, "percentage": 74.44, "elapsed_time": "0:03:57", "remaining_time": "0:01:21", "throughput": 1766.16, "total_tokens": 418880} |
| {"current_steps": 1345, "total_steps": 1800, "loss": 0.1834, "lr": 0.0001830845163006448, "epoch": 14.944444444444445, "percentage": 74.72, "elapsed_time": "0:03:57", "remaining_time": "0:01:20", "throughput": 1766.72, "total_tokens": 420416} |
| {"current_steps": 1350, "total_steps": 1800, "loss": 0.1983, "lr": 0.00017934957667797225, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:58", "remaining_time": "0:01:19", "throughput": 1767.05, "total_tokens": 421952} |
| {"current_steps": 1350, "total_steps": 1800, "eval_loss": 0.2575128674507141, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:59", "remaining_time": "0:01:19", "throughput": 1761.0, "total_tokens": 421952} |
| {"current_steps": 1355, "total_steps": 1800, "loss": 0.1594, "lr": 0.000175644783641515, "epoch": 15.055555555555555, "percentage": 75.28, "elapsed_time": "0:04:01", "remaining_time": "0:01:19", "throughput": 1755.2, "total_tokens": 423552} |
| {"current_steps": 1360, "total_steps": 1800, "loss": 0.1658, "lr": 0.00017197048550474643, "epoch": 15.11111111111111, "percentage": 75.56, "elapsed_time": "0:04:02", "remaining_time": "0:01:18", "throughput": 1755.95, "total_tokens": 425120} |
| {"current_steps": 1365, "total_steps": 1800, "loss": 0.2419, "lr": 0.0001683270277141014, "epoch": 15.166666666666666, "percentage": 75.83, "elapsed_time": "0:04:02", "remaining_time": "0:01:17", "throughput": 1756.77, "total_tokens": 426720} |
| {"current_steps": 1370, "total_steps": 1800, "loss": 0.1501, "lr": 0.00016471475281649818, "epoch": 15.222222222222221, "percentage": 76.11, "elapsed_time": "0:04:03", "remaining_time": "0:01:16", "throughput": 1757.64, "total_tokens": 428320} |
| {"current_steps": 1375, "total_steps": 1800, "loss": 0.1381, "lr": 0.0001611340004271339, "epoch": 15.277777777777779, "percentage": 76.39, "elapsed_time": "0:04:04", "remaining_time": "0:01:15", "throughput": 1757.91, "total_tokens": 429760} |
| {"current_steps": 1380, "total_steps": 1800, "loss": 0.2387, "lr": 0.0001575851071975541, "epoch": 15.333333333333334, "percentage": 76.67, "elapsed_time": "0:04:05", "remaining_time": "0:01:14", "throughput": 1758.78, "total_tokens": 431360} |
| {"current_steps": 1385, "total_steps": 1800, "loss": 0.1225, "lr": 0.00015406840678400203, "epoch": 15.38888888888889, "percentage": 76.94, "elapsed_time": "0:04:06", "remaining_time": "0:01:13", "throughput": 1759.64, "total_tokens": 432960} |
| {"current_steps": 1390, "total_steps": 1800, "loss": 0.3421, "lr": 0.00015058422981604997, "epoch": 15.444444444444445, "percentage": 77.22, "elapsed_time": "0:04:06", "remaining_time": "0:01:12", "throughput": 1760.14, "total_tokens": 434464} |
| {"current_steps": 1395, "total_steps": 1800, "loss": 0.1961, "lr": 0.00014713290386551348, "epoch": 15.5, "percentage": 77.5, "elapsed_time": "0:04:07", "remaining_time": "0:01:11", "throughput": 1760.87, "total_tokens": 436032} |
| {"current_steps": 1400, "total_steps": 1800, "loss": 0.1867, "lr": 0.00014371475341565454, "epoch": 15.555555555555555, "percentage": 77.78, "elapsed_time": "0:04:08", "remaining_time": "0:01:10", "throughput": 1761.37, "total_tokens": 437536} |
| {"current_steps": 1405, "total_steps": 1800, "loss": 0.1876, "lr": 0.00014033009983067452, "epoch": 15.61111111111111, "percentage": 78.06, "elapsed_time": "0:04:09", "remaining_time": "0:01:10", "throughput": 1762.21, "total_tokens": 439136} |
| {"current_steps": 1410, "total_steps": 1800, "loss": 0.1643, "lr": 0.00013697926132550054, "epoch": 15.666666666666666, "percentage": 78.33, "elapsed_time": "0:04:09", "remaining_time": "0:01:09", "throughput": 1762.94, "total_tokens": 440704} |
| {"current_steps": 1415, "total_steps": 1800, "loss": 0.2097, "lr": 0.0001336625529358682, "epoch": 15.722222222222221, "percentage": 78.61, "elapsed_time": "0:04:10", "remaining_time": "0:01:08", "throughput": 1763.43, "total_tokens": 442208} |
| {"current_steps": 1420, "total_steps": 1800, "loss": 0.1782, "lr": 0.00013038028648870205, "epoch": 15.777777777777779, "percentage": 78.89, "elapsed_time": "0:04:11", "remaining_time": "0:01:07", "throughput": 1764.03, "total_tokens": 443744} |
| {"current_steps": 1425, "total_steps": 1800, "loss": 0.1992, "lr": 0.0001271327705727991, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "0:04:12", "remaining_time": "0:01:06", "throughput": 1764.96, "total_tokens": 445376} |
| {"current_steps": 1430, "total_steps": 1800, "loss": 0.1773, "lr": 0.0001239203105098165, "epoch": 15.88888888888889, "percentage": 79.44, "elapsed_time": "0:04:13", "remaining_time": "0:01:05", "throughput": 1765.67, "total_tokens": 446944} |
| {"current_steps": 1435, "total_steps": 1800, "loss": 0.1623, "lr": 0.00012074320832556557, "epoch": 15.944444444444445, "percentage": 79.72, "elapsed_time": "0:04:13", "remaining_time": "0:01:04", "throughput": 1766.37, "total_tokens": 448512} |
| {"current_steps": 1440, "total_steps": 1800, "loss": 0.1913, "lr": 0.00011760176272161627, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:14", "remaining_time": "0:01:03", "throughput": 1766.61, "total_tokens": 450016} |
| {"current_steps": 1440, "total_steps": 1800, "eval_loss": 0.2835889160633087, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:15", "remaining_time": "0:01:03", "throughput": 1760.99, "total_tokens": 450016} |
| {"current_steps": 1445, "total_steps": 1800, "loss": 0.0924, "lr": 0.00011449626904721472, "epoch": 16.055555555555557, "percentage": 80.28, "elapsed_time": "0:04:17", "remaining_time": "0:01:03", "throughput": 1753.24, "total_tokens": 451584} |
| {"current_steps": 1450, "total_steps": 1800, "loss": 0.1364, "lr": 0.00011142701927151455, "epoch": 16.11111111111111, "percentage": 80.56, "elapsed_time": "0:04:18", "remaining_time": "0:01:02", "throughput": 1753.93, "total_tokens": 453152} |
| {"current_steps": 1455, "total_steps": 1800, "loss": 0.1256, "lr": 0.00010839430195612793, "epoch": 16.166666666666668, "percentage": 80.83, "elapsed_time": "0:04:19", "remaining_time": "0:01:01", "throughput": 1754.74, "total_tokens": 454752} |
| {"current_steps": 1460, "total_steps": 1800, "loss": 0.0619, "lr": 0.00010539840222799463, "epoch": 16.22222222222222, "percentage": 81.11, "elapsed_time": "0:04:19", "remaining_time": "0:01:00", "throughput": 1755.29, "total_tokens": 456288} |
| {"current_steps": 1465, "total_steps": 1800, "loss": 0.1474, "lr": 0.00010243960175257604, "epoch": 16.27777777777778, "percentage": 81.39, "elapsed_time": "0:04:20", "remaining_time": "0:00:59", "throughput": 1756.06, "total_tokens": 457888} |
| {"current_steps": 1470, "total_steps": 1800, "loss": 0.2436, "lr": 9.9518178707374e-05, "epoch": 16.333333333333332, "percentage": 81.67, "elapsed_time": "0:04:21", "remaining_time": "0:00:58", "throughput": 1756.72, "total_tokens": 459456} |
| {"current_steps": 1475, "total_steps": 1800, "loss": 0.1549, "lr": 9.663440775577653e-05, "epoch": 16.38888888888889, "percentage": 81.94, "elapsed_time": "0:04:22", "remaining_time": "0:00:57", "throughput": 1757.27, "total_tokens": 460992} |
| {"current_steps": 1480, "total_steps": 1800, "loss": 0.1663, "lr": 9.378856002123548e-05, "epoch": 16.444444444444443, "percentage": 82.22, "elapsed_time": "0:04:23", "remaining_time": "0:00:56", "throughput": 1757.94, "total_tokens": 462560} |
| {"current_steps": 1485, "total_steps": 1800, "loss": 0.2019, "lr": 9.098090306177625e-05, "epoch": 16.5, "percentage": 82.5, "elapsed_time": "0:04:23", "remaining_time": "0:00:55", "throughput": 1758.71, "total_tokens": 464160} |
| {"current_steps": 1490, "total_steps": 1800, "loss": 0.1989, "lr": 8.821170084484247e-05, "epoch": 16.555555555555557, "percentage": 82.78, "elapsed_time": "0:04:24", "remaining_time": "0:00:55", "throughput": 1759.33, "total_tokens": 465728} |
| {"current_steps": 1495, "total_steps": 1800, "loss": 0.1467, "lr": 8.548121372247918e-05, "epoch": 16.61111111111111, "percentage": 83.06, "elapsed_time": "0:04:25", "remaining_time": "0:00:54", "throughput": 1759.87, "total_tokens": 467264} |
| {"current_steps": 1500, "total_steps": 1800, "loss": 0.1701, "lr": 8.278969840685458e-05, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:04:26", "remaining_time": "0:00:53", "throughput": 1760.45, "total_tokens": 468800} |
| {"current_steps": 1505, "total_steps": 1800, "loss": 0.1745, "lr": 8.013740794612512e-05, "epoch": 16.72222222222222, "percentage": 83.61, "elapsed_time": "0:04:27", "remaining_time": "0:00:52", "throughput": 1761.13, "total_tokens": 470368} |
| {"current_steps": 1510, "total_steps": 1800, "loss": 0.1542, "lr": 7.752459170064491e-05, "epoch": 16.77777777777778, "percentage": 83.89, "elapsed_time": "0:04:27", "remaining_time": "0:00:51", "throughput": 1761.91, "total_tokens": 471968} |
| {"current_steps": 1515, "total_steps": 1800, "loss": 0.1096, "lr": 7.4951495319521e-05, "epoch": 16.833333333333332, "percentage": 84.17, "elapsed_time": "0:04:28", "remaining_time": "0:00:50", "throughput": 1762.67, "total_tokens": 473568} |
| {"current_steps": 1520, "total_steps": 1800, "loss": 0.2386, "lr": 7.241836071751879e-05, "epoch": 16.88888888888889, "percentage": 84.44, "elapsed_time": "0:04:29", "remaining_time": "0:00:49", "throughput": 1763.3, "total_tokens": 475136} |
| {"current_steps": 1525, "total_steps": 1800, "loss": 0.1709, "lr": 6.992542605231739e-05, "epoch": 16.944444444444443, "percentage": 84.72, "elapsed_time": "0:04:30", "remaining_time": "0:00:48", "throughput": 1763.82, "total_tokens": 476672} |
| {"current_steps": 1530, "total_steps": 1800, "loss": 0.1652, "lr": 6.747292570211916e-05, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:31", "remaining_time": "0:00:47", "throughput": 1764.26, "total_tokens": 478240} |
| {"current_steps": 1530, "total_steps": 1800, "eval_loss": 0.3111337721347809, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:31", "remaining_time": "0:00:47", "throughput": 1758.95, "total_tokens": 478240} |
| {"current_steps": 1535, "total_steps": 1800, "loss": 0.1318, "lr": 6.506109024361429e-05, "epoch": 17.055555555555557, "percentage": 85.28, "elapsed_time": "0:04:33", "remaining_time": "0:00:47", "throughput": 1753.8, "total_tokens": 479840} |
| {"current_steps": 1540, "total_steps": 1800, "loss": 0.0779, "lr": 6.269014643030213e-05, "epoch": 17.11111111111111, "percentage": 85.56, "elapsed_time": "0:04:34", "remaining_time": "0:00:46", "throughput": 1754.33, "total_tokens": 481376} |
| {"current_steps": 1545, "total_steps": 1800, "loss": 0.1361, "lr": 6.0360317171172794e-05, "epoch": 17.166666666666668, "percentage": 85.83, "elapsed_time": "0:04:35", "remaining_time": "0:00:45", "throughput": 1754.76, "total_tokens": 482880} |
| {"current_steps": 1550, "total_steps": 1800, "loss": 0.0496, "lr": 5.807182150975027e-05, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "0:04:35", "remaining_time": "0:00:44", "throughput": 1755.4, "total_tokens": 484448} |
| {"current_steps": 1555, "total_steps": 1800, "loss": 0.0952, "lr": 5.5824874603498056e-05, "epoch": 17.27777777777778, "percentage": 86.39, "elapsed_time": "0:04:36", "remaining_time": "0:00:43", "throughput": 1756.04, "total_tokens": 486016} |
| {"current_steps": 1560, "total_steps": 1800, "loss": 0.1348, "lr": 5.361968770359071e-05, "epoch": 17.333333333333332, "percentage": 86.67, "elapsed_time": "0:04:37", "remaining_time": "0:00:42", "throughput": 1756.38, "total_tokens": 487488} |
| {"current_steps": 1565, "total_steps": 1800, "loss": 0.2422, "lr": 5.145646813505339e-05, "epoch": 17.38888888888889, "percentage": 86.94, "elapsed_time": "0:04:38", "remaining_time": "0:00:41", "throughput": 1757.03, "total_tokens": 489056} |
| {"current_steps": 1570, "total_steps": 1800, "loss": 0.126, "lr": 4.933541927726887e-05, "epoch": 17.444444444444443, "percentage": 87.22, "elapsed_time": "0:04:39", "remaining_time": "0:00:40", "throughput": 1757.48, "total_tokens": 490560} |
| {"current_steps": 1575, "total_steps": 1800, "loss": 0.0652, "lr": 4.725674054485712e-05, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:04:39", "remaining_time": "0:00:39", "throughput": 1758.34, "total_tokens": 492192} |
| {"current_steps": 1580, "total_steps": 1800, "loss": 0.1534, "lr": 4.522062736892635e-05, "epoch": 17.555555555555557, "percentage": 87.78, "elapsed_time": "0:04:40", "remaining_time": "0:00:39", "throughput": 1758.96, "total_tokens": 493760} |
| {"current_steps": 1585, "total_steps": 1800, "loss": 0.0876, "lr": 4.322727117869951e-05, "epoch": 17.61111111111111, "percentage": 88.06, "elapsed_time": "0:04:41", "remaining_time": "0:00:38", "throughput": 1759.45, "total_tokens": 495296} |
| {"current_steps": 1590, "total_steps": 1800, "loss": 0.0965, "lr": 4.127685938351694e-05, "epoch": 17.666666666666668, "percentage": 88.33, "elapsed_time": "0:04:42", "remaining_time": "0:00:37", "throughput": 1760.14, "total_tokens": 496896} |
| {"current_steps": 1595, "total_steps": 1800, "loss": 0.1208, "lr": 3.936957535521624e-05, "epoch": 17.72222222222222, "percentage": 88.61, "elapsed_time": "0:04:43", "remaining_time": "0:00:36", "throughput": 1760.73, "total_tokens": 498464} |
| {"current_steps": 1600, "total_steps": 1800, "loss": 0.1925, "lr": 3.750559841089196e-05, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "0:04:43", "remaining_time": "0:00:35", "throughput": 1761.31, "total_tokens": 500032} |
| {"current_steps": 1605, "total_steps": 1800, "loss": 0.1788, "lr": 3.56851037960379e-05, "epoch": 17.833333333333332, "percentage": 89.17, "elapsed_time": "0:04:44", "remaining_time": "0:00:34", "throughput": 1761.87, "total_tokens": 501600} |
| {"current_steps": 1610, "total_steps": 1800, "loss": 0.1501, "lr": 3.3908262668069845e-05, "epoch": 17.88888888888889, "percentage": 89.44, "elapsed_time": "0:04:45", "remaining_time": "0:00:33", "throughput": 1762.65, "total_tokens": 503232} |
| {"current_steps": 1615, "total_steps": 1800, "loss": 0.1687, "lr": 3.217524208023431e-05, "epoch": 17.944444444444443, "percentage": 89.72, "elapsed_time": "0:04:46", "remaining_time": "0:00:32", "throughput": 1763.32, "total_tokens": 504832} |
| {"current_steps": 1620, "total_steps": 1800, "loss": 0.1664, "lr": 3.048620496590304e-05, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:47", "remaining_time": "0:00:31", "throughput": 1763.61, "total_tokens": 506368} |
| {"current_steps": 1620, "total_steps": 1800, "eval_loss": 0.39571771025657654, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:47", "remaining_time": "0:00:31", "throughput": 1758.56, "total_tokens": 506368} |
| {"current_steps": 1625, "total_steps": 1800, "loss": 0.1103, "lr": 2.884131012325386e-05, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "0:04:49", "remaining_time": "0:00:31", "throughput": 1753.36, "total_tokens": 507936} |
| {"current_steps": 1630, "total_steps": 1800, "loss": 0.0579, "lr": 2.724071220034158e-05, "epoch": 18.11111111111111, "percentage": 90.56, "elapsed_time": "0:04:50", "remaining_time": "0:00:30", "throughput": 1754.06, "total_tokens": 509536} |
| {"current_steps": 1635, "total_steps": 1800, "loss": 0.1457, "lr": 2.5684561680557994e-05, "epoch": 18.166666666666668, "percentage": 90.83, "elapsed_time": "0:04:51", "remaining_time": "0:00:29", "throughput": 1754.89, "total_tokens": 511168} |
| {"current_steps": 1640, "total_steps": 1800, "loss": 0.077, "lr": 2.417300486848373e-05, "epoch": 18.22222222222222, "percentage": 91.11, "elapsed_time": "0:04:52", "remaining_time": "0:00:28", "throughput": 1755.39, "total_tokens": 512704} |
| {"current_steps": 1645, "total_steps": 1800, "loss": 0.076, "lr": 2.2706183876134045e-05, "epoch": 18.27777777777778, "percentage": 91.39, "elapsed_time": "0:04:52", "remaining_time": "0:00:27", "throughput": 1756.0, "total_tokens": 514272} |
| {"current_steps": 1650, "total_steps": 1800, "loss": 0.0582, "lr": 2.1284236609596887e-05, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:04:53", "remaining_time": "0:00:26", "throughput": 1756.53, "total_tokens": 515808} |
| {"current_steps": 1655, "total_steps": 1800, "loss": 0.2203, "lr": 1.990729675606784e-05, "epoch": 18.38888888888889, "percentage": 91.94, "elapsed_time": "0:04:54", "remaining_time": "0:00:25", "throughput": 1757.25, "total_tokens": 517408} |
| {"current_steps": 1660, "total_steps": 1800, "loss": 0.0886, "lr": 1.8575493771281205e-05, "epoch": 18.444444444444443, "percentage": 92.22, "elapsed_time": "0:04:55", "remaining_time": "0:00:24", "throughput": 1757.77, "total_tokens": 518944} |
| {"current_steps": 1665, "total_steps": 1800, "loss": 0.0951, "lr": 1.728895286733906e-05, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:04:56", "remaining_time": "0:00:24", "throughput": 1758.49, "total_tokens": 520544} |
| {"current_steps": 1670, "total_steps": 1800, "loss": 0.1259, "lr": 1.6047795000938782e-05, "epoch": 18.555555555555557, "percentage": 92.78, "elapsed_time": "0:04:56", "remaining_time": "0:00:23", "throughput": 1759.0, "total_tokens": 522080} |
| {"current_steps": 1675, "total_steps": 1800, "loss": 0.0841, "lr": 1.4852136862001764e-05, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "0:04:57", "remaining_time": "0:00:22", "throughput": 1759.42, "total_tokens": 523584} |
| {"current_steps": 1680, "total_steps": 1800, "loss": 0.0775, "lr": 1.3702090862701855e-05, "epoch": 18.666666666666668, "percentage": 93.33, "elapsed_time": "0:04:58", "remaining_time": "0:00:21", "throughput": 1759.93, "total_tokens": 525120} |
| {"current_steps": 1685, "total_steps": 1800, "loss": 0.0937, "lr": 1.2597765126897198e-05, "epoch": 18.72222222222222, "percentage": 93.61, "elapsed_time": "0:04:59", "remaining_time": "0:00:20", "throughput": 1760.73, "total_tokens": 526752} |
| {"current_steps": 1690, "total_steps": 1800, "loss": 0.127, "lr": 1.1539263479964535e-05, "epoch": 18.77777777777778, "percentage": 93.89, "elapsed_time": "0:04:59", "remaining_time": "0:00:19", "throughput": 1761.33, "total_tokens": 528320} |
| {"current_steps": 1695, "total_steps": 1800, "loss": 0.08, "lr": 1.0526685439037842e-05, "epoch": 18.833333333333332, "percentage": 94.17, "elapsed_time": "0:05:00", "remaining_time": "0:00:18", "throughput": 1761.72, "total_tokens": 529824} |
| {"current_steps": 1700, "total_steps": 1800, "loss": 0.1767, "lr": 9.560126203652263e-06, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "0:05:01", "remaining_time": "0:00:17", "throughput": 1762.31, "total_tokens": 531392} |
| {"current_steps": 1705, "total_steps": 1800, "loss": 0.0481, "lr": 8.639676646793382e-06, "epoch": 18.944444444444443, "percentage": 94.72, "elapsed_time": "0:05:02", "remaining_time": "0:00:16", "throughput": 1762.87, "total_tokens": 532960} |
| {"current_steps": 1710, "total_steps": 1800, "loss": 0.0979, "lr": 7.76542330635388e-06, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:05:03", "remaining_time": "0:00:15", "throughput": 1763.28, "total_tokens": 534528} |
| {"current_steps": 1710, "total_steps": 1800, "eval_loss": 0.4245486259460449, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:05:03", "remaining_time": "0:00:15", "throughput": 1758.55, "total_tokens": 534528} |
| {"current_steps": 1715, "total_steps": 1800, "loss": 0.0811, "lr": 6.9374483769975016e-06, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:05", "remaining_time": "0:00:15", "throughput": 1753.75, "total_tokens": 536064} |
| {"current_steps": 1720, "total_steps": 1800, "loss": 0.0683, "lr": 6.15582970243117e-06, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:05:06", "remaining_time": "0:00:14", "throughput": 1754.25, "total_tokens": 537600} |
| {"current_steps": 1725, "total_steps": 1800, "loss": 0.1838, "lr": 5.42064076808646e-06, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:05:07", "remaining_time": "0:00:13", "throughput": 1754.85, "total_tokens": 539168} |
| {"current_steps": 1730, "total_steps": 1800, "loss": 0.0864, "lr": 4.731950694210896e-06, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:05:08", "remaining_time": "0:00:12", "throughput": 1755.35, "total_tokens": 540704} |
| {"current_steps": 1735, "total_steps": 1800, "loss": 0.0314, "lr": 4.089824229369155e-06, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:05:08", "remaining_time": "0:00:11", "throughput": 1756.02, "total_tokens": 542304} |
| {"current_steps": 1740, "total_steps": 1800, "loss": 0.0933, "lr": 3.4943217443557664e-06, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:05:09", "remaining_time": "0:00:10", "throughput": 1756.69, "total_tokens": 543904} |
| {"current_steps": 1745, "total_steps": 1800, "loss": 0.0894, "lr": 2.9454992265193214e-06, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:05:10", "remaining_time": "0:00:09", "throughput": 1757.25, "total_tokens": 545472} |
| {"current_steps": 1750, "total_steps": 1800, "loss": 0.1206, "lr": 2.4434082744984598e-06, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:05:11", "remaining_time": "0:00:08", "throughput": 1757.82, "total_tokens": 547040} |
| {"current_steps": 1755, "total_steps": 1800, "loss": 0.0543, "lr": 1.9880960933710836e-06, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:05:11", "remaining_time": "0:00:07", "throughput": 1758.39, "total_tokens": 548608} |
| {"current_steps": 1760, "total_steps": 1800, "loss": 0.0756, "lr": 1.5796054902157964e-06, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:05:12", "remaining_time": "0:00:07", "throughput": 1758.86, "total_tokens": 550144} |
| {"current_steps": 1765, "total_steps": 1800, "loss": 0.0703, "lr": 1.2179748700879012e-06, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:05:13", "remaining_time": "0:00:06", "throughput": 1759.5, "total_tokens": 551744} |
| {"current_steps": 1770, "total_steps": 1800, "loss": 0.0819, "lr": 9.032382324080101e-07, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:05:14", "remaining_time": "0:00:05", "throughput": 1760.16, "total_tokens": 553344} |
| {"current_steps": 1775, "total_steps": 1800, "loss": 0.1254, "lr": 6.354251677661571e-07, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:05:15", "remaining_time": "0:00:04", "throughput": 1760.64, "total_tokens": 554880} |
| {"current_steps": 1780, "total_steps": 1800, "loss": 0.0728, "lr": 4.1456085513935646e-07, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:05:15", "remaining_time": "0:00:03", "throughput": 1761.21, "total_tokens": 556448} |
| {"current_steps": 1785, "total_steps": 1800, "loss": 0.0357, "lr": 2.4066605952444145e-07, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:05:16", "remaining_time": "0:00:02", "throughput": 1761.68, "total_tokens": 557984} |
| {"current_steps": 1790, "total_steps": 1800, "loss": 0.1119, "lr": 1.1375712998595855e-07, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:05:17", "remaining_time": "0:00:01", "throughput": 1762.34, "total_tokens": 559584} |
| {"current_steps": 1795, "total_steps": 1800, "loss": 0.1335, "lr": 3.384599811889766e-08, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:05:18", "remaining_time": "0:00:00", "throughput": 1762.9, "total_tokens": 561152} |
| {"current_steps": 1800, "total_steps": 1800, "loss": 0.1026, "lr": 9.40176926922387e-10, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:19", "remaining_time": "0:00:00", "throughput": 1763.27, "total_tokens": 562720} |
| {"current_steps": 1800, "total_steps": 1800, "eval_loss": 0.4507216513156891, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:19", "remaining_time": "0:00:00", "throughput": 1758.79, "total_tokens": 562720} |
| {"current_steps": 1800, "total_steps": 1800, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:20", "remaining_time": "0:00:00", "throughput": 1754.18, "total_tokens": 562720} |
|
|