| {"current_steps": 5, "total_steps": 250, "loss": 7.3818, "lr": 1.6000000000000001e-06, "epoch": 0.1, "percentage": 2.0, "elapsed_time": "0:00:01", "remaining_time": "0:01:21", "throughput": 1630.39, "total_tokens": 2720} |
| {"current_steps": 10, "total_steps": 250, "loss": 6.0515, "lr": 3.6000000000000003e-06, "epoch": 0.2, "percentage": 4.0, "elapsed_time": "0:00:02", "remaining_time": "0:00:59", "throughput": 2229.72, "total_tokens": 5536} |
| {"current_steps": 15, "total_steps": 250, "loss": 4.1101, "lr": 5.600000000000001e-06, "epoch": 0.3, "percentage": 6.0, "elapsed_time": "0:00:03", "remaining_time": "0:00:51", "throughput": 2559.4, "total_tokens": 8480} |
| {"current_steps": 20, "total_steps": 250, "loss": 2.3525, "lr": 7.600000000000001e-06, "epoch": 0.4, "percentage": 8.0, "elapsed_time": "0:00:04", "remaining_time": "0:00:48", "throughput": 2817.59, "total_tokens": 11776} |
| {"current_steps": 25, "total_steps": 250, "loss": 1.1194, "lr": 9.600000000000001e-06, "epoch": 0.5, "percentage": 10.0, "elapsed_time": "0:00:05", "remaining_time": "0:00:45", "throughput": 2929.61, "total_tokens": 14720} |
| {"current_steps": 25, "total_steps": 250, "eval_loss": 0.7001103758811951, "epoch": 0.5, "percentage": 10.0, "elapsed_time": "0:00:06", "remaining_time": "0:00:56", "throughput": 2342.81, "total_tokens": 14720} |
| {"current_steps": 30, "total_steps": 250, "loss": 0.4917, "lr": 9.992203820909906e-06, "epoch": 0.6, "percentage": 12.0, "elapsed_time": "0:00:09", "remaining_time": "0:01:06", "throughput": 2053.78, "total_tokens": 18560} |
| {"current_steps": 35, "total_steps": 250, "loss": 0.2864, "lr": 9.960573506572391e-06, "epoch": 0.7, "percentage": 14.0, "elapsed_time": "0:00:09", "remaining_time": "0:01:00", "throughput": 2199.53, "total_tokens": 21824} |
| {"current_steps": 40, "total_steps": 250, "loss": 0.2748, "lr": 9.904775776745959e-06, "epoch": 0.8, "percentage": 16.0, "elapsed_time": "0:00:10", "remaining_time": "0:00:56", "throughput": 2270.9, "total_tokens": 24288} |
| {"current_steps": 45, "total_steps": 250, "loss": 0.3115, "lr": 9.825082472361558e-06, "epoch": 0.9, "percentage": 18.0, "elapsed_time": "0:00:11", "remaining_time": "0:00:52", "throughput": 2379.88, "total_tokens": 27648} |
| {"current_steps": 50, "total_steps": 250, "loss": 0.3206, "lr": 9.721881851187406e-06, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:00:12", "remaining_time": "0:00:50", "throughput": 2483.28, "total_tokens": 31456} |
| {"current_steps": 50, "total_steps": 250, "eval_loss": 0.2620924413204193, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:00:13", "remaining_time": "0:00:55", "throughput": 2261.41, "total_tokens": 31456} |
| {"current_steps": 55, "total_steps": 250, "loss": 0.235, "lr": 9.595676696276173e-06, "epoch": 1.1, "percentage": 22.0, "elapsed_time": "0:00:16", "remaining_time": "0:00:59", "throughput": 2076.76, "total_tokens": 34976} |
| {"current_steps": 60, "total_steps": 250, "loss": 0.2224, "lr": 9.44708186645649e-06, "epoch": 1.2, "percentage": 24.0, "elapsed_time": "0:00:17", "remaining_time": "0:00:56", "throughput": 2140.19, "total_tokens": 37856} |
| {"current_steps": 65, "total_steps": 250, "loss": 0.1765, "lr": 9.276821300802535e-06, "epoch": 1.3, "percentage": 26.0, "elapsed_time": "0:00:18", "remaining_time": "0:00:53", "throughput": 2237.15, "total_tokens": 41728} |
| {"current_steps": 70, "total_steps": 250, "loss": 0.3459, "lr": 9.085724491675642e-06, "epoch": 1.4, "percentage": 28.0, "elapsed_time": "0:00:19", "remaining_time": "0:00:50", "throughput": 2292.35, "total_tokens": 44704} |
| {"current_steps": 75, "total_steps": 250, "loss": 0.2238, "lr": 8.874722443520898e-06, "epoch": 1.5, "percentage": 30.0, "elapsed_time": "0:00:20", "remaining_time": "0:00:47", "throughput": 2324.63, "total_tokens": 47168} |
| {"current_steps": 75, "total_steps": 250, "eval_loss": 0.43755441904067993, "epoch": 1.5, "percentage": 30.0, "elapsed_time": "0:00:21", "remaining_time": "0:00:50", "throughput": 2188.54, "total_tokens": 47168} |
| {"current_steps": 80, "total_steps": 250, "loss": 0.4292, "lr": 8.644843137107058e-06, "epoch": 1.6, "percentage": 32.0, "elapsed_time": "0:00:24", "remaining_time": "0:00:51", "throughput": 2105.42, "total_tokens": 51072} |
| {"current_steps": 85, "total_steps": 250, "loss": 0.4041, "lr": 8.397206521307584e-06, "epoch": 1.7, "percentage": 34.0, "elapsed_time": "0:00:25", "remaining_time": "0:00:48", "throughput": 2162.63, "total_tokens": 54400} |
| {"current_steps": 90, "total_steps": 250, "loss": 0.2023, "lr": 8.133019056822303e-06, "epoch": 1.8, "percentage": 36.0, "elapsed_time": "0:00:25", "remaining_time": "0:00:46", "throughput": 2186.68, "total_tokens": 56736} |
| {"current_steps": 95, "total_steps": 250, "loss": 0.2588, "lr": 7.85356783842216e-06, "epoch": 1.9, "percentage": 38.0, "elapsed_time": "0:00:26", "remaining_time": "0:00:43", "throughput": 2234.81, "total_tokens": 60064} |
| {"current_steps": 100, "total_steps": 250, "loss": 0.1977, "lr": 7.560214324352858e-06, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:00:27", "remaining_time": "0:00:41", "throughput": 2273.54, "total_tokens": 63168} |
| {"current_steps": 100, "total_steps": 250, "eval_loss": 0.2416853904724121, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:00:29", "remaining_time": "0:00:43", "throughput": 2174.08, "total_tokens": 63168} |
| {"current_steps": 105, "total_steps": 250, "loss": 0.3292, "lr": 7.254387703447154e-06, "epoch": 2.1, "percentage": 42.0, "elapsed_time": "0:00:31", "remaining_time": "0:00:44", "throughput": 2080.47, "total_tokens": 66464} |
| {"current_steps": 110, "total_steps": 250, "loss": 0.198, "lr": 6.9375779322605154e-06, "epoch": 2.2, "percentage": 44.0, "elapsed_time": "0:00:32", "remaining_time": "0:00:41", "throughput": 2131.47, "total_tokens": 70112} |
| {"current_steps": 115, "total_steps": 250, "loss": 0.1548, "lr": 6.611328476152557e-06, "epoch": 2.3, "percentage": 46.0, "elapsed_time": "0:00:33", "remaining_time": "0:00:39", "throughput": 2171.12, "total_tokens": 73376} |
| {"current_steps": 120, "total_steps": 250, "loss": 0.1419, "lr": 6.277228789678953e-06, "epoch": 2.4, "percentage": 48.0, "elapsed_time": "0:00:34", "remaining_time": "0:00:37", "throughput": 2202.09, "total_tokens": 76288} |
| {"current_steps": 125, "total_steps": 250, "loss": 0.4901, "lr": 5.936906572928625e-06, "epoch": 2.5, "percentage": 50.0, "elapsed_time": "0:00:35", "remaining_time": "0:00:35", "throughput": 2234.82, "total_tokens": 79424} |
| {"current_steps": 125, "total_steps": 250, "eval_loss": 0.36641696095466614, "epoch": 2.5, "percentage": 50.0, "elapsed_time": "0:00:36", "remaining_time": "0:00:36", "throughput": 2158.28, "total_tokens": 79424} |
| {"current_steps": 130, "total_steps": 250, "loss": 0.1353, "lr": 5.592019841532507e-06, "epoch": 2.6, "percentage": 52.0, "elapsed_time": "0:00:39", "remaining_time": "0:00:36", "throughput": 2108.45, "total_tokens": 82880} |
| {"current_steps": 135, "total_steps": 250, "loss": 0.2208, "lr": 5.244248848978067e-06, "epoch": 2.7, "percentage": 54.0, "elapsed_time": "0:00:40", "remaining_time": "0:00:34", "throughput": 2150.15, "total_tokens": 86560} |
| {"current_steps": 140, "total_steps": 250, "loss": 0.1244, "lr": 4.895287900583216e-06, "epoch": 2.8, "percentage": 56.0, "elapsed_time": "0:00:41", "remaining_time": "0:00:32", "throughput": 2172.69, "total_tokens": 89184} |
| {"current_steps": 145, "total_steps": 250, "loss": 0.2573, "lr": 4.546837099011101e-06, "epoch": 2.9, "percentage": 58.0, "elapsed_time": "0:00:41", "remaining_time": "0:00:30", "throughput": 2200.14, "total_tokens": 92256} |
| {"current_steps": 150, "total_steps": 250, "loss": 0.2486, "lr": 4.200594061540827e-06, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:00:42", "remaining_time": "0:00:28", "throughput": 2223.51, "total_tokens": 95168} |
| {"current_steps": 150, "total_steps": 250, "eval_loss": 0.24323847889900208, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "0:00:44", "remaining_time": "0:00:29", "throughput": 2160.29, "total_tokens": 95168} |
| {"current_steps": 155, "total_steps": 250, "loss": 0.1311, "lr": 3.8582456494467214e-06, "epoch": 3.1, "percentage": 62.0, "elapsed_time": "0:00:46", "remaining_time": "0:00:28", "throughput": 2101.27, "total_tokens": 98528} |
| {"current_steps": 160, "total_steps": 250, "loss": 0.2129, "lr": 3.521459749779769e-06, "epoch": 3.2, "percentage": 64.0, "elapsed_time": "0:00:47", "remaining_time": "0:00:26", "throughput": 2124.49, "total_tokens": 101408} |
| {"current_steps": 165, "total_steps": 250, "loss": 0.1959, "lr": 3.1918771495895395e-06, "epoch": 3.3, "percentage": 66.0, "elapsed_time": "0:00:48", "remaining_time": "0:00:25", "throughput": 2153.46, "total_tokens": 104736} |
| {"current_steps": 170, "total_steps": 250, "loss": 0.1993, "lr": 2.871103542174637e-06, "epoch": 3.4, "percentage": 68.0, "elapsed_time": "0:00:49", "remaining_time": "0:00:23", "throughput": 2179.07, "total_tokens": 107840} |
| {"current_steps": 175, "total_steps": 250, "loss": 0.2548, "lr": 2.560701704306336e-06, "epoch": 3.5, "percentage": 70.0, "elapsed_time": "0:00:50", "remaining_time": "0:00:21", "throughput": 2209.81, "total_tokens": 111392} |
| {"current_steps": 175, "total_steps": 250, "eval_loss": 0.22071371972560883, "epoch": 3.5, "percentage": 70.0, "elapsed_time": "0:00:51", "remaining_time": "0:00:22", "throughput": 2155.8, "total_tokens": 111392} |
| {"current_steps": 180, "total_steps": 250, "loss": 0.1232, "lr": 2.2621838825372496e-06, "epoch": 3.6, "percentage": 72.0, "elapsed_time": "0:00:54", "remaining_time": "0:00:21", "throughput": 2113.54, "total_tokens": 114720} |
| {"current_steps": 185, "total_steps": 250, "loss": 0.1062, "lr": 1.977004425688126e-06, "epoch": 3.7, "percentage": 74.0, "elapsed_time": "0:00:55", "remaining_time": "0:00:19", "throughput": 2134.93, "total_tokens": 117632} |
| {"current_steps": 190, "total_steps": 250, "loss": 0.1406, "lr": 1.7065526994065973e-06, "epoch": 3.8, "percentage": 76.0, "elapsed_time": "0:00:55", "remaining_time": "0:00:17", "throughput": 2154.03, "total_tokens": 120512} |
| {"current_steps": 195, "total_steps": 250, "loss": 0.2957, "lr": 1.4521463173173966e-06, "epoch": 3.9, "percentage": 78.0, "elapsed_time": "0:00:56", "remaining_time": "0:00:16", "throughput": 2183.17, "total_tokens": 124352} |
| {"current_steps": 200, "total_steps": 250, "loss": 0.2618, "lr": 1.2150247217412186e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:00:57", "remaining_time": "0:00:14", "throughput": 2196.89, "total_tokens": 127136} |
| {"current_steps": 200, "total_steps": 250, "eval_loss": 0.26114702224731445, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "0:00:59", "remaining_time": "0:00:14", "throughput": 2150.49, "total_tokens": 127136} |
| {"current_steps": 205, "total_steps": 250, "loss": 0.1506, "lr": 9.963431452563331e-07, "epoch": 4.1, "percentage": 82.0, "elapsed_time": "0:01:01", "remaining_time": "0:00:13", "throughput": 2096.57, "total_tokens": 129664} |
| {"current_steps": 210, "total_steps": 250, "loss": 0.241, "lr": 7.971669825215789e-07, "epoch": 4.2, "percentage": 84.0, "elapsed_time": "0:01:02", "remaining_time": "0:00:11", "throughput": 2109.34, "total_tokens": 132128} |
| {"current_steps": 215, "total_steps": 250, "loss": 0.1251, "lr": 6.184665997806832e-07, "epoch": 4.3, "percentage": 86.0, "elapsed_time": "0:01:03", "remaining_time": "0:00:10", "throughput": 2128.68, "total_tokens": 135200} |
| {"current_steps": 220, "total_steps": 250, "loss": 0.1819, "lr": 4.6111260733545714e-07, "epoch": 4.4, "percentage": 88.0, "elapsed_time": "0:01:04", "remaining_time": "0:00:08", "throughput": 2149.37, "total_tokens": 138368} |
| {"current_steps": 225, "total_steps": 250, "loss": 0.1813, "lr": 3.258716180199278e-07, "epoch": 4.5, "percentage": 90.0, "elapsed_time": "0:01:05", "remaining_time": "0:00:07", "throughput": 2168.11, "total_tokens": 141632} |
| {"current_steps": 225, "total_steps": 250, "eval_loss": 0.21796944737434387, "epoch": 4.5, "percentage": 90.0, "elapsed_time": "0:01:06", "remaining_time": "0:00:07", "throughput": 2126.47, "total_tokens": 141632} |
| {"current_steps": 230, "total_steps": 250, "loss": 0.0981, "lr": 2.134025123396638e-07, "epoch": 4.6, "percentage": 92.0, "elapsed_time": "0:01:09", "remaining_time": "0:00:06", "throughput": 2092.4, "total_tokens": 144960} |
| {"current_steps": 235, "total_steps": 250, "loss": 0.0963, "lr": 1.2425322847218368e-07, "epoch": 4.7, "percentage": 94.0, "elapsed_time": "0:01:10", "remaining_time": "0:00:04", "throughput": 2109.54, "total_tokens": 147936} |
| {"current_steps": 240, "total_steps": 250, "loss": 0.0972, "lr": 5.8858092767236084e-08, "epoch": 4.8, "percentage": 96.0, "elapsed_time": "0:01:11", "remaining_time": "0:00:02", "throughput": 2131.43, "total_tokens": 151392} |
| {"current_steps": 245, "total_steps": 250, "loss": 0.15, "lr": 1.753570375247815e-08, "epoch": 4.9, "percentage": 98.0, "elapsed_time": "0:01:11", "remaining_time": "0:00:01", "throughput": 2148.1, "total_tokens": 154624} |
| {"current_steps": 250, "total_steps": 250, "loss": 0.1126, "lr": 4.87379953478806e-10, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:01:13", "remaining_time": "0:00:00", "throughput": 2170.84, "total_tokens": 158656} |
| {"current_steps": 250, "total_steps": 250, "eval_loss": 0.2241511344909668, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:01:14", "remaining_time": "0:00:00", "throughput": 2133.49, "total_tokens": 158656} |
|
|