train_openbookqa_42_1760623659 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1116
f3dde6d verified
{"current_steps": 5, "total_steps": 22320, "loss": 0.1, "lr": 8.960573476702509e-08, "epoch": 0.004480286738351254, "percentage": 0.02, "elapsed_time": "0:00:01", "remaining_time": "1:32:49", "throughput": 1410.21, "total_tokens": 1760}
{"current_steps": 10, "total_steps": 22320, "loss": 0.153, "lr": 2.0161290322580645e-07, "epoch": 0.008960573476702509, "percentage": 0.04, "elapsed_time": "0:00:02", "remaining_time": "1:15:28", "throughput": 1844.42, "total_tokens": 3744}
{"current_steps": 15, "total_steps": 22320, "loss": 0.5233, "lr": 3.136200716845878e-07, "epoch": 0.013440860215053764, "percentage": 0.07, "elapsed_time": "0:00:02", "remaining_time": "1:08:55", "throughput": 2001.89, "total_tokens": 5568}
{"current_steps": 20, "total_steps": 22320, "loss": 0.4997, "lr": 4.2562724014336925e-07, "epoch": 0.017921146953405017, "percentage": 0.09, "elapsed_time": "0:00:03", "remaining_time": "1:05:09", "throughput": 2080.93, "total_tokens": 7296}
{"current_steps": 25, "total_steps": 22320, "loss": 0.8399, "lr": 5.376344086021506e-07, "epoch": 0.022401433691756272, "percentage": 0.11, "elapsed_time": "0:00:04", "remaining_time": "1:03:43", "throughput": 2164.54, "total_tokens": 9280}
{"current_steps": 30, "total_steps": 22320, "loss": 0.5972, "lr": 6.496415770609319e-07, "epoch": 0.026881720430107527, "percentage": 0.13, "elapsed_time": "0:00:05", "remaining_time": "1:02:24", "throughput": 2215.75, "total_tokens": 11168}
{"current_steps": 35, "total_steps": 22320, "loss": 0.6734, "lr": 7.616487455197133e-07, "epoch": 0.03136200716845878, "percentage": 0.16, "elapsed_time": "0:00:05", "remaining_time": "1:01:10", "throughput": 2248.35, "total_tokens": 12960}
{"current_steps": 40, "total_steps": 22320, "loss": 0.1975, "lr": 8.736559139784946e-07, "epoch": 0.035842293906810034, "percentage": 0.18, "elapsed_time": "0:00:06", "remaining_time": "1:00:13", "throughput": 2278.83, "total_tokens": 14784}
{"current_steps": 45, "total_steps": 22320, "loss": 0.339, "lr": 9.85663082437276e-07, "epoch": 0.04032258064516129, "percentage": 0.2, "elapsed_time": "0:00:07", "remaining_time": "0:59:58", "throughput": 2302.04, "total_tokens": 16736}
{"current_steps": 50, "total_steps": 22320, "loss": 0.3511, "lr": 1.0976702508960573e-06, "epoch": 0.044802867383512544, "percentage": 0.22, "elapsed_time": "0:00:08", "remaining_time": "0:59:58", "throughput": 2325.15, "total_tokens": 18784}
{"current_steps": 55, "total_steps": 22320, "loss": 0.5934, "lr": 1.2096774193548388e-06, "epoch": 0.0492831541218638, "percentage": 0.25, "elapsed_time": "0:00:08", "remaining_time": "0:59:47", "throughput": 2339.62, "total_tokens": 20736}
{"current_steps": 60, "total_steps": 22320, "loss": 1.1356, "lr": 1.3216845878136201e-06, "epoch": 0.053763440860215055, "percentage": 0.27, "elapsed_time": "0:00:09", "remaining_time": "0:59:16", "throughput": 2349.92, "total_tokens": 22528}
{"current_steps": 65, "total_steps": 22320, "loss": 0.8311, "lr": 1.4336917562724014e-06, "epoch": 0.05824372759856631, "percentage": 0.29, "elapsed_time": "0:00:10", "remaining_time": "0:59:01", "throughput": 2360.81, "total_tokens": 24416}
{"current_steps": 70, "total_steps": 22320, "loss": 0.1859, "lr": 1.545698924731183e-06, "epoch": 0.06272401433691756, "percentage": 0.31, "elapsed_time": "0:00:11", "remaining_time": "0:59:03", "throughput": 2368.29, "total_tokens": 26400}
{"current_steps": 75, "total_steps": 22320, "loss": 0.4455, "lr": 1.6577060931899643e-06, "epoch": 0.06720430107526881, "percentage": 0.34, "elapsed_time": "0:00:11", "remaining_time": "0:58:39", "throughput": 2375.79, "total_tokens": 28192}
{"current_steps": 80, "total_steps": 22320, "loss": 0.2879, "lr": 1.7697132616487456e-06, "epoch": 0.07168458781362007, "percentage": 0.36, "elapsed_time": "0:00:12", "remaining_time": "0:58:49", "throughput": 2389.4, "total_tokens": 30336}
{"current_steps": 85, "total_steps": 22320, "loss": 0.2833, "lr": 1.881720430107527e-06, "epoch": 0.07616487455197132, "percentage": 0.38, "elapsed_time": "0:00:13", "remaining_time": "0:58:37", "throughput": 2391.89, "total_tokens": 32160}
{"current_steps": 90, "total_steps": 22320, "loss": 0.6263, "lr": 1.9937275985663085e-06, "epoch": 0.08064516129032258, "percentage": 0.4, "elapsed_time": "0:00:14", "remaining_time": "0:58:27", "throughput": 2406.39, "total_tokens": 34176}
{"current_steps": 95, "total_steps": 22320, "loss": 0.4051, "lr": 2.10573476702509e-06, "epoch": 0.08512544802867383, "percentage": 0.43, "elapsed_time": "0:00:14", "remaining_time": "0:58:18", "throughput": 2409.41, "total_tokens": 36032}
{"current_steps": 100, "total_steps": 22320, "loss": 0.6321, "lr": 2.217741935483871e-06, "epoch": 0.08960573476702509, "percentage": 0.45, "elapsed_time": "0:00:15", "remaining_time": "0:58:17", "throughput": 2419.34, "total_tokens": 38080}
{"current_steps": 105, "total_steps": 22320, "loss": 0.6962, "lr": 2.3297491039426526e-06, "epoch": 0.09408602150537634, "percentage": 0.47, "elapsed_time": "0:00:16", "remaining_time": "0:58:03", "throughput": 2421.73, "total_tokens": 39872}
{"current_steps": 110, "total_steps": 22320, "loss": 0.1895, "lr": 2.441756272401434e-06, "epoch": 0.0985663082437276, "percentage": 0.49, "elapsed_time": "0:00:17", "remaining_time": "0:58:11", "throughput": 2426.0, "total_tokens": 41952}
{"current_steps": 115, "total_steps": 22320, "loss": 0.095, "lr": 2.5537634408602153e-06, "epoch": 0.10304659498207885, "percentage": 0.52, "elapsed_time": "0:00:18", "remaining_time": "0:58:03", "throughput": 2428.09, "total_tokens": 43808}
{"current_steps": 120, "total_steps": 22320, "loss": 0.5633, "lr": 2.6657706093189964e-06, "epoch": 0.10752688172043011, "percentage": 0.54, "elapsed_time": "0:00:18", "remaining_time": "0:57:56", "throughput": 2429.68, "total_tokens": 45664}
{"current_steps": 125, "total_steps": 22320, "loss": 1.2632, "lr": 2.777777777777778e-06, "epoch": 0.11200716845878136, "percentage": 0.56, "elapsed_time": "0:00:19", "remaining_time": "0:57:50", "throughput": 2432.72, "total_tokens": 47552}
{"current_steps": 130, "total_steps": 22320, "loss": 0.8563, "lr": 2.889784946236559e-06, "epoch": 0.11648745519713262, "percentage": 0.58, "elapsed_time": "0:00:20", "remaining_time": "0:57:49", "throughput": 2436.87, "total_tokens": 49536}
{"current_steps": 135, "total_steps": 22320, "loss": 0.3517, "lr": 3.0017921146953406e-06, "epoch": 0.12096774193548387, "percentage": 0.6, "elapsed_time": "0:00:21", "remaining_time": "0:57:43", "throughput": 2437.07, "total_tokens": 51360}
{"current_steps": 140, "total_steps": 22320, "loss": 0.41, "lr": 3.113799283154122e-06, "epoch": 0.12544802867383512, "percentage": 0.63, "elapsed_time": "0:00:21", "remaining_time": "0:57:46", "throughput": 2439.35, "total_tokens": 53376}
{"current_steps": 145, "total_steps": 22320, "loss": 0.6065, "lr": 3.225806451612903e-06, "epoch": 0.12992831541218638, "percentage": 0.65, "elapsed_time": "0:00:22", "remaining_time": "0:57:52", "throughput": 2442.01, "total_tokens": 55456}
{"current_steps": 150, "total_steps": 22320, "loss": 1.2668, "lr": 3.337813620071685e-06, "epoch": 0.13440860215053763, "percentage": 0.67, "elapsed_time": "0:00:23", "remaining_time": "0:57:43", "throughput": 2444.37, "total_tokens": 57280}
{"current_steps": 155, "total_steps": 22320, "loss": 0.5723, "lr": 3.4498207885304663e-06, "epoch": 0.1388888888888889, "percentage": 0.69, "elapsed_time": "0:00:24", "remaining_time": "0:57:45", "throughput": 2448.2, "total_tokens": 59328}
{"current_steps": 160, "total_steps": 22320, "loss": 0.1183, "lr": 3.5618279569892478e-06, "epoch": 0.14336917562724014, "percentage": 0.72, "elapsed_time": "0:00:24", "remaining_time": "0:57:40", "throughput": 2448.72, "total_tokens": 61184}
{"current_steps": 165, "total_steps": 22320, "loss": 0.0791, "lr": 3.673835125448029e-06, "epoch": 0.1478494623655914, "percentage": 0.74, "elapsed_time": "0:00:25", "remaining_time": "0:57:42", "throughput": 2452.18, "total_tokens": 63232}
{"current_steps": 170, "total_steps": 22320, "loss": 0.0846, "lr": 3.7858422939068104e-06, "epoch": 0.15232974910394265, "percentage": 0.76, "elapsed_time": "0:00:26", "remaining_time": "0:57:40", "throughput": 2457.91, "total_tokens": 65280}
{"current_steps": 175, "total_steps": 22320, "loss": 0.5018, "lr": 3.8978494623655915e-06, "epoch": 0.15681003584229392, "percentage": 0.78, "elapsed_time": "0:00:27", "remaining_time": "0:57:39", "throughput": 2461.85, "total_tokens": 67296}
{"current_steps": 180, "total_steps": 22320, "loss": 0.7717, "lr": 4.009856630824372e-06, "epoch": 0.16129032258064516, "percentage": 0.81, "elapsed_time": "0:00:28", "remaining_time": "0:57:30", "throughput": 2465.02, "total_tokens": 69152}
{"current_steps": 185, "total_steps": 22320, "loss": 0.4106, "lr": 4.121863799283155e-06, "epoch": 0.16577060931899643, "percentage": 0.83, "elapsed_time": "0:00:28", "remaining_time": "0:57:25", "throughput": 2463.66, "total_tokens": 70944}
{"current_steps": 190, "total_steps": 22320, "loss": 0.797, "lr": 4.233870967741935e-06, "epoch": 0.17025089605734767, "percentage": 0.85, "elapsed_time": "0:00:29", "remaining_time": "0:57:24", "throughput": 2464.17, "total_tokens": 72864}
{"current_steps": 195, "total_steps": 22320, "loss": 0.4614, "lr": 4.345878136200717e-06, "epoch": 0.17473118279569894, "percentage": 0.87, "elapsed_time": "0:00:30", "remaining_time": "0:57:26", "throughput": 2468.17, "total_tokens": 74976}
{"current_steps": 200, "total_steps": 22320, "loss": 0.698, "lr": 4.457885304659498e-06, "epoch": 0.17921146953405018, "percentage": 0.9, "elapsed_time": "0:00:31", "remaining_time": "0:57:31", "throughput": 2470.26, "total_tokens": 77088}
{"current_steps": 205, "total_steps": 22320, "loss": 0.5984, "lr": 4.56989247311828e-06, "epoch": 0.18369175627240145, "percentage": 0.92, "elapsed_time": "0:00:31", "remaining_time": "0:57:30", "throughput": 2470.3, "total_tokens": 79008}
{"current_steps": 210, "total_steps": 22320, "loss": 1.1934, "lr": 4.6818996415770606e-06, "epoch": 0.1881720430107527, "percentage": 0.94, "elapsed_time": "0:00:32", "remaining_time": "0:57:29", "throughput": 2472.32, "total_tokens": 80992}
{"current_steps": 215, "total_steps": 22320, "loss": 0.3976, "lr": 4.793906810035843e-06, "epoch": 0.19265232974910393, "percentage": 0.96, "elapsed_time": "0:00:33", "remaining_time": "0:57:28", "throughput": 2474.01, "total_tokens": 82976}
{"current_steps": 220, "total_steps": 22320, "loss": 0.7468, "lr": 4.905913978494624e-06, "epoch": 0.1971326164874552, "percentage": 0.99, "elapsed_time": "0:00:34", "remaining_time": "0:57:21", "throughput": 2472.46, "total_tokens": 84704}
{"current_steps": 225, "total_steps": 22320, "loss": 0.3572, "lr": 5.017921146953405e-06, "epoch": 0.20161290322580644, "percentage": 1.01, "elapsed_time": "0:00:34", "remaining_time": "0:57:14", "throughput": 2474.88, "total_tokens": 86560}
{"current_steps": 230, "total_steps": 22320, "loss": 0.3175, "lr": 5.129928315412187e-06, "epoch": 0.2060931899641577, "percentage": 1.03, "elapsed_time": "0:00:35", "remaining_time": "0:57:07", "throughput": 2476.4, "total_tokens": 88384}
{"current_steps": 235, "total_steps": 22320, "loss": 0.3456, "lr": 5.241935483870968e-06, "epoch": 0.21057347670250895, "percentage": 1.05, "elapsed_time": "0:00:36", "remaining_time": "0:57:07", "throughput": 2476.16, "total_tokens": 90304}
{"current_steps": 240, "total_steps": 22320, "loss": 1.119, "lr": 5.353942652329749e-06, "epoch": 0.21505376344086022, "percentage": 1.08, "elapsed_time": "0:00:37", "remaining_time": "0:57:06", "throughput": 2478.22, "total_tokens": 92288}
{"current_steps": 245, "total_steps": 22320, "loss": 0.6064, "lr": 5.465949820788531e-06, "epoch": 0.21953405017921146, "percentage": 1.1, "elapsed_time": "0:00:37", "remaining_time": "0:57:02", "throughput": 2477.86, "total_tokens": 94112}
{"current_steps": 250, "total_steps": 22320, "loss": 0.4863, "lr": 5.577956989247312e-06, "epoch": 0.22401433691756273, "percentage": 1.12, "elapsed_time": "0:00:38", "remaining_time": "0:56:58", "throughput": 2477.25, "total_tokens": 95936}
{"current_steps": 255, "total_steps": 22320, "loss": 0.3748, "lr": 5.6899641577060935e-06, "epoch": 0.22849462365591397, "percentage": 1.14, "elapsed_time": "0:00:39", "remaining_time": "0:56:52", "throughput": 2477.7, "total_tokens": 97728}
{"current_steps": 260, "total_steps": 22320, "loss": 0.2687, "lr": 5.801971326164875e-06, "epoch": 0.23297491039426524, "percentage": 1.16, "elapsed_time": "0:00:40", "remaining_time": "0:56:50", "throughput": 2477.6, "total_tokens": 99584}
{"current_steps": 265, "total_steps": 22320, "loss": 1.0935, "lr": 5.9139784946236566e-06, "epoch": 0.23745519713261648, "percentage": 1.19, "elapsed_time": "0:00:40", "remaining_time": "0:56:52", "throughput": 2480.4, "total_tokens": 101696}
{"current_steps": 270, "total_steps": 22320, "loss": 0.5318, "lr": 6.025985663082437e-06, "epoch": 0.24193548387096775, "percentage": 1.21, "elapsed_time": "0:00:41", "remaining_time": "0:56:49", "throughput": 2483.87, "total_tokens": 103712}
{"current_steps": 275, "total_steps": 22320, "loss": 0.138, "lr": 6.13799283154122e-06, "epoch": 0.246415770609319, "percentage": 1.23, "elapsed_time": "0:00:42", "remaining_time": "0:56:51", "throughput": 2485.0, "total_tokens": 105760}
{"current_steps": 280, "total_steps": 22320, "loss": 0.5887, "lr": 6.25e-06, "epoch": 0.25089605734767023, "percentage": 1.25, "elapsed_time": "0:00:43", "remaining_time": "0:56:47", "throughput": 2485.43, "total_tokens": 107584}
{"current_steps": 285, "total_steps": 22320, "loss": 0.3456, "lr": 6.362007168458782e-06, "epoch": 0.2553763440860215, "percentage": 1.28, "elapsed_time": "0:00:44", "remaining_time": "0:56:47", "throughput": 2487.11, "total_tokens": 109600}
{"current_steps": 290, "total_steps": 22320, "loss": 0.883, "lr": 6.4740143369175625e-06, "epoch": 0.25985663082437277, "percentage": 1.3, "elapsed_time": "0:00:44", "remaining_time": "0:56:44", "throughput": 2488.92, "total_tokens": 111552}
{"current_steps": 295, "total_steps": 22320, "loss": 0.4669, "lr": 6.586021505376344e-06, "epoch": 0.26433691756272404, "percentage": 1.32, "elapsed_time": "0:00:45", "remaining_time": "0:56:48", "throughput": 2489.34, "total_tokens": 113632}
{"current_steps": 300, "total_steps": 22320, "loss": 0.3623, "lr": 6.698028673835126e-06, "epoch": 0.26881720430107525, "percentage": 1.34, "elapsed_time": "0:00:46", "remaining_time": "0:56:47", "throughput": 2491.18, "total_tokens": 115648}
{"current_steps": 305, "total_steps": 22320, "loss": 0.3704, "lr": 6.810035842293908e-06, "epoch": 0.2732974910394265, "percentage": 1.37, "elapsed_time": "0:00:47", "remaining_time": "0:56:42", "throughput": 2491.77, "total_tokens": 117472}
{"current_steps": 310, "total_steps": 22320, "loss": 0.3892, "lr": 6.922043010752688e-06, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:47", "remaining_time": "0:56:40", "throughput": 2491.02, "total_tokens": 119296}
{"current_steps": 315, "total_steps": 22320, "loss": 0.252, "lr": 7.034050179211469e-06, "epoch": 0.28225806451612906, "percentage": 1.41, "elapsed_time": "0:00:48", "remaining_time": "0:56:37", "throughput": 2489.65, "total_tokens": 121088}
{"current_steps": 320, "total_steps": 22320, "loss": 0.2195, "lr": 7.146057347670252e-06, "epoch": 0.2867383512544803, "percentage": 1.43, "elapsed_time": "0:00:49", "remaining_time": "0:56:36", "throughput": 2488.85, "total_tokens": 122976}
{"current_steps": 325, "total_steps": 22320, "loss": 0.6521, "lr": 7.258064516129033e-06, "epoch": 0.29121863799283154, "percentage": 1.46, "elapsed_time": "0:00:50", "remaining_time": "0:56:36", "throughput": 2488.57, "total_tokens": 124896}
{"current_steps": 330, "total_steps": 22320, "loss": 0.8932, "lr": 7.370071684587813e-06, "epoch": 0.2956989247311828, "percentage": 1.48, "elapsed_time": "0:00:50", "remaining_time": "0:56:33", "throughput": 2487.56, "total_tokens": 126688}
{"current_steps": 335, "total_steps": 22320, "loss": 0.7969, "lr": 7.4820788530465954e-06, "epoch": 0.300179211469534, "percentage": 1.5, "elapsed_time": "0:00:51", "remaining_time": "0:56:29", "throughput": 2487.82, "total_tokens": 128480}
{"current_steps": 340, "total_steps": 22320, "loss": 0.4057, "lr": 7.594086021505377e-06, "epoch": 0.3046594982078853, "percentage": 1.52, "elapsed_time": "0:00:52", "remaining_time": "0:56:24", "throughput": 2489.31, "total_tokens": 130336}
{"current_steps": 345, "total_steps": 22320, "loss": 0.9385, "lr": 7.706093189964159e-06, "epoch": 0.30913978494623656, "percentage": 1.55, "elapsed_time": "0:00:53", "remaining_time": "0:56:22", "throughput": 2490.0, "total_tokens": 132224}
{"current_steps": 350, "total_steps": 22320, "loss": 0.6802, "lr": 7.81810035842294e-06, "epoch": 0.31362007168458783, "percentage": 1.57, "elapsed_time": "0:00:53", "remaining_time": "0:56:22", "throughput": 2490.95, "total_tokens": 134208}
{"current_steps": 355, "total_steps": 22320, "loss": 0.9067, "lr": 7.93010752688172e-06, "epoch": 0.31810035842293904, "percentage": 1.59, "elapsed_time": "0:00:54", "remaining_time": "0:56:19", "throughput": 2490.45, "total_tokens": 136032}
{"current_steps": 360, "total_steps": 22320, "loss": 1.1152, "lr": 8.042114695340502e-06, "epoch": 0.3225806451612903, "percentage": 1.61, "elapsed_time": "0:00:55", "remaining_time": "0:56:19", "throughput": 2490.98, "total_tokens": 137984}
{"current_steps": 365, "total_steps": 22320, "loss": 0.8845, "lr": 8.154121863799285e-06, "epoch": 0.3270609318996416, "percentage": 1.64, "elapsed_time": "0:00:56", "remaining_time": "0:56:16", "throughput": 2489.94, "total_tokens": 139776}
{"current_steps": 370, "total_steps": 22320, "loss": 0.4401, "lr": 8.266129032258065e-06, "epoch": 0.33154121863799285, "percentage": 1.66, "elapsed_time": "0:00:56", "remaining_time": "0:56:14", "throughput": 2490.43, "total_tokens": 141664}
{"current_steps": 375, "total_steps": 22320, "loss": 1.0284, "lr": 8.378136200716846e-06, "epoch": 0.33602150537634407, "percentage": 1.68, "elapsed_time": "0:00:57", "remaining_time": "0:56:10", "throughput": 2489.9, "total_tokens": 143424}
{"current_steps": 380, "total_steps": 22320, "loss": 0.2113, "lr": 8.490143369175628e-06, "epoch": 0.34050179211469533, "percentage": 1.7, "elapsed_time": "0:00:58", "remaining_time": "0:56:13", "throughput": 2489.76, "total_tokens": 145472}
{"current_steps": 385, "total_steps": 22320, "loss": 0.9801, "lr": 8.602150537634409e-06, "epoch": 0.3449820788530466, "percentage": 1.72, "elapsed_time": "0:00:59", "remaining_time": "0:56:13", "throughput": 2490.65, "total_tokens": 147456}
{"current_steps": 390, "total_steps": 22320, "loss": 0.3952, "lr": 8.71415770609319e-06, "epoch": 0.34946236559139787, "percentage": 1.75, "elapsed_time": "0:00:59", "remaining_time": "0:56:12", "throughput": 2490.39, "total_tokens": 149376}
{"current_steps": 395, "total_steps": 22320, "loss": 0.7884, "lr": 8.826164874551972e-06, "epoch": 0.3539426523297491, "percentage": 1.77, "elapsed_time": "0:01:00", "remaining_time": "0:56:10", "throughput": 2490.21, "total_tokens": 151232}
{"current_steps": 400, "total_steps": 22320, "loss": 0.6505, "lr": 8.938172043010753e-06, "epoch": 0.35842293906810035, "percentage": 1.79, "elapsed_time": "0:01:01", "remaining_time": "0:56:10", "throughput": 2491.49, "total_tokens": 153248}
{"current_steps": 405, "total_steps": 22320, "loss": 0.7866, "lr": 9.050179211469534e-06, "epoch": 0.3629032258064516, "percentage": 1.81, "elapsed_time": "0:01:02", "remaining_time": "0:56:10", "throughput": 2491.42, "total_tokens": 155168}
{"current_steps": 410, "total_steps": 22320, "loss": 0.312, "lr": 9.162186379928316e-06, "epoch": 0.3673835125448029, "percentage": 1.84, "elapsed_time": "0:01:03", "remaining_time": "0:56:07", "throughput": 2492.01, "total_tokens": 157056}
{"current_steps": 415, "total_steps": 22320, "loss": 0.5187, "lr": 9.274193548387097e-06, "epoch": 0.3718637992831541, "percentage": 1.86, "elapsed_time": "0:01:03", "remaining_time": "0:56:06", "throughput": 2492.85, "total_tokens": 158976}
{"current_steps": 420, "total_steps": 22320, "loss": 0.1762, "lr": 9.386200716845879e-06, "epoch": 0.3763440860215054, "percentage": 1.88, "elapsed_time": "0:01:04", "remaining_time": "0:56:05", "throughput": 2493.25, "total_tokens": 160928}
{"current_steps": 425, "total_steps": 22320, "loss": 0.5424, "lr": 9.49820788530466e-06, "epoch": 0.38082437275985664, "percentage": 1.9, "elapsed_time": "0:01:05", "remaining_time": "0:56:08", "throughput": 2494.34, "total_tokens": 163072}
{"current_steps": 430, "total_steps": 22320, "loss": 0.6677, "lr": 9.61021505376344e-06, "epoch": 0.38530465949820786, "percentage": 1.93, "elapsed_time": "0:01:06", "remaining_time": "0:56:09", "throughput": 2494.9, "total_tokens": 165120}
{"current_steps": 435, "total_steps": 22320, "loss": 0.3333, "lr": 9.722222222222223e-06, "epoch": 0.3897849462365591, "percentage": 1.95, "elapsed_time": "0:01:06", "remaining_time": "0:56:07", "throughput": 2495.03, "total_tokens": 167008}
{"current_steps": 440, "total_steps": 22320, "loss": 0.271, "lr": 9.834229390681005e-06, "epoch": 0.3942652329749104, "percentage": 1.97, "elapsed_time": "0:01:07", "remaining_time": "0:56:05", "throughput": 2496.3, "total_tokens": 168960}
{"current_steps": 445, "total_steps": 22320, "loss": 0.3637, "lr": 9.946236559139786e-06, "epoch": 0.39874551971326166, "percentage": 1.99, "elapsed_time": "0:01:08", "remaining_time": "0:56:05", "throughput": 2496.55, "total_tokens": 170912}
{"current_steps": 450, "total_steps": 22320, "loss": 0.3312, "lr": 1.0058243727598566e-05, "epoch": 0.4032258064516129, "percentage": 2.02, "elapsed_time": "0:01:09", "remaining_time": "0:56:02", "throughput": 2497.35, "total_tokens": 172768}
{"current_steps": 455, "total_steps": 22320, "loss": 0.5239, "lr": 1.0170250896057349e-05, "epoch": 0.40770609318996415, "percentage": 2.04, "elapsed_time": "0:01:09", "remaining_time": "0:56:01", "throughput": 2498.38, "total_tokens": 174784}
{"current_steps": 460, "total_steps": 22320, "loss": 0.9593, "lr": 1.028225806451613e-05, "epoch": 0.4121863799283154, "percentage": 2.06, "elapsed_time": "0:01:10", "remaining_time": "0:56:00", "throughput": 2497.39, "total_tokens": 176608}
{"current_steps": 465, "total_steps": 22320, "loss": 0.4693, "lr": 1.039426523297491e-05, "epoch": 0.4166666666666667, "percentage": 2.08, "elapsed_time": "0:01:11", "remaining_time": "0:55:59", "throughput": 2498.2, "total_tokens": 178560}
{"current_steps": 470, "total_steps": 22320, "loss": 0.6993, "lr": 1.0506272401433693e-05, "epoch": 0.4211469534050179, "percentage": 2.11, "elapsed_time": "0:01:12", "remaining_time": "0:55:56", "throughput": 2499.23, "total_tokens": 180448}
{"current_steps": 475, "total_steps": 22320, "loss": 0.5103, "lr": 1.0618279569892473e-05, "epoch": 0.42562724014336917, "percentage": 2.13, "elapsed_time": "0:01:12", "remaining_time": "0:55:54", "throughput": 2499.49, "total_tokens": 182336}
{"current_steps": 480, "total_steps": 22320, "loss": 0.4961, "lr": 1.0730286738351256e-05, "epoch": 0.43010752688172044, "percentage": 2.15, "elapsed_time": "0:01:13", "remaining_time": "0:55:55", "throughput": 2501.43, "total_tokens": 184448}
{"current_steps": 485, "total_steps": 22320, "loss": 0.321, "lr": 1.0842293906810036e-05, "epoch": 0.4345878136200717, "percentage": 2.17, "elapsed_time": "0:01:14", "remaining_time": "0:55:56", "throughput": 2501.71, "total_tokens": 186528}
{"current_steps": 490, "total_steps": 22320, "loss": 0.4652, "lr": 1.0954301075268817e-05, "epoch": 0.4390681003584229, "percentage": 2.2, "elapsed_time": "0:01:15", "remaining_time": "0:55:55", "throughput": 2500.49, "total_tokens": 188320}
{"current_steps": 495, "total_steps": 22320, "loss": 0.3923, "lr": 1.10663082437276e-05, "epoch": 0.4435483870967742, "percentage": 2.22, "elapsed_time": "0:01:16", "remaining_time": "0:55:52", "throughput": 2501.24, "total_tokens": 190176}
{"current_steps": 500, "total_steps": 22320, "loss": 0.1978, "lr": 1.117831541218638e-05, "epoch": 0.44802867383512546, "percentage": 2.24, "elapsed_time": "0:01:16", "remaining_time": "0:55:50", "throughput": 2501.43, "total_tokens": 192064}
{"current_steps": 505, "total_steps": 22320, "loss": 0.5553, "lr": 1.129032258064516e-05, "epoch": 0.4525089605734767, "percentage": 2.26, "elapsed_time": "0:01:17", "remaining_time": "0:55:51", "throughput": 2500.85, "total_tokens": 194016}
{"current_steps": 510, "total_steps": 22320, "loss": 0.9895, "lr": 1.1402329749103943e-05, "epoch": 0.45698924731182794, "percentage": 2.28, "elapsed_time": "0:01:18", "remaining_time": "0:55:49", "throughput": 2500.61, "total_tokens": 195872}
{"current_steps": 515, "total_steps": 22320, "loss": 0.6503, "lr": 1.1514336917562725e-05, "epoch": 0.4614695340501792, "percentage": 2.31, "elapsed_time": "0:01:19", "remaining_time": "0:55:50", "throughput": 2499.89, "total_tokens": 197824}
{"current_steps": 520, "total_steps": 22320, "loss": 0.3375, "lr": 1.1626344086021504e-05, "epoch": 0.4659498207885305, "percentage": 2.33, "elapsed_time": "0:01:19", "remaining_time": "0:55:49", "throughput": 2500.37, "total_tokens": 199744}
{"current_steps": 525, "total_steps": 22320, "loss": 0.5212, "lr": 1.1738351254480287e-05, "epoch": 0.47043010752688175, "percentage": 2.35, "elapsed_time": "0:01:20", "remaining_time": "0:55:49", "throughput": 2500.89, "total_tokens": 201792}
{"current_steps": 530, "total_steps": 22320, "loss": 0.63, "lr": 1.185035842293907e-05, "epoch": 0.47491039426523296, "percentage": 2.37, "elapsed_time": "0:01:21", "remaining_time": "0:55:47", "throughput": 2500.05, "total_tokens": 203584}
{"current_steps": 535, "total_steps": 22320, "loss": 0.3951, "lr": 1.196236559139785e-05, "epoch": 0.47939068100358423, "percentage": 2.4, "elapsed_time": "0:01:22", "remaining_time": "0:55:46", "throughput": 2500.04, "total_tokens": 205440}
{"current_steps": 540, "total_steps": 22320, "loss": 0.0334, "lr": 1.207437275985663e-05, "epoch": 0.4838709677419355, "percentage": 2.42, "elapsed_time": "0:01:22", "remaining_time": "0:55:45", "throughput": 2500.29, "total_tokens": 207392}
{"current_steps": 545, "total_steps": 22320, "loss": 0.4284, "lr": 1.2186379928315413e-05, "epoch": 0.4883512544802867, "percentage": 2.44, "elapsed_time": "0:01:23", "remaining_time": "0:55:42", "throughput": 2500.27, "total_tokens": 209184}
{"current_steps": 550, "total_steps": 22320, "loss": 0.2976, "lr": 1.2298387096774194e-05, "epoch": 0.492831541218638, "percentage": 2.46, "elapsed_time": "0:01:24", "remaining_time": "0:55:42", "throughput": 2500.76, "total_tokens": 211168}
{"current_steps": 555, "total_steps": 22320, "loss": 0.0206, "lr": 1.2410394265232976e-05, "epoch": 0.49731182795698925, "percentage": 2.49, "elapsed_time": "0:01:25", "remaining_time": "0:55:42", "throughput": 2502.15, "total_tokens": 213248}
{"current_steps": 560, "total_steps": 22320, "loss": 0.2356, "lr": 1.2522401433691758e-05, "epoch": 0.5017921146953405, "percentage": 2.51, "elapsed_time": "0:01:26", "remaining_time": "0:55:41", "throughput": 2502.54, "total_tokens": 215232}
{"current_steps": 565, "total_steps": 22320, "loss": 0.1333, "lr": 1.2634408602150537e-05, "epoch": 0.5062724014336918, "percentage": 2.53, "elapsed_time": "0:01:26", "remaining_time": "0:55:39", "throughput": 2503.61, "total_tokens": 217120}
{"current_steps": 570, "total_steps": 22320, "loss": 0.591, "lr": 1.2746415770609318e-05, "epoch": 0.510752688172043, "percentage": 2.55, "elapsed_time": "0:01:27", "remaining_time": "0:55:38", "throughput": 2503.8, "total_tokens": 219072}
{"current_steps": 575, "total_steps": 22320, "loss": 0.2303, "lr": 1.2858422939068102e-05, "epoch": 0.5152329749103942, "percentage": 2.58, "elapsed_time": "0:01:28", "remaining_time": "0:55:41", "throughput": 2504.09, "total_tokens": 221248}
{"current_steps": 580, "total_steps": 22320, "loss": 0.6713, "lr": 1.2970430107526881e-05, "epoch": 0.5197132616487455, "percentage": 2.6, "elapsed_time": "0:01:29", "remaining_time": "0:55:39", "throughput": 2504.68, "total_tokens": 223168}
{"current_steps": 585, "total_steps": 22320, "loss": 0.6727, "lr": 1.3082437275985665e-05, "epoch": 0.5241935483870968, "percentage": 2.62, "elapsed_time": "0:01:29", "remaining_time": "0:55:38", "throughput": 2504.94, "total_tokens": 225056}
{"current_steps": 590, "total_steps": 22320, "loss": 0.4865, "lr": 1.3194444444444446e-05, "epoch": 0.5286738351254481, "percentage": 2.64, "elapsed_time": "0:01:30", "remaining_time": "0:55:35", "throughput": 2503.55, "total_tokens": 226752}
{"current_steps": 595, "total_steps": 22320, "loss": 0.6252, "lr": 1.3306451612903225e-05, "epoch": 0.5331541218637993, "percentage": 2.67, "elapsed_time": "0:01:31", "remaining_time": "0:55:34", "throughput": 2503.45, "total_tokens": 228608}
{"current_steps": 600, "total_steps": 22320, "loss": 0.321, "lr": 1.3418458781362009e-05, "epoch": 0.5376344086021505, "percentage": 2.69, "elapsed_time": "0:01:32", "remaining_time": "0:55:31", "throughput": 2504.43, "total_tokens": 230496}
{"current_steps": 605, "total_steps": 22320, "loss": 0.4843, "lr": 1.353046594982079e-05, "epoch": 0.5421146953405018, "percentage": 2.71, "elapsed_time": "0:01:32", "remaining_time": "0:55:30", "throughput": 2504.51, "total_tokens": 232384}
{"current_steps": 610, "total_steps": 22320, "loss": 0.7556, "lr": 1.364247311827957e-05, "epoch": 0.546594982078853, "percentage": 2.73, "elapsed_time": "0:01:33", "remaining_time": "0:55:31", "throughput": 2505.14, "total_tokens": 234464}
{"current_steps": 615, "total_steps": 22320, "loss": 0.5438, "lr": 1.3754480286738353e-05, "epoch": 0.5510752688172043, "percentage": 2.76, "elapsed_time": "0:01:34", "remaining_time": "0:55:28", "throughput": 2505.02, "total_tokens": 236256}
{"current_steps": 620, "total_steps": 22320, "loss": 0.976, "lr": 1.3866487455197133e-05, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:01:35", "remaining_time": "0:55:26", "throughput": 2503.89, "total_tokens": 237952}
{"current_steps": 625, "total_steps": 22320, "loss": 0.7232, "lr": 1.3978494623655914e-05, "epoch": 0.5600358422939068, "percentage": 2.8, "elapsed_time": "0:01:35", "remaining_time": "0:55:24", "throughput": 2503.77, "total_tokens": 239808}
{"current_steps": 630, "total_steps": 22320, "loss": 0.6839, "lr": 1.4090501792114696e-05, "epoch": 0.5645161290322581, "percentage": 2.82, "elapsed_time": "0:01:36", "remaining_time": "0:55:24", "throughput": 2503.66, "total_tokens": 241728}
{"current_steps": 635, "total_steps": 22320, "loss": 0.9764, "lr": 1.4202508960573477e-05, "epoch": 0.5689964157706093, "percentage": 2.84, "elapsed_time": "0:01:37", "remaining_time": "0:55:23", "throughput": 2502.87, "total_tokens": 243584}
{"current_steps": 640, "total_steps": 22320, "loss": 0.8917, "lr": 1.431451612903226e-05, "epoch": 0.5734767025089605, "percentage": 2.87, "elapsed_time": "0:01:38", "remaining_time": "0:55:23", "throughput": 2502.53, "total_tokens": 245504}
{"current_steps": 645, "total_steps": 22320, "loss": 0.1861, "lr": 1.442652329749104e-05, "epoch": 0.5779569892473119, "percentage": 2.89, "elapsed_time": "0:01:38", "remaining_time": "0:55:21", "throughput": 2503.54, "total_tokens": 247488}
{"current_steps": 650, "total_steps": 22320, "loss": 0.6675, "lr": 1.4538530465949821e-05, "epoch": 0.5824372759856631, "percentage": 2.91, "elapsed_time": "0:01:39", "remaining_time": "0:55:20", "throughput": 2503.1, "total_tokens": 249312}
{"current_steps": 655, "total_steps": 22320, "loss": 0.5956, "lr": 1.4650537634408603e-05, "epoch": 0.5869175627240143, "percentage": 2.93, "elapsed_time": "0:01:40", "remaining_time": "0:55:18", "throughput": 2503.6, "total_tokens": 251168}
{"current_steps": 660, "total_steps": 22320, "loss": 0.6105, "lr": 1.4762544802867384e-05, "epoch": 0.5913978494623656, "percentage": 2.96, "elapsed_time": "0:01:41", "remaining_time": "0:55:16", "throughput": 2503.51, "total_tokens": 253024}
{"current_steps": 665, "total_steps": 22320, "loss": 0.159, "lr": 1.4874551971326165e-05, "epoch": 0.5958781362007168, "percentage": 2.98, "elapsed_time": "0:01:41", "remaining_time": "0:55:16", "throughput": 2503.47, "total_tokens": 254944}
{"current_steps": 670, "total_steps": 22320, "loss": 0.5005, "lr": 1.4986559139784947e-05, "epoch": 0.600358422939068, "percentage": 3.0, "elapsed_time": "0:01:42", "remaining_time": "0:55:14", "throughput": 2502.73, "total_tokens": 256736}
{"current_steps": 675, "total_steps": 22320, "loss": 0.7048, "lr": 1.5098566308243728e-05, "epoch": 0.6048387096774194, "percentage": 3.02, "elapsed_time": "0:01:43", "remaining_time": "0:55:13", "throughput": 2501.96, "total_tokens": 258528}
{"current_steps": 680, "total_steps": 22320, "loss": 0.6209, "lr": 1.5210573476702512e-05, "epoch": 0.6093189964157706, "percentage": 3.05, "elapsed_time": "0:01:44", "remaining_time": "0:55:12", "throughput": 2501.27, "total_tokens": 260320}
{"current_steps": 685, "total_steps": 22320, "loss": 0.7793, "lr": 1.5322580645161292e-05, "epoch": 0.6137992831541219, "percentage": 3.07, "elapsed_time": "0:01:44", "remaining_time": "0:55:10", "throughput": 2501.5, "total_tokens": 262208}
{"current_steps": 690, "total_steps": 22320, "loss": 0.5574, "lr": 1.5434587813620073e-05, "epoch": 0.6182795698924731, "percentage": 3.09, "elapsed_time": "0:01:45", "remaining_time": "0:55:10", "throughput": 2502.08, "total_tokens": 264224}
{"current_steps": 695, "total_steps": 22320, "loss": 0.281, "lr": 1.5546594982078854e-05, "epoch": 0.6227598566308243, "percentage": 3.11, "elapsed_time": "0:01:46", "remaining_time": "0:55:09", "throughput": 2501.71, "total_tokens": 266112}
{"current_steps": 700, "total_steps": 22320, "loss": 0.7409, "lr": 1.5658602150537635e-05, "epoch": 0.6272401433691757, "percentage": 3.14, "elapsed_time": "0:01:47", "remaining_time": "0:55:08", "throughput": 2502.34, "total_tokens": 268064}
{"current_steps": 705, "total_steps": 22320, "loss": 0.9796, "lr": 1.5770609318996415e-05, "epoch": 0.6317204301075269, "percentage": 3.16, "elapsed_time": "0:01:47", "remaining_time": "0:55:07", "throughput": 2502.94, "total_tokens": 270016}
{"current_steps": 710, "total_steps": 22320, "loss": 0.3543, "lr": 1.58826164874552e-05, "epoch": 0.6362007168458781, "percentage": 3.18, "elapsed_time": "0:01:48", "remaining_time": "0:55:07", "throughput": 2503.02, "total_tokens": 271968}
{"current_steps": 715, "total_steps": 22320, "loss": 0.6082, "lr": 1.599462365591398e-05, "epoch": 0.6406810035842294, "percentage": 3.2, "elapsed_time": "0:01:49", "remaining_time": "0:55:08", "throughput": 2503.53, "total_tokens": 274080}
{"current_steps": 720, "total_steps": 22320, "loss": 0.7931, "lr": 1.610663082437276e-05, "epoch": 0.6451612903225806, "percentage": 3.23, "elapsed_time": "0:01:50", "remaining_time": "0:55:08", "throughput": 2503.35, "total_tokens": 276064}
{"current_steps": 725, "total_steps": 22320, "loss": 0.2323, "lr": 1.621863799283154e-05, "epoch": 0.649641577060932, "percentage": 3.25, "elapsed_time": "0:01:50", "remaining_time": "0:55:06", "throughput": 2503.86, "total_tokens": 277920}
{"current_steps": 730, "total_steps": 22320, "loss": 0.6261, "lr": 1.6330645161290322e-05, "epoch": 0.6541218637992832, "percentage": 3.27, "elapsed_time": "0:01:51", "remaining_time": "0:55:05", "throughput": 2503.19, "total_tokens": 279776}
{"current_steps": 735, "total_steps": 22320, "loss": 0.4431, "lr": 1.6442652329749106e-05, "epoch": 0.6586021505376344, "percentage": 3.29, "elapsed_time": "0:01:52", "remaining_time": "0:55:04", "throughput": 2504.2, "total_tokens": 281760}
{"current_steps": 740, "total_steps": 22320, "loss": 0.4891, "lr": 1.6554659498207887e-05, "epoch": 0.6630824372759857, "percentage": 3.32, "elapsed_time": "0:01:53", "remaining_time": "0:55:02", "throughput": 2504.14, "total_tokens": 283616}
{"current_steps": 745, "total_steps": 22320, "loss": 0.45, "lr": 1.6666666666666667e-05, "epoch": 0.6675627240143369, "percentage": 3.34, "elapsed_time": "0:01:54", "remaining_time": "0:55:02", "throughput": 2504.43, "total_tokens": 285600}
{"current_steps": 750, "total_steps": 22320, "loss": 0.8572, "lr": 1.6778673835125448e-05, "epoch": 0.6720430107526881, "percentage": 3.36, "elapsed_time": "0:01:54", "remaining_time": "0:55:01", "throughput": 2504.7, "total_tokens": 287488}
{"current_steps": 755, "total_steps": 22320, "loss": 0.3229, "lr": 1.689068100358423e-05, "epoch": 0.6765232974910395, "percentage": 3.38, "elapsed_time": "0:01:55", "remaining_time": "0:54:59", "throughput": 2504.08, "total_tokens": 289280}
{"current_steps": 760, "total_steps": 22320, "loss": 0.548, "lr": 1.700268817204301e-05, "epoch": 0.6810035842293907, "percentage": 3.41, "elapsed_time": "0:01:56", "remaining_time": "0:54:57", "throughput": 2504.31, "total_tokens": 291104}
{"current_steps": 765, "total_steps": 22320, "loss": 0.2596, "lr": 1.7114695340501794e-05, "epoch": 0.6854838709677419, "percentage": 3.43, "elapsed_time": "0:01:57", "remaining_time": "0:54:57", "throughput": 2504.39, "total_tokens": 293056}
{"current_steps": 770, "total_steps": 22320, "loss": 0.016, "lr": 1.7226702508960574e-05, "epoch": 0.6899641577060932, "percentage": 3.45, "elapsed_time": "0:01:57", "remaining_time": "0:54:56", "throughput": 2504.78, "total_tokens": 295040}
{"current_steps": 775, "total_steps": 22320, "loss": 0.4213, "lr": 1.733870967741936e-05, "epoch": 0.6944444444444444, "percentage": 3.47, "elapsed_time": "0:01:58", "remaining_time": "0:54:56", "throughput": 2504.87, "total_tokens": 296992}
{"current_steps": 780, "total_steps": 22320, "loss": 0.9288, "lr": 1.7450716845878136e-05, "epoch": 0.6989247311827957, "percentage": 3.49, "elapsed_time": "0:01:59", "remaining_time": "0:54:54", "throughput": 2504.02, "total_tokens": 298752}
{"current_steps": 785, "total_steps": 22320, "loss": 0.0371, "lr": 1.7562724014336916e-05, "epoch": 0.703405017921147, "percentage": 3.52, "elapsed_time": "0:02:00", "remaining_time": "0:54:54", "throughput": 2503.9, "total_tokens": 300672}
{"current_steps": 790, "total_steps": 22320, "loss": 0.7117, "lr": 1.76747311827957e-05, "epoch": 0.7078853046594982, "percentage": 3.54, "elapsed_time": "0:02:01", "remaining_time": "0:54:58", "throughput": 2499.03, "total_tokens": 302432}
{"current_steps": 795, "total_steps": 22320, "loss": 0.4057, "lr": 1.778673835125448e-05, "epoch": 0.7123655913978495, "percentage": 3.56, "elapsed_time": "0:02:01", "remaining_time": "0:54:57", "throughput": 2498.97, "total_tokens": 304352}
{"current_steps": 800, "total_steps": 22320, "loss": 0.4516, "lr": 1.7898745519713262e-05, "epoch": 0.7168458781362007, "percentage": 3.58, "elapsed_time": "0:02:02", "remaining_time": "0:54:56", "throughput": 2498.71, "total_tokens": 306176}
{"current_steps": 805, "total_steps": 22320, "loss": 0.9836, "lr": 1.8010752688172046e-05, "epoch": 0.7213261648745519, "percentage": 3.61, "elapsed_time": "0:02:03", "remaining_time": "0:54:56", "throughput": 2500.16, "total_tokens": 308384}
{"current_steps": 810, "total_steps": 22320, "loss": 0.3821, "lr": 1.8122759856630827e-05, "epoch": 0.7258064516129032, "percentage": 3.63, "elapsed_time": "0:02:04", "remaining_time": "0:54:56", "throughput": 2500.27, "total_tokens": 310400}
{"current_steps": 815, "total_steps": 22320, "loss": 0.3619, "lr": 1.8234767025089607e-05, "epoch": 0.7302867383512545, "percentage": 3.65, "elapsed_time": "0:02:04", "remaining_time": "0:54:56", "throughput": 2500.37, "total_tokens": 312352}
{"current_steps": 820, "total_steps": 22320, "loss": 0.3926, "lr": 1.8346774193548388e-05, "epoch": 0.7347670250896058, "percentage": 3.67, "elapsed_time": "0:02:05", "remaining_time": "0:54:55", "throughput": 2500.28, "total_tokens": 314272}
{"current_steps": 825, "total_steps": 22320, "loss": 0.3082, "lr": 1.845878136200717e-05, "epoch": 0.739247311827957, "percentage": 3.7, "elapsed_time": "0:02:06", "remaining_time": "0:54:53", "throughput": 2499.28, "total_tokens": 315936}
{"current_steps": 830, "total_steps": 22320, "loss": 0.1763, "lr": 1.8570788530465953e-05, "epoch": 0.7437275985663082, "percentage": 3.72, "elapsed_time": "0:02:07", "remaining_time": "0:54:54", "throughput": 2500.3, "total_tokens": 318144}
{"current_steps": 835, "total_steps": 22320, "loss": 0.5911, "lr": 1.8682795698924733e-05, "epoch": 0.7482078853046595, "percentage": 3.74, "elapsed_time": "0:02:07", "remaining_time": "0:54:53", "throughput": 2500.28, "total_tokens": 320000}
{"current_steps": 840, "total_steps": 22320, "loss": 0.4838, "lr": 1.8794802867383514e-05, "epoch": 0.7526881720430108, "percentage": 3.76, "elapsed_time": "0:02:08", "remaining_time": "0:54:52", "throughput": 2500.16, "total_tokens": 321920}
{"current_steps": 845, "total_steps": 22320, "loss": 0.3201, "lr": 1.8906810035842295e-05, "epoch": 0.757168458781362, "percentage": 3.79, "elapsed_time": "0:02:09", "remaining_time": "0:54:50", "throughput": 2499.62, "total_tokens": 323648}
{"current_steps": 850, "total_steps": 22320, "loss": 0.8224, "lr": 1.9018817204301075e-05, "epoch": 0.7616487455197133, "percentage": 3.81, "elapsed_time": "0:02:10", "remaining_time": "0:54:50", "throughput": 2500.69, "total_tokens": 325792}
{"current_steps": 855, "total_steps": 22320, "loss": 0.1675, "lr": 1.9130824372759856e-05, "epoch": 0.7661290322580645, "percentage": 3.83, "elapsed_time": "0:02:11", "remaining_time": "0:54:49", "throughput": 2500.67, "total_tokens": 327648}
{"current_steps": 860, "total_steps": 22320, "loss": 0.195, "lr": 1.924283154121864e-05, "epoch": 0.7706093189964157, "percentage": 3.85, "elapsed_time": "0:02:11", "remaining_time": "0:54:47", "throughput": 2499.91, "total_tokens": 329344}
{"current_steps": 865, "total_steps": 22320, "loss": 0.5966, "lr": 1.935483870967742e-05, "epoch": 0.775089605734767, "percentage": 3.88, "elapsed_time": "0:02:12", "remaining_time": "0:54:46", "throughput": 2500.32, "total_tokens": 331264}
{"current_steps": 870, "total_steps": 22320, "loss": 0.5672, "lr": 1.94668458781362e-05, "epoch": 0.7795698924731183, "percentage": 3.9, "elapsed_time": "0:02:13", "remaining_time": "0:54:46", "throughput": 2500.68, "total_tokens": 333312}
{"current_steps": 875, "total_steps": 22320, "loss": 0.2481, "lr": 1.9578853046594982e-05, "epoch": 0.7840501792114696, "percentage": 3.92, "elapsed_time": "0:02:14", "remaining_time": "0:54:44", "throughput": 2500.62, "total_tokens": 335168}
{"current_steps": 880, "total_steps": 22320, "loss": 0.671, "lr": 1.9690860215053763e-05, "epoch": 0.7885304659498208, "percentage": 3.94, "elapsed_time": "0:02:14", "remaining_time": "0:54:44", "throughput": 2500.02, "total_tokens": 337024}
{"current_steps": 885, "total_steps": 22320, "loss": 0.4557, "lr": 1.9802867383512547e-05, "epoch": 0.793010752688172, "percentage": 3.97, "elapsed_time": "0:02:15", "remaining_time": "0:54:43", "throughput": 2499.9, "total_tokens": 338944}
{"current_steps": 890, "total_steps": 22320, "loss": 0.5454, "lr": 1.9914874551971328e-05, "epoch": 0.7974910394265233, "percentage": 3.99, "elapsed_time": "0:02:16", "remaining_time": "0:54:41", "throughput": 2499.61, "total_tokens": 340704}
{"current_steps": 895, "total_steps": 22320, "loss": 0.7283, "lr": 2.002688172043011e-05, "epoch": 0.8019713261648745, "percentage": 4.01, "elapsed_time": "0:02:17", "remaining_time": "0:54:41", "throughput": 2499.31, "total_tokens": 342592}
{"current_steps": 900, "total_steps": 22320, "loss": 0.7909, "lr": 2.013888888888889e-05, "epoch": 0.8064516129032258, "percentage": 4.03, "elapsed_time": "0:02:17", "remaining_time": "0:54:40", "throughput": 2499.47, "total_tokens": 344480}
{"current_steps": 905, "total_steps": 22320, "loss": 0.187, "lr": 2.025089605734767e-05, "epoch": 0.8109318996415771, "percentage": 4.05, "elapsed_time": "0:02:18", "remaining_time": "0:54:39", "throughput": 2499.79, "total_tokens": 346400}
{"current_steps": 910, "total_steps": 22320, "loss": 0.603, "lr": 2.0362903225806454e-05, "epoch": 0.8154121863799283, "percentage": 4.08, "elapsed_time": "0:02:19", "remaining_time": "0:54:38", "throughput": 2500.12, "total_tokens": 348384}
{"current_steps": 915, "total_steps": 22320, "loss": 0.5976, "lr": 2.0474910394265234e-05, "epoch": 0.8198924731182796, "percentage": 4.1, "elapsed_time": "0:02:20", "remaining_time": "0:54:37", "throughput": 2499.88, "total_tokens": 350208}
{"current_steps": 920, "total_steps": 22320, "loss": 0.346, "lr": 2.0586917562724015e-05, "epoch": 0.8243727598566308, "percentage": 4.12, "elapsed_time": "0:02:20", "remaining_time": "0:54:36", "throughput": 2500.01, "total_tokens": 352096}
{"current_steps": 925, "total_steps": 22320, "loss": 0.165, "lr": 2.06989247311828e-05, "epoch": 0.828853046594982, "percentage": 4.14, "elapsed_time": "0:02:21", "remaining_time": "0:54:34", "throughput": 2499.98, "total_tokens": 353952}
{"current_steps": 930, "total_steps": 22320, "loss": 0.3194, "lr": 2.0810931899641577e-05, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:02:22", "remaining_time": "0:54:34", "throughput": 2500.34, "total_tokens": 355936}
{"current_steps": 935, "total_steps": 22320, "loss": 0.3586, "lr": 2.0922939068100357e-05, "epoch": 0.8378136200716846, "percentage": 4.19, "elapsed_time": "0:02:23", "remaining_time": "0:54:33", "throughput": 2500.67, "total_tokens": 357920}
{"current_steps": 940, "total_steps": 22320, "loss": 0.4995, "lr": 2.103494623655914e-05, "epoch": 0.8422939068100358, "percentage": 4.21, "elapsed_time": "0:02:23", "remaining_time": "0:54:31", "throughput": 2500.19, "total_tokens": 359648}
{"current_steps": 945, "total_steps": 22320, "loss": 0.6559, "lr": 2.1146953405017922e-05, "epoch": 0.8467741935483871, "percentage": 4.23, "elapsed_time": "0:02:24", "remaining_time": "0:54:30", "throughput": 2500.13, "total_tokens": 361504}
{"current_steps": 950, "total_steps": 22320, "loss": 0.6549, "lr": 2.1258960573476703e-05, "epoch": 0.8512544802867383, "percentage": 4.26, "elapsed_time": "0:02:25", "remaining_time": "0:54:30", "throughput": 2500.18, "total_tokens": 363520}
{"current_steps": 955, "total_steps": 22320, "loss": 0.3485, "lr": 2.1370967741935487e-05, "epoch": 0.8557347670250897, "percentage": 4.28, "elapsed_time": "0:02:26", "remaining_time": "0:54:29", "throughput": 2499.76, "total_tokens": 365312}
{"current_steps": 960, "total_steps": 22320, "loss": 0.4881, "lr": 2.1482974910394264e-05, "epoch": 0.8602150537634409, "percentage": 4.3, "elapsed_time": "0:02:26", "remaining_time": "0:54:28", "throughput": 2499.88, "total_tokens": 367200}
{"current_steps": 965, "total_steps": 22320, "loss": 0.3639, "lr": 2.1594982078853048e-05, "epoch": 0.8646953405017921, "percentage": 4.32, "elapsed_time": "0:02:27", "remaining_time": "0:54:28", "throughput": 2500.03, "total_tokens": 369216}
{"current_steps": 970, "total_steps": 22320, "loss": 0.3529, "lr": 2.170698924731183e-05, "epoch": 0.8691756272401434, "percentage": 4.35, "elapsed_time": "0:02:28", "remaining_time": "0:54:27", "throughput": 2499.77, "total_tokens": 371104}
{"current_steps": 975, "total_steps": 22320, "loss": 0.3365, "lr": 2.181899641577061e-05, "epoch": 0.8736559139784946, "percentage": 4.37, "elapsed_time": "0:02:29", "remaining_time": "0:54:25", "throughput": 2499.36, "total_tokens": 372832}
{"current_steps": 980, "total_steps": 22320, "loss": 0.5711, "lr": 2.1931003584229394e-05, "epoch": 0.8781362007168458, "percentage": 4.39, "elapsed_time": "0:02:29", "remaining_time": "0:54:24", "throughput": 2498.83, "total_tokens": 374624}
{"current_steps": 985, "total_steps": 22320, "loss": 0.6758, "lr": 2.2043010752688174e-05, "epoch": 0.8826164874551972, "percentage": 4.41, "elapsed_time": "0:02:30", "remaining_time": "0:54:22", "throughput": 2498.29, "total_tokens": 376352}
{"current_steps": 990, "total_steps": 22320, "loss": 0.3817, "lr": 2.2155017921146955e-05, "epoch": 0.8870967741935484, "percentage": 4.44, "elapsed_time": "0:02:31", "remaining_time": "0:54:21", "throughput": 2498.49, "total_tokens": 378240}
{"current_steps": 995, "total_steps": 22320, "loss": 0.6357, "lr": 2.2267025089605736e-05, "epoch": 0.8915770609318996, "percentage": 4.46, "elapsed_time": "0:02:32", "remaining_time": "0:54:21", "throughput": 2498.64, "total_tokens": 380192}
{"current_steps": 1000, "total_steps": 22320, "loss": 0.2808, "lr": 2.2379032258064516e-05, "epoch": 0.8960573476702509, "percentage": 4.48, "elapsed_time": "0:02:32", "remaining_time": "0:54:19", "throughput": 2498.59, "total_tokens": 382048}
{"current_steps": 1005, "total_steps": 22320, "loss": 0.3951, "lr": 2.24910394265233e-05, "epoch": 0.9005376344086021, "percentage": 4.5, "elapsed_time": "0:02:33", "remaining_time": "0:54:18", "throughput": 2498.74, "total_tokens": 383936}
{"current_steps": 1010, "total_steps": 22320, "loss": 0.4673, "lr": 2.260304659498208e-05, "epoch": 0.9050179211469535, "percentage": 4.53, "elapsed_time": "0:02:34", "remaining_time": "0:54:17", "throughput": 2499.05, "total_tokens": 385856}
{"current_steps": 1015, "total_steps": 22320, "loss": 0.1285, "lr": 2.271505376344086e-05, "epoch": 0.9094982078853047, "percentage": 4.55, "elapsed_time": "0:02:35", "remaining_time": "0:54:17", "throughput": 2499.4, "total_tokens": 387840}
{"current_steps": 1020, "total_steps": 22320, "loss": 0.194, "lr": 2.2827060931899642e-05, "epoch": 0.9139784946236559, "percentage": 4.57, "elapsed_time": "0:02:35", "remaining_time": "0:54:15", "throughput": 2499.41, "total_tokens": 389696}
{"current_steps": 1025, "total_steps": 22320, "loss": 0.3775, "lr": 2.2939068100358423e-05, "epoch": 0.9184587813620072, "percentage": 4.59, "elapsed_time": "0:02:36", "remaining_time": "0:54:15", "throughput": 2499.55, "total_tokens": 391648}
{"current_steps": 1030, "total_steps": 22320, "loss": 0.3799, "lr": 2.3051075268817204e-05, "epoch": 0.9229390681003584, "percentage": 4.61, "elapsed_time": "0:02:37", "remaining_time": "0:54:14", "throughput": 2500.23, "total_tokens": 393632}
{"current_steps": 1035, "total_steps": 22320, "loss": 0.2621, "lr": 2.3163082437275988e-05, "epoch": 0.9274193548387096, "percentage": 4.64, "elapsed_time": "0:02:38", "remaining_time": "0:54:12", "throughput": 2500.58, "total_tokens": 395488}
{"current_steps": 1040, "total_steps": 22320, "loss": 0.2157, "lr": 2.327508960573477e-05, "epoch": 0.931899641577061, "percentage": 4.66, "elapsed_time": "0:02:38", "remaining_time": "0:54:12", "throughput": 2501.02, "total_tokens": 397504}
{"current_steps": 1045, "total_steps": 22320, "loss": 0.4109, "lr": 2.338709677419355e-05, "epoch": 0.9363799283154122, "percentage": 4.68, "elapsed_time": "0:02:39", "remaining_time": "0:54:10", "throughput": 2500.79, "total_tokens": 399328}
{"current_steps": 1050, "total_steps": 22320, "loss": 0.2281, "lr": 2.349910394265233e-05, "epoch": 0.9408602150537635, "percentage": 4.7, "elapsed_time": "0:02:40", "remaining_time": "0:54:09", "throughput": 2500.78, "total_tokens": 401184}
{"current_steps": 1055, "total_steps": 22320, "loss": 0.5954, "lr": 2.361111111111111e-05, "epoch": 0.9453405017921147, "percentage": 4.73, "elapsed_time": "0:02:41", "remaining_time": "0:54:07", "throughput": 2500.78, "total_tokens": 402976}
{"current_steps": 1060, "total_steps": 22320, "loss": 0.6111, "lr": 2.3723118279569895e-05, "epoch": 0.9498207885304659, "percentage": 4.75, "elapsed_time": "0:02:41", "remaining_time": "0:54:06", "throughput": 2500.53, "total_tokens": 404800}
{"current_steps": 1065, "total_steps": 22320, "loss": 0.6238, "lr": 2.3835125448028675e-05, "epoch": 0.9543010752688172, "percentage": 4.77, "elapsed_time": "0:02:42", "remaining_time": "0:54:06", "throughput": 2501.19, "total_tokens": 406848}
{"current_steps": 1070, "total_steps": 22320, "loss": 0.2241, "lr": 2.3947132616487456e-05, "epoch": 0.9587813620071685, "percentage": 4.79, "elapsed_time": "0:02:43", "remaining_time": "0:54:04", "throughput": 2501.43, "total_tokens": 408672}
{"current_steps": 1075, "total_steps": 22320, "loss": 0.198, "lr": 2.405913978494624e-05, "epoch": 0.9632616487455197, "percentage": 4.82, "elapsed_time": "0:02:44", "remaining_time": "0:54:03", "throughput": 2501.42, "total_tokens": 410528}
{"current_steps": 1080, "total_steps": 22320, "loss": 0.0783, "lr": 2.4171146953405017e-05, "epoch": 0.967741935483871, "percentage": 4.84, "elapsed_time": "0:02:44", "remaining_time": "0:54:03", "throughput": 2501.84, "total_tokens": 412608}
{"current_steps": 1085, "total_steps": 22320, "loss": 0.2988, "lr": 2.4283154121863798e-05, "epoch": 0.9722222222222222, "percentage": 4.86, "elapsed_time": "0:02:45", "remaining_time": "0:54:02", "throughput": 2502.1, "total_tokens": 414592}
{"current_steps": 1090, "total_steps": 22320, "loss": 0.3516, "lr": 2.4395161290322582e-05, "epoch": 0.9767025089605734, "percentage": 4.88, "elapsed_time": "0:02:46", "remaining_time": "0:54:01", "throughput": 2501.64, "total_tokens": 416384}
{"current_steps": 1095, "total_steps": 22320, "loss": 0.4137, "lr": 2.4507168458781363e-05, "epoch": 0.9811827956989247, "percentage": 4.91, "elapsed_time": "0:02:47", "remaining_time": "0:54:01", "throughput": 2502.43, "total_tokens": 418464}
{"current_steps": 1100, "total_steps": 22320, "loss": 0.2417, "lr": 2.4619175627240147e-05, "epoch": 0.985663082437276, "percentage": 4.93, "elapsed_time": "0:02:47", "remaining_time": "0:54:00", "throughput": 2502.0, "total_tokens": 420320}
{"current_steps": 1105, "total_steps": 22320, "loss": 0.3452, "lr": 2.4731182795698928e-05, "epoch": 0.9901433691756273, "percentage": 4.95, "elapsed_time": "0:02:48", "remaining_time": "0:53:59", "throughput": 2502.0, "total_tokens": 422112}
{"current_steps": 1110, "total_steps": 22320, "loss": 0.1934, "lr": 2.4843189964157705e-05, "epoch": 0.9946236559139785, "percentage": 4.97, "elapsed_time": "0:02:49", "remaining_time": "0:53:57", "throughput": 2501.04, "total_tokens": 423744}
{"current_steps": 1115, "total_steps": 22320, "loss": 0.2553, "lr": 2.495519713261649e-05, "epoch": 0.9991039426523297, "percentage": 5.0, "elapsed_time": "0:02:50", "remaining_time": "0:53:56", "throughput": 2500.68, "total_tokens": 425536}
{"current_steps": 1116, "total_steps": 22320, "eval_loss": 0.39862996339797974, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:02:56", "remaining_time": "0:55:46", "throughput": 2416.22, "total_tokens": 425624}
{"current_steps": 1120, "total_steps": 22320, "loss": 0.5559, "lr": 2.506720430107527e-05, "epoch": 1.003584229390681, "percentage": 5.02, "elapsed_time": "0:02:57", "remaining_time": "0:56:02", "throughput": 2404.47, "total_tokens": 427096}
{"current_steps": 1125, "total_steps": 22320, "loss": 0.475, "lr": 2.5179211469534054e-05, "epoch": 1.0080645161290323, "percentage": 5.04, "elapsed_time": "0:02:58", "remaining_time": "0:56:00", "throughput": 2404.55, "total_tokens": 428952}
{"current_steps": 1130, "total_steps": 22320, "loss": 0.568, "lr": 2.529121863799283e-05, "epoch": 1.0125448028673836, "percentage": 5.06, "elapsed_time": "0:02:59", "remaining_time": "0:56:00", "throughput": 2405.36, "total_tokens": 431096}
{"current_steps": 1135, "total_steps": 22320, "loss": 0.2781, "lr": 2.5403225806451615e-05, "epoch": 1.0170250896057347, "percentage": 5.09, "elapsed_time": "0:02:59", "remaining_time": "0:55:59", "throughput": 2404.75, "total_tokens": 432824}
{"current_steps": 1140, "total_steps": 22320, "loss": 0.1999, "lr": 2.5515232974910396e-05, "epoch": 1.021505376344086, "percentage": 5.11, "elapsed_time": "0:03:00", "remaining_time": "0:55:58", "throughput": 2405.09, "total_tokens": 434744}
{"current_steps": 1145, "total_steps": 22320, "loss": 0.5623, "lr": 2.5627240143369173e-05, "epoch": 1.0259856630824373, "percentage": 5.13, "elapsed_time": "0:03:01", "remaining_time": "0:55:57", "throughput": 2405.53, "total_tokens": 436728}
{"current_steps": 1150, "total_steps": 22320, "loss": 0.2475, "lr": 2.5739247311827957e-05, "epoch": 1.0304659498207884, "percentage": 5.15, "elapsed_time": "0:03:02", "remaining_time": "0:55:56", "throughput": 2405.78, "total_tokens": 438616}
{"current_steps": 1155, "total_steps": 22320, "loss": 0.1974, "lr": 2.585125448028674e-05, "epoch": 1.0349462365591398, "percentage": 5.17, "elapsed_time": "0:03:03", "remaining_time": "0:55:55", "throughput": 2406.02, "total_tokens": 440536}
{"current_steps": 1160, "total_steps": 22320, "loss": 0.2639, "lr": 2.596326164874552e-05, "epoch": 1.039426523297491, "percentage": 5.2, "elapsed_time": "0:03:03", "remaining_time": "0:55:53", "throughput": 2406.38, "total_tokens": 442360}
{"current_steps": 1165, "total_steps": 22320, "loss": 0.3875, "lr": 2.6075268817204303e-05, "epoch": 1.0439068100358422, "percentage": 5.22, "elapsed_time": "0:03:04", "remaining_time": "0:55:52", "throughput": 2406.99, "total_tokens": 444344}
{"current_steps": 1170, "total_steps": 22320, "loss": 0.3316, "lr": 2.6187275985663083e-05, "epoch": 1.0483870967741935, "percentage": 5.24, "elapsed_time": "0:03:05", "remaining_time": "0:55:51", "throughput": 2407.24, "total_tokens": 446328}
{"current_steps": 1175, "total_steps": 22320, "loss": 0.4495, "lr": 2.6299283154121867e-05, "epoch": 1.0528673835125448, "percentage": 5.26, "elapsed_time": "0:03:06", "remaining_time": "0:55:51", "throughput": 2408.14, "total_tokens": 448504}
{"current_steps": 1180, "total_steps": 22320, "loss": 0.2988, "lr": 2.6411290322580645e-05, "epoch": 1.0573476702508962, "percentage": 5.29, "elapsed_time": "0:03:07", "remaining_time": "0:55:50", "throughput": 2408.16, "total_tokens": 450392}
{"current_steps": 1185, "total_steps": 22320, "loss": 0.388, "lr": 2.652329749103943e-05, "epoch": 1.0618279569892473, "percentage": 5.31, "elapsed_time": "0:03:07", "remaining_time": "0:55:49", "throughput": 2408.73, "total_tokens": 452376}
{"current_steps": 1190, "total_steps": 22320, "loss": 0.4558, "lr": 2.6635304659498213e-05, "epoch": 1.0663082437275986, "percentage": 5.33, "elapsed_time": "0:03:08", "remaining_time": "0:55:47", "throughput": 2408.43, "total_tokens": 454072}
{"current_steps": 1195, "total_steps": 22320, "loss": 0.4358, "lr": 2.674731182795699e-05, "epoch": 1.07078853046595, "percentage": 5.35, "elapsed_time": "0:03:09", "remaining_time": "0:55:45", "throughput": 2408.9, "total_tokens": 455928}
{"current_steps": 1200, "total_steps": 22320, "loss": 0.7141, "lr": 2.685931899641577e-05, "epoch": 1.075268817204301, "percentage": 5.38, "elapsed_time": "0:03:10", "remaining_time": "0:55:44", "throughput": 2409.38, "total_tokens": 457848}
{"current_steps": 1205, "total_steps": 22320, "loss": 0.4969, "lr": 2.6971326164874555e-05, "epoch": 1.0797491039426523, "percentage": 5.4, "elapsed_time": "0:03:10", "remaining_time": "0:55:43", "throughput": 2409.71, "total_tokens": 459800}
{"current_steps": 1210, "total_steps": 22320, "loss": 0.7206, "lr": 2.7083333333333332e-05, "epoch": 1.0842293906810037, "percentage": 5.42, "elapsed_time": "0:03:11", "remaining_time": "0:55:41", "throughput": 2409.84, "total_tokens": 461592}
{"current_steps": 1215, "total_steps": 22320, "loss": 0.372, "lr": 2.7195340501792116e-05, "epoch": 1.0887096774193548, "percentage": 5.44, "elapsed_time": "0:03:12", "remaining_time": "0:55:41", "throughput": 2409.16, "total_tokens": 463448}
{"current_steps": 1220, "total_steps": 22320, "loss": 0.4173, "lr": 2.73073476702509e-05, "epoch": 1.093189964157706, "percentage": 5.47, "elapsed_time": "0:03:13", "remaining_time": "0:55:40", "throughput": 2409.09, "total_tokens": 465240}