train_codealpacapy_1756727022 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1908
7bf79ca verified
{"current_steps": 5, "total_steps": 38150, "loss": 6.7972, "lr": 5.2424639580602885e-08, "epoch": 0.001310615989515072, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "3:03:37", "throughput": 819.81, "total_tokens": 1184}
{"current_steps": 10, "total_steps": 38150, "loss": 6.4561, "lr": 1.1795543905635651e-07, "epoch": 0.002621231979030144, "percentage": 0.03, "elapsed_time": "0:00:02", "remaining_time": "2:12:29", "throughput": 1174.52, "total_tokens": 2448}
{"current_steps": 15, "total_steps": 38150, "loss": 6.3346, "lr": 1.8348623853211012e-07, "epoch": 0.003931847968545216, "percentage": 0.04, "elapsed_time": "0:00:02", "remaining_time": "1:55:44", "throughput": 1511.15, "total_tokens": 4128}
{"current_steps": 20, "total_steps": 38150, "loss": 6.3685, "lr": 2.490170380078637e-07, "epoch": 0.005242463958060288, "percentage": 0.05, "elapsed_time": "0:00:03", "remaining_time": "1:47:10", "throughput": 1679.35, "total_tokens": 5664}
{"current_steps": 25, "total_steps": 38150, "loss": 6.9961, "lr": 3.1454783748361734e-07, "epoch": 0.00655307994757536, "percentage": 0.07, "elapsed_time": "0:00:04", "remaining_time": "1:41:53", "throughput": 1756.14, "total_tokens": 7040}
{"current_steps": 30, "total_steps": 38150, "loss": 6.2504, "lr": 3.8007863695937093e-07, "epoch": 0.007863695937090432, "percentage": 0.08, "elapsed_time": "0:00:04", "remaining_time": "1:37:28", "throughput": 1814.57, "total_tokens": 8352}
{"current_steps": 35, "total_steps": 38150, "loss": 6.3378, "lr": 4.4560943643512453e-07, "epoch": 0.009174311926605505, "percentage": 0.09, "elapsed_time": "0:00:05", "remaining_time": "1:31:41", "throughput": 1916.09, "total_tokens": 9680}
{"current_steps": 40, "total_steps": 38150, "loss": 6.0318, "lr": 5.111402359108782e-07, "epoch": 0.010484927916120577, "percentage": 0.1, "elapsed_time": "0:00:05", "remaining_time": "1:28:11", "throughput": 2057.11, "total_tokens": 11424}
{"current_steps": 45, "total_steps": 38150, "loss": 6.1302, "lr": 5.766710353866317e-07, "epoch": 0.011795543905635648, "percentage": 0.12, "elapsed_time": "0:00:05", "remaining_time": "1:24:26", "throughput": 2096.57, "total_tokens": 12544}
{"current_steps": 50, "total_steps": 38150, "loss": 5.9295, "lr": 6.422018348623854e-07, "epoch": 0.01310615989515072, "percentage": 0.13, "elapsed_time": "0:00:06", "remaining_time": "1:22:23", "throughput": 2175.28, "total_tokens": 14112}
{"current_steps": 55, "total_steps": 38150, "loss": 6.1965, "lr": 7.07732634338139e-07, "epoch": 0.014416775884665793, "percentage": 0.14, "elapsed_time": "0:00:06", "remaining_time": "1:20:04", "throughput": 2195.95, "total_tokens": 15232}
{"current_steps": 60, "total_steps": 38150, "loss": 5.5455, "lr": 7.732634338138926e-07, "epoch": 0.015727391874180863, "percentage": 0.16, "elapsed_time": "0:00:07", "remaining_time": "1:18:38", "throughput": 2262.62, "total_tokens": 16816}
{"current_steps": 65, "total_steps": 38150, "loss": 5.168, "lr": 8.387942332896462e-07, "epoch": 0.01703800786369594, "percentage": 0.17, "elapsed_time": "0:00:07", "remaining_time": "1:16:52", "throughput": 2256.12, "total_tokens": 17760}
{"current_steps": 70, "total_steps": 38150, "loss": 4.9755, "lr": 9.043250327653998e-07, "epoch": 0.01834862385321101, "percentage": 0.18, "elapsed_time": "0:00:08", "remaining_time": "1:15:24", "throughput": 2281.53, "total_tokens": 18976}
{"current_steps": 75, "total_steps": 38150, "loss": 4.9763, "lr": 9.698558322411533e-07, "epoch": 0.019659239842726082, "percentage": 0.2, "elapsed_time": "0:00:08", "remaining_time": "1:14:23", "throughput": 2316.84, "total_tokens": 20368}
{"current_steps": 80, "total_steps": 38150, "loss": 4.647, "lr": 1.035386631716907e-06, "epoch": 0.020969855832241154, "percentage": 0.21, "elapsed_time": "0:00:09", "remaining_time": "1:13:13", "throughput": 2323.77, "total_tokens": 21456}
{"current_steps": 85, "total_steps": 38150, "loss": 4.7302, "lr": 1.1009174311926608e-06, "epoch": 0.022280471821756225, "percentage": 0.22, "elapsed_time": "0:00:09", "remaining_time": "1:12:14", "throughput": 2334.27, "total_tokens": 22592}
{"current_steps": 90, "total_steps": 38150, "loss": 4.7732, "lr": 1.1664482306684142e-06, "epoch": 0.023591087811271297, "percentage": 0.24, "elapsed_time": "0:00:10", "remaining_time": "1:11:18", "throughput": 2345.15, "total_tokens": 23728}
{"current_steps": 95, "total_steps": 38150, "loss": 4.1705, "lr": 1.2319790301441677e-06, "epoch": 0.02490170380078637, "percentage": 0.25, "elapsed_time": "0:00:10", "remaining_time": "1:10:29", "throughput": 2354.83, "total_tokens": 24864}
{"current_steps": 100, "total_steps": 38150, "loss": 4.3175, "lr": 1.2975098296199214e-06, "epoch": 0.02621231979030144, "percentage": 0.26, "elapsed_time": "0:00:11", "remaining_time": "1:09:51", "throughput": 2385.08, "total_tokens": 26272}
{"current_steps": 105, "total_steps": 38150, "loss": 4.3562, "lr": 1.363040629095675e-06, "epoch": 0.027522935779816515, "percentage": 0.28, "elapsed_time": "0:00:11", "remaining_time": "1:09:09", "throughput": 2379.34, "total_tokens": 27248}
{"current_steps": 110, "total_steps": 38150, "loss": 3.3354, "lr": 1.4285714285714286e-06, "epoch": 0.028833551769331587, "percentage": 0.29, "elapsed_time": "0:00:11", "remaining_time": "1:08:48", "throughput": 2403.25, "total_tokens": 28688}
{"current_steps": 115, "total_steps": 38150, "loss": 4.377, "lr": 1.4941022280471821e-06, "epoch": 0.03014416775884666, "percentage": 0.3, "elapsed_time": "0:00:12", "remaining_time": "1:08:16", "throughput": 2405.33, "total_tokens": 29792}
{"current_steps": 120, "total_steps": 38150, "loss": 3.5164, "lr": 1.559633027522936e-06, "epoch": 0.03145478374836173, "percentage": 0.31, "elapsed_time": "0:00:12", "remaining_time": "1:08:08", "throughput": 2435.87, "total_tokens": 31424}
{"current_steps": 125, "total_steps": 38150, "loss": 3.867, "lr": 1.6251638269986893e-06, "epoch": 0.0327653997378768, "percentage": 0.33, "elapsed_time": "0:00:13", "remaining_time": "1:07:35", "throughput": 2428.91, "total_tokens": 32384}
{"current_steps": 130, "total_steps": 38150, "loss": 3.8949, "lr": 1.690694626474443e-06, "epoch": 0.03407601572739188, "percentage": 0.34, "elapsed_time": "0:00:13", "remaining_time": "1:07:13", "throughput": 2451.61, "total_tokens": 33808}
{"current_steps": 135, "total_steps": 38150, "loss": 3.641, "lr": 1.7562254259501965e-06, "epoch": 0.035386631716906945, "percentage": 0.35, "elapsed_time": "0:00:14", "remaining_time": "1:06:46", "throughput": 2453.99, "total_tokens": 34912}
{"current_steps": 140, "total_steps": 38150, "loss": 3.5177, "lr": 1.8217562254259502e-06, "epoch": 0.03669724770642202, "percentage": 0.37, "elapsed_time": "0:00:14", "remaining_time": "1:06:36", "throughput": 2485.05, "total_tokens": 36576}
{"current_steps": 145, "total_steps": 38150, "loss": 3.6426, "lr": 1.8872870249017041e-06, "epoch": 0.03800786369593709, "percentage": 0.38, "elapsed_time": "0:00:15", "remaining_time": "1:06:14", "throughput": 2484.07, "total_tokens": 37664}
{"current_steps": 150, "total_steps": 38150, "loss": 3.6373, "lr": 1.9528178243774574e-06, "epoch": 0.039318479685452164, "percentage": 0.39, "elapsed_time": "0:00:15", "remaining_time": "1:05:52", "throughput": 2488.06, "total_tokens": 38816}
{"current_steps": 155, "total_steps": 38150, "loss": 3.2506, "lr": 2.0183486238532113e-06, "epoch": 0.04062909567496723, "percentage": 0.41, "elapsed_time": "0:00:16", "remaining_time": "1:05:44", "throughput": 2502.52, "total_tokens": 40272}
{"current_steps": 160, "total_steps": 38150, "loss": 3.3949, "lr": 2.083879423328965e-06, "epoch": 0.04193971166448231, "percentage": 0.42, "elapsed_time": "0:00:16", "remaining_time": "1:05:26", "throughput": 2502.97, "total_tokens": 41392}
{"current_steps": 165, "total_steps": 38150, "loss": 3.6481, "lr": 2.1494102228047183e-06, "epoch": 0.04325032765399738, "percentage": 0.43, "elapsed_time": "0:00:17", "remaining_time": "1:05:20", "throughput": 2533.71, "total_tokens": 43152}
{"current_steps": 170, "total_steps": 38150, "loss": 3.3615, "lr": 2.2149410222804718e-06, "epoch": 0.04456094364351245, "percentage": 0.45, "elapsed_time": "0:00:17", "remaining_time": "1:05:27", "throughput": 2559.35, "total_tokens": 44992}
{"current_steps": 175, "total_steps": 38150, "loss": 3.0702, "lr": 2.2804718217562257e-06, "epoch": 0.045871559633027525, "percentage": 0.46, "elapsed_time": "0:00:18", "remaining_time": "1:05:10", "throughput": 2563.52, "total_tokens": 46192}
{"current_steps": 180, "total_steps": 38150, "loss": 3.4623, "lr": 2.346002621231979e-06, "epoch": 0.047182175622542594, "percentage": 0.47, "elapsed_time": "0:00:18", "remaining_time": "1:04:52", "throughput": 2558.47, "total_tokens": 47216}
{"current_steps": 185, "total_steps": 38150, "loss": 2.6485, "lr": 2.4115334207077327e-06, "epoch": 0.04849279161205767, "percentage": 0.48, "elapsed_time": "0:00:18", "remaining_time": "1:04:52", "throughput": 2580.04, "total_tokens": 48944}
{"current_steps": 190, "total_steps": 38150, "loss": 2.7591, "lr": 2.4770642201834866e-06, "epoch": 0.04980340760157274, "percentage": 0.5, "elapsed_time": "0:00:19", "remaining_time": "1:04:38", "throughput": 2590.23, "total_tokens": 50288}
{"current_steps": 195, "total_steps": 38150, "loss": 2.5822, "lr": 2.54259501965924e-06, "epoch": 0.05111402359108781, "percentage": 0.51, "elapsed_time": "0:00:19", "remaining_time": "1:04:25", "throughput": 2595.57, "total_tokens": 51552}
{"current_steps": 200, "total_steps": 38150, "loss": 3.2761, "lr": 2.6081258191349936e-06, "epoch": 0.05242463958060288, "percentage": 0.52, "elapsed_time": "0:00:20", "remaining_time": "1:04:24", "throughput": 2618.24, "total_tokens": 53328}
{"current_steps": 205, "total_steps": 38150, "loss": 2.5969, "lr": 2.673656618610747e-06, "epoch": 0.053735255570117955, "percentage": 0.54, "elapsed_time": "0:00:20", "remaining_time": "1:04:15", "throughput": 2623.07, "total_tokens": 54640}
{"current_steps": 210, "total_steps": 38150, "loss": 2.7451, "lr": 2.739187418086501e-06, "epoch": 0.05504587155963303, "percentage": 0.55, "elapsed_time": "0:00:21", "remaining_time": "1:04:06", "throughput": 2619.49, "total_tokens": 55776}
{"current_steps": 215, "total_steps": 38150, "loss": 2.5944, "lr": 2.8047182175622545e-06, "epoch": 0.0563564875491481, "percentage": 0.56, "elapsed_time": "0:00:21", "remaining_time": "1:03:57", "throughput": 2630.75, "total_tokens": 57216}
{"current_steps": 220, "total_steps": 38150, "loss": 2.7142, "lr": 2.870249017038008e-06, "epoch": 0.057667103538663174, "percentage": 0.58, "elapsed_time": "0:00:22", "remaining_time": "1:04:16", "throughput": 2653.64, "total_tokens": 59360}
{"current_steps": 225, "total_steps": 38150, "loss": 2.8803, "lr": 2.935779816513762e-06, "epoch": 0.05897771952817824, "percentage": 0.59, "elapsed_time": "0:00:22", "remaining_time": "1:04:02", "throughput": 2648.5, "total_tokens": 60384}
{"current_steps": 230, "total_steps": 38150, "loss": 2.4625, "lr": 3.0013106159895154e-06, "epoch": 0.06028833551769332, "percentage": 0.6, "elapsed_time": "0:00:23", "remaining_time": "1:03:59", "throughput": 2657.56, "total_tokens": 61888}
{"current_steps": 235, "total_steps": 38150, "loss": 2.257, "lr": 3.066841415465269e-06, "epoch": 0.061598951507208385, "percentage": 0.62, "elapsed_time": "0:00:23", "remaining_time": "1:03:46", "throughput": 2648.01, "total_tokens": 62800}
{"current_steps": 240, "total_steps": 38150, "loss": 2.3184, "lr": 3.1323722149410228e-06, "epoch": 0.06290956749672345, "percentage": 0.63, "elapsed_time": "0:00:24", "remaining_time": "1:03:37", "throughput": 2658.95, "total_tokens": 64256}
{"current_steps": 245, "total_steps": 38150, "loss": 2.4645, "lr": 3.1979030144167763e-06, "epoch": 0.06422018348623854, "percentage": 0.64, "elapsed_time": "0:00:24", "remaining_time": "1:03:36", "throughput": 2664.34, "total_tokens": 65728}
{"current_steps": 250, "total_steps": 38150, "loss": 2.6083, "lr": 3.2634338138925293e-06, "epoch": 0.0655307994757536, "percentage": 0.66, "elapsed_time": "0:00:25", "remaining_time": "1:03:27", "throughput": 2664.39, "total_tokens": 66912}
{"current_steps": 255, "total_steps": 38150, "loss": 2.2214, "lr": 3.328964613368283e-06, "epoch": 0.06684141546526867, "percentage": 0.67, "elapsed_time": "0:00:25", "remaining_time": "1:03:15", "throughput": 2658.72, "total_tokens": 67904}
{"current_steps": 260, "total_steps": 38150, "loss": 2.3409, "lr": 3.394495412844037e-06, "epoch": 0.06815203145478375, "percentage": 0.68, "elapsed_time": "0:00:26", "remaining_time": "1:03:09", "throughput": 2671.09, "total_tokens": 69456}
{"current_steps": 265, "total_steps": 38150, "loss": 2.8772, "lr": 3.4600262123197906e-06, "epoch": 0.06946264744429882, "percentage": 0.69, "elapsed_time": "0:00:26", "remaining_time": "1:03:04", "throughput": 2675.47, "total_tokens": 70832}
{"current_steps": 270, "total_steps": 38150, "loss": 2.7472, "lr": 3.5255570117955437e-06, "epoch": 0.07077326343381389, "percentage": 0.71, "elapsed_time": "0:00:26", "remaining_time": "1:03:03", "throughput": 2686.79, "total_tokens": 72448}
{"current_steps": 275, "total_steps": 38150, "loss": 2.505, "lr": 3.591087811271298e-06, "epoch": 0.07208387942332896, "percentage": 0.72, "elapsed_time": "0:00:27", "remaining_time": "1:02:59", "throughput": 2693.48, "total_tokens": 73920}
{"current_steps": 280, "total_steps": 38150, "loss": 2.5397, "lr": 3.6566186107470515e-06, "epoch": 0.07339449541284404, "percentage": 0.73, "elapsed_time": "0:00:27", "remaining_time": "1:02:57", "throughput": 2702.45, "total_tokens": 75472}
{"current_steps": 285, "total_steps": 38150, "loss": 2.4062, "lr": 3.7221494102228046e-06, "epoch": 0.07470511140235911, "percentage": 0.75, "elapsed_time": "0:00:28", "remaining_time": "1:02:53", "throughput": 2709.87, "total_tokens": 76976}
{"current_steps": 290, "total_steps": 38150, "loss": 2.3223, "lr": 3.787680209698558e-06, "epoch": 0.07601572739187418, "percentage": 0.76, "elapsed_time": "0:00:28", "remaining_time": "1:02:47", "throughput": 2719.72, "total_tokens": 78496}
{"current_steps": 295, "total_steps": 38150, "loss": 1.9656, "lr": 3.853211009174312e-06, "epoch": 0.07732634338138926, "percentage": 0.77, "elapsed_time": "0:00:29", "remaining_time": "1:02:45", "throughput": 2727.69, "total_tokens": 80032}
{"current_steps": 300, "total_steps": 38150, "loss": 1.6907, "lr": 3.918741808650066e-06, "epoch": 0.07863695937090433, "percentage": 0.79, "elapsed_time": "0:00:29", "remaining_time": "1:02:35", "throughput": 2724.67, "total_tokens": 81104}
{"current_steps": 305, "total_steps": 38150, "loss": 1.7122, "lr": 3.984272608125819e-06, "epoch": 0.0799475753604194, "percentage": 0.8, "elapsed_time": "0:00:30", "remaining_time": "1:02:27", "throughput": 2722.56, "total_tokens": 82224}
{"current_steps": 310, "total_steps": 38150, "loss": 1.7153, "lr": 4.049803407601573e-06, "epoch": 0.08125819134993446, "percentage": 0.81, "elapsed_time": "0:00:30", "remaining_time": "1:02:34", "throughput": 2735.81, "total_tokens": 84160}
{"current_steps": 315, "total_steps": 38150, "loss": 2.0073, "lr": 4.115334207077327e-06, "epoch": 0.08256880733944955, "percentage": 0.83, "elapsed_time": "0:00:31", "remaining_time": "1:02:28", "throughput": 2739.27, "total_tokens": 85488}
{"current_steps": 320, "total_steps": 38150, "loss": 2.2041, "lr": 4.18086500655308e-06, "epoch": 0.08387942332896461, "percentage": 0.84, "elapsed_time": "0:00:31", "remaining_time": "1:02:22", "throughput": 2740.95, "total_tokens": 86768}
{"current_steps": 325, "total_steps": 38150, "loss": 1.6712, "lr": 4.246395806028834e-06, "epoch": 0.08519003931847968, "percentage": 0.85, "elapsed_time": "0:00:32", "remaining_time": "1:02:14", "throughput": 2739.64, "total_tokens": 87920}
{"current_steps": 330, "total_steps": 38150, "loss": 2.5755, "lr": 4.311926605504588e-06, "epoch": 0.08650065530799476, "percentage": 0.87, "elapsed_time": "0:00:32", "remaining_time": "1:02:11", "throughput": 2737.61, "total_tokens": 89136}
{"current_steps": 335, "total_steps": 38150, "loss": 1.9194, "lr": 4.377457404980341e-06, "epoch": 0.08781127129750983, "percentage": 0.88, "elapsed_time": "0:00:33", "remaining_time": "1:02:05", "throughput": 2741.24, "total_tokens": 90464}
{"current_steps": 340, "total_steps": 38150, "loss": 1.8488, "lr": 4.442988204456095e-06, "epoch": 0.0891218872870249, "percentage": 0.89, "elapsed_time": "0:00:33", "remaining_time": "1:02:00", "throughput": 2749.2, "total_tokens": 91984}
{"current_steps": 345, "total_steps": 38150, "loss": 1.3468, "lr": 4.508519003931848e-06, "epoch": 0.09043250327653997, "percentage": 0.9, "elapsed_time": "0:00:33", "remaining_time": "1:01:52", "throughput": 2745.65, "total_tokens": 93024}
{"current_steps": 350, "total_steps": 38150, "loss": 1.5066, "lr": 4.574049803407602e-06, "epoch": 0.09174311926605505, "percentage": 0.92, "elapsed_time": "0:00:34", "remaining_time": "1:01:51", "throughput": 2749.13, "total_tokens": 94464}
{"current_steps": 355, "total_steps": 38150, "loss": 1.7812, "lr": 4.639580602883356e-06, "epoch": 0.09305373525557012, "percentage": 0.93, "elapsed_time": "0:00:34", "remaining_time": "1:01:44", "throughput": 2748.97, "total_tokens": 95664}
{"current_steps": 360, "total_steps": 38150, "loss": 1.4335, "lr": 4.705111402359109e-06, "epoch": 0.09436435124508519, "percentage": 0.94, "elapsed_time": "0:00:35", "remaining_time": "1:01:39", "throughput": 2741.5, "total_tokens": 96608}
{"current_steps": 365, "total_steps": 38150, "loss": 1.339, "lr": 4.7706422018348626e-06, "epoch": 0.09567496723460026, "percentage": 0.96, "elapsed_time": "0:00:35", "remaining_time": "1:01:34", "throughput": 2745.08, "total_tokens": 97968}
{"current_steps": 370, "total_steps": 38150, "loss": 1.1823, "lr": 4.8361730013106165e-06, "epoch": 0.09698558322411534, "percentage": 0.97, "elapsed_time": "0:00:36", "remaining_time": "1:01:28", "throughput": 2744.9, "total_tokens": 99152}
{"current_steps": 375, "total_steps": 38150, "loss": 1.2536, "lr": 4.9017038007863695e-06, "epoch": 0.0982961992136304, "percentage": 0.98, "elapsed_time": "0:00:36", "remaining_time": "1:01:24", "throughput": 2750.43, "total_tokens": 100592}
{"current_steps": 380, "total_steps": 38150, "loss": 1.0514, "lr": 4.9672346002621235e-06, "epoch": 0.09960681520314547, "percentage": 1.0, "elapsed_time": "0:00:37", "remaining_time": "1:01:19", "throughput": 2751.77, "total_tokens": 101856}
{"current_steps": 385, "total_steps": 38150, "loss": 1.0798, "lr": 5.032765399737877e-06, "epoch": 0.10091743119266056, "percentage": 1.01, "elapsed_time": "0:00:37", "remaining_time": "1:01:13", "throughput": 2750.67, "total_tokens": 103024}
{"current_steps": 390, "total_steps": 38150, "loss": 0.7388, "lr": 5.0982961992136304e-06, "epoch": 0.10222804718217562, "percentage": 1.02, "elapsed_time": "0:00:37", "remaining_time": "1:01:13", "throughput": 2754.71, "total_tokens": 104512}
{"current_steps": 395, "total_steps": 38150, "loss": 0.7521, "lr": 5.163826998689384e-06, "epoch": 0.10353866317169069, "percentage": 1.04, "elapsed_time": "0:00:38", "remaining_time": "1:01:09", "throughput": 2758.49, "total_tokens": 105904}
{"current_steps": 400, "total_steps": 38150, "loss": 0.8209, "lr": 5.229357798165138e-06, "epoch": 0.10484927916120576, "percentage": 1.05, "elapsed_time": "0:00:38", "remaining_time": "1:01:04", "throughput": 2757.85, "total_tokens": 107088}
{"current_steps": 405, "total_steps": 38150, "loss": 1.0898, "lr": 5.294888597640891e-06, "epoch": 0.10615989515072084, "percentage": 1.06, "elapsed_time": "0:00:39", "remaining_time": "1:00:59", "throughput": 2756.41, "total_tokens": 108240}
{"current_steps": 410, "total_steps": 38150, "loss": 0.8162, "lr": 5.360419397116645e-06, "epoch": 0.10747051114023591, "percentage": 1.07, "elapsed_time": "0:00:39", "remaining_time": "1:00:55", "throughput": 2756.43, "total_tokens": 109472}
{"current_steps": 415, "total_steps": 38150, "loss": 0.862, "lr": 5.425950196592398e-06, "epoch": 0.10878112712975098, "percentage": 1.09, "elapsed_time": "0:00:40", "remaining_time": "1:00:56", "throughput": 2761.36, "total_tokens": 111056}
{"current_steps": 420, "total_steps": 38150, "loss": 0.6981, "lr": 5.491480996068152e-06, "epoch": 0.11009174311926606, "percentage": 1.1, "elapsed_time": "0:00:40", "remaining_time": "1:00:54", "throughput": 2764.27, "total_tokens": 112448}
{"current_steps": 425, "total_steps": 38150, "loss": 0.8855, "lr": 5.557011795543906e-06, "epoch": 0.11140235910878113, "percentage": 1.11, "elapsed_time": "0:00:41", "remaining_time": "1:00:49", "throughput": 2763.87, "total_tokens": 113648}
{"current_steps": 430, "total_steps": 38150, "loss": 1.2292, "lr": 5.622542595019659e-06, "epoch": 0.1127129750982962, "percentage": 1.13, "elapsed_time": "0:00:41", "remaining_time": "1:00:47", "throughput": 2762.13, "total_tokens": 114864}
{"current_steps": 435, "total_steps": 38150, "loss": 0.8121, "lr": 5.688073394495413e-06, "epoch": 0.11402359108781127, "percentage": 1.14, "elapsed_time": "0:00:42", "remaining_time": "1:00:42", "throughput": 2760.19, "total_tokens": 115968}
{"current_steps": 440, "total_steps": 38150, "loss": 0.8096, "lr": 5.753604193971167e-06, "epoch": 0.11533420707732635, "percentage": 1.15, "elapsed_time": "0:00:42", "remaining_time": "1:00:44", "throughput": 2767.62, "total_tokens": 117680}
{"current_steps": 445, "total_steps": 38150, "loss": 0.7607, "lr": 5.81913499344692e-06, "epoch": 0.11664482306684142, "percentage": 1.17, "elapsed_time": "0:00:42", "remaining_time": "1:00:39", "throughput": 2763.98, "total_tokens": 118720}
{"current_steps": 450, "total_steps": 38150, "loss": 0.5715, "lr": 5.884665792922674e-06, "epoch": 0.11795543905635648, "percentage": 1.18, "elapsed_time": "0:00:43", "remaining_time": "1:00:38", "throughput": 2769.81, "total_tokens": 120288}
{"current_steps": 455, "total_steps": 38150, "loss": 0.6914, "lr": 5.950196592398428e-06, "epoch": 0.11926605504587157, "percentage": 1.19, "elapsed_time": "0:00:43", "remaining_time": "1:00:38", "throughput": 2771.73, "total_tokens": 121728}
{"current_steps": 460, "total_steps": 38150, "loss": 0.6539, "lr": 6.015727391874181e-06, "epoch": 0.12057667103538663, "percentage": 1.21, "elapsed_time": "0:00:44", "remaining_time": "1:00:34", "throughput": 2770.28, "total_tokens": 122880}
{"current_steps": 465, "total_steps": 38150, "loss": 0.766, "lr": 6.081258191349935e-06, "epoch": 0.1218872870249017, "percentage": 1.22, "elapsed_time": "0:00:44", "remaining_time": "1:00:29", "throughput": 2771.08, "total_tokens": 124112}
{"current_steps": 470, "total_steps": 38150, "loss": 0.9506, "lr": 6.146788990825689e-06, "epoch": 0.12319790301441677, "percentage": 1.23, "elapsed_time": "0:00:45", "remaining_time": "1:00:29", "throughput": 2773.93, "total_tokens": 125568}
{"current_steps": 475, "total_steps": 38150, "loss": 0.6434, "lr": 6.212319790301442e-06, "epoch": 0.12450851900393185, "percentage": 1.25, "elapsed_time": "0:00:45", "remaining_time": "1:00:30", "throughput": 2779.41, "total_tokens": 127216}
{"current_steps": 480, "total_steps": 38150, "loss": 0.9105, "lr": 6.277850589777196e-06, "epoch": 0.1258191349934469, "percentage": 1.26, "elapsed_time": "0:00:46", "remaining_time": "1:00:29", "throughput": 2780.07, "total_tokens": 128560}
{"current_steps": 485, "total_steps": 38150, "loss": 0.6407, "lr": 6.343381389252949e-06, "epoch": 0.127129750982962, "percentage": 1.27, "elapsed_time": "0:00:46", "remaining_time": "1:00:27", "throughput": 2781.93, "total_tokens": 129952}
{"current_steps": 490, "total_steps": 38150, "loss": 0.8413, "lr": 6.408912188728703e-06, "epoch": 0.12844036697247707, "percentage": 1.28, "elapsed_time": "0:00:47", "remaining_time": "1:00:23", "throughput": 2778.39, "total_tokens": 130976}
{"current_steps": 495, "total_steps": 38150, "loss": 0.4142, "lr": 6.474442988204456e-06, "epoch": 0.12975098296199214, "percentage": 1.3, "elapsed_time": "0:00:47", "remaining_time": "1:00:18", "throughput": 2780.09, "total_tokens": 132256}
{"current_steps": 500, "total_steps": 38150, "loss": 0.8492, "lr": 6.539973787680211e-06, "epoch": 0.1310615989515072, "percentage": 1.31, "elapsed_time": "0:00:48", "remaining_time": "1:00:23", "throughput": 2790.06, "total_tokens": 134256}
{"current_steps": 505, "total_steps": 38150, "loss": 0.5136, "lr": 6.605504587155964e-06, "epoch": 0.13237221494102228, "percentage": 1.32, "elapsed_time": "0:00:48", "remaining_time": "1:00:20", "throughput": 2791.74, "total_tokens": 135584}
{"current_steps": 510, "total_steps": 38150, "loss": 0.6483, "lr": 6.671035386631718e-06, "epoch": 0.13368283093053734, "percentage": 1.34, "elapsed_time": "0:00:48", "remaining_time": "1:00:15", "throughput": 2787.96, "total_tokens": 136576}
{"current_steps": 515, "total_steps": 38150, "loss": 0.5974, "lr": 6.736566186107471e-06, "epoch": 0.1349934469200524, "percentage": 1.35, "elapsed_time": "0:00:49", "remaining_time": "1:00:12", "throughput": 2790.71, "total_tokens": 137952}
{"current_steps": 520, "total_steps": 38150, "loss": 0.5272, "lr": 6.8020969855832246e-06, "epoch": 0.1363040629095675, "percentage": 1.36, "elapsed_time": "0:00:49", "remaining_time": "1:00:08", "throughput": 2788.12, "total_tokens": 139024}
{"current_steps": 525, "total_steps": 38150, "loss": 0.5598, "lr": 6.867627785058978e-06, "epoch": 0.13761467889908258, "percentage": 1.38, "elapsed_time": "0:00:50", "remaining_time": "1:00:08", "throughput": 2789.88, "total_tokens": 140464}
{"current_steps": 530, "total_steps": 38150, "loss": 0.5286, "lr": 6.933158584534731e-06, "epoch": 0.13892529488859764, "percentage": 1.39, "elapsed_time": "0:00:50", "remaining_time": "1:00:06", "throughput": 2790.93, "total_tokens": 141824}
{"current_steps": 535, "total_steps": 38150, "loss": 0.5995, "lr": 6.9986893840104855e-06, "epoch": 0.1402359108781127, "percentage": 1.4, "elapsed_time": "0:00:51", "remaining_time": "1:00:02", "throughput": 2788.0, "total_tokens": 142864}
{"current_steps": 540, "total_steps": 38150, "loss": 0.8432, "lr": 7.064220183486239e-06, "epoch": 0.14154652686762778, "percentage": 1.42, "elapsed_time": "0:00:51", "remaining_time": "0:59:59", "throughput": 2783.82, "total_tokens": 143856}
{"current_steps": 545, "total_steps": 38150, "loss": 0.5334, "lr": 7.1297509829619924e-06, "epoch": 0.14285714285714285, "percentage": 1.43, "elapsed_time": "0:00:52", "remaining_time": "0:59:59", "throughput": 2784.59, "total_tokens": 145248}
{"current_steps": 550, "total_steps": 38150, "loss": 0.5272, "lr": 7.195281782437746e-06, "epoch": 0.14416775884665792, "percentage": 1.44, "elapsed_time": "0:00:52", "remaining_time": "0:59:58", "throughput": 2790.75, "total_tokens": 146912}
{"current_steps": 555, "total_steps": 38150, "loss": 0.5748, "lr": 7.260812581913499e-06, "epoch": 0.145478374836173, "percentage": 1.45, "elapsed_time": "0:00:53", "remaining_time": "0:59:57", "throughput": 2791.08, "total_tokens": 148224}
{"current_steps": 560, "total_steps": 38150, "loss": 0.5451, "lr": 7.326343381389253e-06, "epoch": 0.14678899082568808, "percentage": 1.47, "elapsed_time": "0:00:53", "remaining_time": "0:59:54", "throughput": 2793.15, "total_tokens": 149568}
{"current_steps": 565, "total_steps": 38150, "loss": 0.6863, "lr": 7.391874180865006e-06, "epoch": 0.14809960681520315, "percentage": 1.48, "elapsed_time": "0:00:54", "remaining_time": "0:59:53", "throughput": 2793.9, "total_tokens": 150912}
{"current_steps": 570, "total_steps": 38150, "loss": 0.6304, "lr": 7.457404980340761e-06, "epoch": 0.14941022280471822, "percentage": 1.49, "elapsed_time": "0:00:54", "remaining_time": "0:59:51", "throughput": 2796.56, "total_tokens": 152352}
{"current_steps": 575, "total_steps": 38150, "loss": 0.4556, "lr": 7.522935779816514e-06, "epoch": 0.15072083879423329, "percentage": 1.51, "elapsed_time": "0:00:54", "remaining_time": "0:59:50", "throughput": 2798.11, "total_tokens": 153728}
{"current_steps": 580, "total_steps": 38150, "loss": 0.9984, "lr": 7.588466579292268e-06, "epoch": 0.15203145478374835, "percentage": 1.52, "elapsed_time": "0:00:55", "remaining_time": "0:59:50", "throughput": 2800.19, "total_tokens": 155200}
{"current_steps": 585, "total_steps": 38150, "loss": 0.6088, "lr": 7.653997378768021e-06, "epoch": 0.15334207077326342, "percentage": 1.53, "elapsed_time": "0:00:55", "remaining_time": "0:59:46", "throughput": 2798.21, "total_tokens": 156288}
{"current_steps": 590, "total_steps": 38150, "loss": 0.6122, "lr": 7.719528178243775e-06, "epoch": 0.15465268676277852, "percentage": 1.55, "elapsed_time": "0:00:56", "remaining_time": "0:59:46", "throughput": 2798.57, "total_tokens": 157664}
{"current_steps": 595, "total_steps": 38150, "loss": 0.3903, "lr": 7.785058977719529e-06, "epoch": 0.1559633027522936, "percentage": 1.56, "elapsed_time": "0:00:56", "remaining_time": "0:59:42", "throughput": 2798.01, "total_tokens": 158832}
{"current_steps": 600, "total_steps": 38150, "loss": 0.4947, "lr": 7.850589777195281e-06, "epoch": 0.15727391874180865, "percentage": 1.57, "elapsed_time": "0:00:57", "remaining_time": "0:59:40", "throughput": 2796.67, "total_tokens": 159984}
{"current_steps": 605, "total_steps": 38150, "loss": 0.4981, "lr": 7.916120576671037e-06, "epoch": 0.15858453473132372, "percentage": 1.59, "elapsed_time": "0:00:57", "remaining_time": "0:59:38", "throughput": 2796.0, "total_tokens": 161248}
{"current_steps": 610, "total_steps": 38150, "loss": 0.6375, "lr": 7.981651376146789e-06, "epoch": 0.1598951507208388, "percentage": 1.6, "elapsed_time": "0:00:58", "remaining_time": "0:59:36", "throughput": 2798.1, "total_tokens": 162608}
{"current_steps": 615, "total_steps": 38150, "loss": 0.4791, "lr": 8.047182175622543e-06, "epoch": 0.16120576671035386, "percentage": 1.61, "elapsed_time": "0:00:58", "remaining_time": "0:59:33", "throughput": 2796.11, "total_tokens": 163696}
{"current_steps": 620, "total_steps": 38150, "loss": 0.6564, "lr": 8.112712975098297e-06, "epoch": 0.16251638269986893, "percentage": 1.63, "elapsed_time": "0:00:59", "remaining_time": "0:59:32", "throughput": 2796.43, "total_tokens": 165056}
{"current_steps": 625, "total_steps": 38150, "loss": 0.4405, "lr": 8.17824377457405e-06, "epoch": 0.16382699868938402, "percentage": 1.64, "elapsed_time": "0:00:59", "remaining_time": "0:59:35", "throughput": 2801.93, "total_tokens": 166848}
{"current_steps": 630, "total_steps": 38150, "loss": 0.6398, "lr": 8.243774574049803e-06, "epoch": 0.1651376146788991, "percentage": 1.65, "elapsed_time": "0:01:00", "remaining_time": "0:59:35", "throughput": 2802.66, "total_tokens": 168240}
{"current_steps": 635, "total_steps": 38150, "loss": 0.7777, "lr": 8.309305373525557e-06, "epoch": 0.16644823066841416, "percentage": 1.66, "elapsed_time": "0:01:00", "remaining_time": "0:59:36", "throughput": 2805.65, "total_tokens": 169840}
{"current_steps": 640, "total_steps": 38150, "loss": 0.6179, "lr": 8.374836173001311e-06, "epoch": 0.16775884665792923, "percentage": 1.68, "elapsed_time": "0:01:00", "remaining_time": "0:59:34", "throughput": 2806.65, "total_tokens": 171152}
{"current_steps": 645, "total_steps": 38150, "loss": 0.75, "lr": 8.440366972477065e-06, "epoch": 0.1690694626474443, "percentage": 1.69, "elapsed_time": "0:01:01", "remaining_time": "0:59:30", "throughput": 2803.39, "total_tokens": 172144}
{"current_steps": 650, "total_steps": 38150, "loss": 0.6045, "lr": 8.505897771952819e-06, "epoch": 0.17038007863695936, "percentage": 1.7, "elapsed_time": "0:01:01", "remaining_time": "0:59:29", "throughput": 2803.32, "total_tokens": 173440}
{"current_steps": 655, "total_steps": 38150, "loss": 0.6365, "lr": 8.571428571428573e-06, "epoch": 0.17169069462647443, "percentage": 1.72, "elapsed_time": "0:01:02", "remaining_time": "0:59:30", "throughput": 2804.8, "total_tokens": 174928}
{"current_steps": 660, "total_steps": 38150, "loss": 0.5728, "lr": 8.636959370904325e-06, "epoch": 0.17300131061598953, "percentage": 1.73, "elapsed_time": "0:01:02", "remaining_time": "0:59:30", "throughput": 2810.69, "total_tokens": 176672}
{"current_steps": 665, "total_steps": 38150, "loss": 0.8667, "lr": 8.702490170380079e-06, "epoch": 0.1743119266055046, "percentage": 1.74, "elapsed_time": "0:01:03", "remaining_time": "0:59:28", "throughput": 2806.77, "total_tokens": 177712}
{"current_steps": 670, "total_steps": 38150, "loss": 0.7419, "lr": 8.768020969855833e-06, "epoch": 0.17562254259501967, "percentage": 1.76, "elapsed_time": "0:01:03", "remaining_time": "0:59:30", "throughput": 2808.32, "total_tokens": 179232}
{"current_steps": 675, "total_steps": 38150, "loss": 0.512, "lr": 8.833551769331587e-06, "epoch": 0.17693315858453473, "percentage": 1.77, "elapsed_time": "0:01:04", "remaining_time": "0:59:31", "throughput": 2809.16, "total_tokens": 180688}
{"current_steps": 680, "total_steps": 38150, "loss": 0.5894, "lr": 8.89908256880734e-06, "epoch": 0.1782437745740498, "percentage": 1.78, "elapsed_time": "0:01:04", "remaining_time": "0:59:27", "throughput": 2808.03, "total_tokens": 181824}
{"current_steps": 685, "total_steps": 38150, "loss": 0.7088, "lr": 8.964613368283094e-06, "epoch": 0.17955439056356487, "percentage": 1.8, "elapsed_time": "0:01:05", "remaining_time": "0:59:24", "throughput": 2804.37, "total_tokens": 182768}
{"current_steps": 690, "total_steps": 38150, "loss": 0.7519, "lr": 9.030144167758847e-06, "epoch": 0.18086500655307994, "percentage": 1.81, "elapsed_time": "0:01:05", "remaining_time": "0:59:23", "throughput": 2807.21, "total_tokens": 184240}
{"current_steps": 695, "total_steps": 38150, "loss": 0.587, "lr": 9.0956749672346e-06, "epoch": 0.182175622542595, "percentage": 1.82, "elapsed_time": "0:01:06", "remaining_time": "0:59:20", "throughput": 2803.25, "total_tokens": 185200}
{"current_steps": 700, "total_steps": 38150, "loss": 0.428, "lr": 9.161205766710354e-06, "epoch": 0.1834862385321101, "percentage": 1.83, "elapsed_time": "0:01:06", "remaining_time": "0:59:19", "throughput": 2802.96, "total_tokens": 186464}
{"current_steps": 705, "total_steps": 38150, "loss": 0.4563, "lr": 9.226736566186107e-06, "epoch": 0.18479685452162517, "percentage": 1.85, "elapsed_time": "0:01:06", "remaining_time": "0:59:16", "throughput": 2803.81, "total_tokens": 187728}
{"current_steps": 710, "total_steps": 38150, "loss": 0.5365, "lr": 9.29226736566186e-06, "epoch": 0.18610747051114024, "percentage": 1.86, "elapsed_time": "0:01:07", "remaining_time": "0:59:14", "throughput": 2802.84, "total_tokens": 188944}
{"current_steps": 715, "total_steps": 38150, "loss": 0.6357, "lr": 9.357798165137616e-06, "epoch": 0.1874180865006553, "percentage": 1.87, "elapsed_time": "0:01:07", "remaining_time": "0:59:12", "throughput": 2804.27, "total_tokens": 190288}
{"current_steps": 720, "total_steps": 38150, "loss": 0.3928, "lr": 9.423328964613368e-06, "epoch": 0.18872870249017037, "percentage": 1.89, "elapsed_time": "0:01:08", "remaining_time": "0:59:09", "throughput": 2801.41, "total_tokens": 191264}
{"current_steps": 725, "total_steps": 38150, "loss": 0.6598, "lr": 9.488859764089122e-06, "epoch": 0.19003931847968544, "percentage": 1.9, "elapsed_time": "0:01:08", "remaining_time": "0:59:06", "throughput": 2799.52, "total_tokens": 192336}
{"current_steps": 730, "total_steps": 38150, "loss": 0.5796, "lr": 9.554390563564876e-06, "epoch": 0.1913499344692005, "percentage": 1.91, "elapsed_time": "0:01:09", "remaining_time": "0:59:03", "throughput": 2800.23, "total_tokens": 193584}
{"current_steps": 735, "total_steps": 38150, "loss": 0.5654, "lr": 9.619921363040628e-06, "epoch": 0.1926605504587156, "percentage": 1.93, "elapsed_time": "0:01:09", "remaining_time": "0:59:01", "throughput": 2799.97, "total_tokens": 194800}
{"current_steps": 740, "total_steps": 38150, "loss": 0.5122, "lr": 9.685452162516382e-06, "epoch": 0.19397116644823068, "percentage": 1.94, "elapsed_time": "0:01:10", "remaining_time": "0:59:00", "throughput": 2801.1, "total_tokens": 196192}
{"current_steps": 745, "total_steps": 38150, "loss": 0.5149, "lr": 9.750982961992136e-06, "epoch": 0.19528178243774574, "percentage": 1.95, "elapsed_time": "0:01:10", "remaining_time": "0:58:58", "throughput": 2798.41, "total_tokens": 197200}
{"current_steps": 750, "total_steps": 38150, "loss": 0.712, "lr": 9.81651376146789e-06, "epoch": 0.1965923984272608, "percentage": 1.97, "elapsed_time": "0:01:10", "remaining_time": "0:58:56", "throughput": 2797.71, "total_tokens": 198384}
{"current_steps": 755, "total_steps": 38150, "loss": 0.6368, "lr": 9.882044560943644e-06, "epoch": 0.19790301441677588, "percentage": 1.98, "elapsed_time": "0:01:11", "remaining_time": "0:58:54", "throughput": 2798.48, "total_tokens": 199680}
{"current_steps": 760, "total_steps": 38150, "loss": 0.5327, "lr": 9.947575360419398e-06, "epoch": 0.19921363040629095, "percentage": 1.99, "elapsed_time": "0:01:11", "remaining_time": "0:58:52", "throughput": 2799.02, "total_tokens": 200960}
{"current_steps": 765, "total_steps": 38150, "loss": 0.6716, "lr": 1.0013106159895152e-05, "epoch": 0.20052424639580602, "percentage": 2.01, "elapsed_time": "0:01:12", "remaining_time": "0:58:55", "throughput": 2807.83, "total_tokens": 203120}
{"current_steps": 770, "total_steps": 38150, "loss": 0.7915, "lr": 1.0078636959370904e-05, "epoch": 0.2018348623853211, "percentage": 2.02, "elapsed_time": "0:01:12", "remaining_time": "0:58:56", "throughput": 2811.73, "total_tokens": 204848}
{"current_steps": 775, "total_steps": 38150, "loss": 0.7314, "lr": 1.0144167758846658e-05, "epoch": 0.20314547837483618, "percentage": 2.03, "elapsed_time": "0:01:13", "remaining_time": "0:58:54", "throughput": 2811.64, "total_tokens": 206080}
{"current_steps": 780, "total_steps": 38150, "loss": 0.6835, "lr": 1.0209698558322412e-05, "epoch": 0.20445609436435125, "percentage": 2.04, "elapsed_time": "0:01:13", "remaining_time": "0:58:55", "throughput": 2817.4, "total_tokens": 207920}
{"current_steps": 785, "total_steps": 38150, "loss": 0.6687, "lr": 1.0275229357798166e-05, "epoch": 0.20576671035386632, "percentage": 2.06, "elapsed_time": "0:01:14", "remaining_time": "0:58:53", "throughput": 2815.83, "total_tokens": 209040}
{"current_steps": 790, "total_steps": 38150, "loss": 0.5398, "lr": 1.034076015727392e-05, "epoch": 0.20707732634338138, "percentage": 2.07, "elapsed_time": "0:01:14", "remaining_time": "0:58:51", "throughput": 2817.08, "total_tokens": 210384}
{"current_steps": 795, "total_steps": 38150, "loss": 0.6529, "lr": 1.0406290956749674e-05, "epoch": 0.20838794233289645, "percentage": 2.08, "elapsed_time": "0:01:15", "remaining_time": "0:58:54", "throughput": 2818.67, "total_tokens": 212032}
{"current_steps": 800, "total_steps": 38150, "loss": 0.6423, "lr": 1.0471821756225426e-05, "epoch": 0.20969855832241152, "percentage": 2.1, "elapsed_time": "0:01:15", "remaining_time": "0:58:51", "throughput": 2814.38, "total_tokens": 212896}
{"current_steps": 805, "total_steps": 38150, "loss": 0.8765, "lr": 1.053735255570118e-05, "epoch": 0.21100917431192662, "percentage": 2.11, "elapsed_time": "0:01:16", "remaining_time": "0:58:51", "throughput": 2815.62, "total_tokens": 214352}
{"current_steps": 810, "total_steps": 38150, "loss": 0.7097, "lr": 1.0602883355176934e-05, "epoch": 0.21231979030144169, "percentage": 2.12, "elapsed_time": "0:01:16", "remaining_time": "0:58:51", "throughput": 2815.58, "total_tokens": 215664}
{"current_steps": 815, "total_steps": 38150, "loss": 0.5415, "lr": 1.0668414154652686e-05, "epoch": 0.21363040629095675, "percentage": 2.14, "elapsed_time": "0:01:17", "remaining_time": "0:58:50", "throughput": 2816.89, "total_tokens": 217104}
{"current_steps": 820, "total_steps": 38150, "loss": 0.6262, "lr": 1.0733944954128442e-05, "epoch": 0.21494102228047182, "percentage": 2.15, "elapsed_time": "0:01:17", "remaining_time": "0:58:48", "throughput": 2815.76, "total_tokens": 218224}
{"current_steps": 825, "total_steps": 38150, "loss": 0.5641, "lr": 1.0799475753604196e-05, "epoch": 0.2162516382699869, "percentage": 2.16, "elapsed_time": "0:01:17", "remaining_time": "0:58:46", "throughput": 2817.09, "total_tokens": 219584}
{"current_steps": 830, "total_steps": 38150, "loss": 0.6382, "lr": 1.0865006553079948e-05, "epoch": 0.21756225425950196, "percentage": 2.18, "elapsed_time": "0:01:18", "remaining_time": "0:58:44", "throughput": 2818.2, "total_tokens": 220912}
{"current_steps": 835, "total_steps": 38150, "loss": 0.5017, "lr": 1.0930537352555702e-05, "epoch": 0.21887287024901703, "percentage": 2.19, "elapsed_time": "0:01:18", "remaining_time": "0:58:44", "throughput": 2818.48, "total_tokens": 222272}
{"current_steps": 840, "total_steps": 38150, "loss": 0.6999, "lr": 1.0996068152031456e-05, "epoch": 0.22018348623853212, "percentage": 2.2, "elapsed_time": "0:01:19", "remaining_time": "0:58:43", "throughput": 2818.62, "total_tokens": 223600}
{"current_steps": 845, "total_steps": 38150, "loss": 0.8037, "lr": 1.1061598951507208e-05, "epoch": 0.2214941022280472, "percentage": 2.21, "elapsed_time": "0:01:19", "remaining_time": "0:58:46", "throughput": 2819.65, "total_tokens": 225232}
{"current_steps": 850, "total_steps": 38150, "loss": 0.5246, "lr": 1.1127129750982962e-05, "epoch": 0.22280471821756226, "percentage": 2.23, "elapsed_time": "0:01:20", "remaining_time": "0:58:44", "throughput": 2820.07, "total_tokens": 226512}
{"current_steps": 855, "total_steps": 38150, "loss": 0.7657, "lr": 1.1192660550458717e-05, "epoch": 0.22411533420707733, "percentage": 2.24, "elapsed_time": "0:01:20", "remaining_time": "0:58:42", "throughput": 2818.01, "total_tokens": 227552}
{"current_steps": 860, "total_steps": 38150, "loss": 0.6498, "lr": 1.125819134993447e-05, "epoch": 0.2254259501965924, "percentage": 2.25, "elapsed_time": "0:01:21", "remaining_time": "0:58:39", "throughput": 2816.16, "total_tokens": 228608}
{"current_steps": 865, "total_steps": 38150, "loss": 0.3725, "lr": 1.1323722149410223e-05, "epoch": 0.22673656618610746, "percentage": 2.27, "elapsed_time": "0:01:21", "remaining_time": "0:58:41", "throughput": 2818.0, "total_tokens": 230192}
{"current_steps": 870, "total_steps": 38150, "loss": 0.5481, "lr": 1.1389252948885977e-05, "epoch": 0.22804718217562253, "percentage": 2.28, "elapsed_time": "0:01:22", "remaining_time": "0:58:41", "throughput": 2820.31, "total_tokens": 231776}
{"current_steps": 875, "total_steps": 38150, "loss": 0.7993, "lr": 1.145478374836173e-05, "epoch": 0.22935779816513763, "percentage": 2.29, "elapsed_time": "0:01:22", "remaining_time": "0:58:40", "throughput": 2820.62, "total_tokens": 233104}
{"current_steps": 880, "total_steps": 38150, "loss": 0.6003, "lr": 1.1520314547837483e-05, "epoch": 0.2306684141546527, "percentage": 2.31, "elapsed_time": "0:01:23", "remaining_time": "0:58:37", "throughput": 2818.66, "total_tokens": 234128}
{"current_steps": 885, "total_steps": 38150, "loss": 0.6396, "lr": 1.1585845347313237e-05, "epoch": 0.23197903014416776, "percentage": 2.32, "elapsed_time": "0:01:23", "remaining_time": "0:58:35", "throughput": 2818.31, "total_tokens": 235312}
{"current_steps": 890, "total_steps": 38150, "loss": 0.5369, "lr": 1.1651376146788991e-05, "epoch": 0.23328964613368283, "percentage": 2.33, "elapsed_time": "0:01:24", "remaining_time": "0:58:37", "throughput": 2819.91, "total_tokens": 236896}
{"current_steps": 895, "total_steps": 38150, "loss": 0.6541, "lr": 1.1716906946264745e-05, "epoch": 0.2346002621231979, "percentage": 2.35, "elapsed_time": "0:01:24", "remaining_time": "0:58:35", "throughput": 2817.63, "total_tokens": 237936}
{"current_steps": 900, "total_steps": 38150, "loss": 0.6812, "lr": 1.1782437745740499e-05, "epoch": 0.23591087811271297, "percentage": 2.36, "elapsed_time": "0:01:24", "remaining_time": "0:58:35", "throughput": 2819.97, "total_tokens": 239552}
{"current_steps": 905, "total_steps": 38150, "loss": 0.5113, "lr": 1.1847968545216253e-05, "epoch": 0.23722149410222804, "percentage": 2.37, "elapsed_time": "0:01:25", "remaining_time": "0:58:34", "throughput": 2820.51, "total_tokens": 240832}
{"current_steps": 910, "total_steps": 38150, "loss": 0.615, "lr": 1.1913499344692005e-05, "epoch": 0.23853211009174313, "percentage": 2.39, "elapsed_time": "0:01:25", "remaining_time": "0:58:33", "throughput": 2821.13, "total_tokens": 242192}
{"current_steps": 915, "total_steps": 38150, "loss": 0.6304, "lr": 1.197903014416776e-05, "epoch": 0.2398427260812582, "percentage": 2.4, "elapsed_time": "0:01:26", "remaining_time": "0:58:31", "throughput": 2821.7, "total_tokens": 243456}
{"current_steps": 920, "total_steps": 38150, "loss": 0.7477, "lr": 1.2044560943643513e-05, "epoch": 0.24115334207077327, "percentage": 2.41, "elapsed_time": "0:01:26", "remaining_time": "0:58:33", "throughput": 2827.11, "total_tokens": 245424}
{"current_steps": 925, "total_steps": 38150, "loss": 0.5115, "lr": 1.2110091743119267e-05, "epoch": 0.24246395806028834, "percentage": 2.42, "elapsed_time": "0:01:27", "remaining_time": "0:58:32", "throughput": 2827.45, "total_tokens": 246752}
{"current_steps": 930, "total_steps": 38150, "loss": 0.4929, "lr": 1.2175622542595021e-05, "epoch": 0.2437745740498034, "percentage": 2.44, "elapsed_time": "0:01:27", "remaining_time": "0:58:31", "throughput": 2829.07, "total_tokens": 248256}
{"current_steps": 935, "total_steps": 38150, "loss": 0.4981, "lr": 1.2241153342070775e-05, "epoch": 0.24508519003931847, "percentage": 2.45, "elapsed_time": "0:01:28", "remaining_time": "0:58:31", "throughput": 2829.73, "total_tokens": 249648}
{"current_steps": 940, "total_steps": 38150, "loss": 0.6766, "lr": 1.2306684141546527e-05, "epoch": 0.24639580602883354, "percentage": 2.46, "elapsed_time": "0:01:28", "remaining_time": "0:58:34", "throughput": 2833.78, "total_tokens": 251600}
{"current_steps": 945, "total_steps": 38150, "loss": 0.465, "lr": 1.2372214941022281e-05, "epoch": 0.24770642201834864, "percentage": 2.48, "elapsed_time": "0:01:29", "remaining_time": "0:58:32", "throughput": 2830.68, "total_tokens": 252544}
{"current_steps": 950, "total_steps": 38150, "loss": 0.7819, "lr": 1.2437745740498035e-05, "epoch": 0.2490170380078637, "percentage": 2.49, "elapsed_time": "0:01:29", "remaining_time": "0:58:30", "throughput": 2830.46, "total_tokens": 253776}
{"current_steps": 955, "total_steps": 38150, "loss": 0.4234, "lr": 1.2503276539973787e-05, "epoch": 0.2503276539973788, "percentage": 2.5, "elapsed_time": "0:01:30", "remaining_time": "0:58:30", "throughput": 2831.52, "total_tokens": 255232}
{"current_steps": 960, "total_steps": 38150, "loss": 0.6626, "lr": 1.2568807339449543e-05, "epoch": 0.2516382699868938, "percentage": 2.52, "elapsed_time": "0:01:30", "remaining_time": "0:58:30", "throughput": 2832.15, "total_tokens": 256640}
{"current_steps": 965, "total_steps": 38150, "loss": 0.5937, "lr": 1.2634338138925295e-05, "epoch": 0.2529488859764089, "percentage": 2.53, "elapsed_time": "0:01:31", "remaining_time": "0:58:28", "throughput": 2830.96, "total_tokens": 257760}
{"current_steps": 970, "total_steps": 38150, "loss": 0.5067, "lr": 1.2699868938401049e-05, "epoch": 0.254259501965924, "percentage": 2.54, "elapsed_time": "0:01:31", "remaining_time": "0:58:26", "throughput": 2830.66, "total_tokens": 258960}
{"current_steps": 975, "total_steps": 38150, "loss": 0.4635, "lr": 1.2765399737876801e-05, "epoch": 0.25557011795543905, "percentage": 2.56, "elapsed_time": "0:01:31", "remaining_time": "0:58:27", "throughput": 2832.2, "total_tokens": 260528}
{"current_steps": 980, "total_steps": 38150, "loss": 0.5117, "lr": 1.2830930537352557e-05, "epoch": 0.25688073394495414, "percentage": 2.57, "elapsed_time": "0:01:32", "remaining_time": "0:58:25", "throughput": 2831.86, "total_tokens": 261744}
{"current_steps": 985, "total_steps": 38150, "loss": 0.6982, "lr": 1.289646133682831e-05, "epoch": 0.2581913499344692, "percentage": 2.58, "elapsed_time": "0:01:32", "remaining_time": "0:58:23", "throughput": 2831.54, "total_tokens": 262928}
{"current_steps": 990, "total_steps": 38150, "loss": 0.5254, "lr": 1.2961992136304063e-05, "epoch": 0.2595019659239843, "percentage": 2.6, "elapsed_time": "0:01:33", "remaining_time": "0:58:22", "throughput": 2833.6, "total_tokens": 264384}
{"current_steps": 995, "total_steps": 38150, "loss": 0.9029, "lr": 1.3027522935779818e-05, "epoch": 0.2608125819134993, "percentage": 2.61, "elapsed_time": "0:01:33", "remaining_time": "0:58:20", "throughput": 2831.08, "total_tokens": 265392}
{"current_steps": 1000, "total_steps": 38150, "loss": 0.9019, "lr": 1.309305373525557e-05, "epoch": 0.2621231979030144, "percentage": 2.62, "elapsed_time": "0:01:34", "remaining_time": "0:58:18", "throughput": 2830.83, "total_tokens": 266608}
{"current_steps": 1005, "total_steps": 38150, "loss": 0.5617, "lr": 1.3158584534731325e-05, "epoch": 0.2634338138925295, "percentage": 2.63, "elapsed_time": "0:01:34", "remaining_time": "0:58:17", "throughput": 2832.17, "total_tokens": 267984}
{"current_steps": 1010, "total_steps": 38150, "loss": 0.7564, "lr": 1.3224115334207077e-05, "epoch": 0.26474442988204455, "percentage": 2.65, "elapsed_time": "0:01:35", "remaining_time": "0:58:14", "throughput": 2829.78, "total_tokens": 268944}
{"current_steps": 1015, "total_steps": 38150, "loss": 0.5298, "lr": 1.328964613368283e-05, "epoch": 0.26605504587155965, "percentage": 2.66, "elapsed_time": "0:01:35", "remaining_time": "0:58:15", "throughput": 2833.16, "total_tokens": 270672}
{"current_steps": 1020, "total_steps": 38150, "loss": 0.4828, "lr": 1.3355176933158586e-05, "epoch": 0.2673656618610747, "percentage": 2.67, "elapsed_time": "0:01:36", "remaining_time": "0:58:15", "throughput": 2835.07, "total_tokens": 272208}
{"current_steps": 1025, "total_steps": 38150, "loss": 0.6142, "lr": 1.3420707732634339e-05, "epoch": 0.2686762778505898, "percentage": 2.69, "elapsed_time": "0:01:36", "remaining_time": "0:58:21", "throughput": 2841.85, "total_tokens": 274720}
{"current_steps": 1030, "total_steps": 38150, "loss": 2.2081, "lr": 1.3486238532110092e-05, "epoch": 0.2699868938401048, "percentage": 2.7, "elapsed_time": "0:01:37", "remaining_time": "0:58:19", "throughput": 2839.08, "total_tokens": 275664}
{"current_steps": 1035, "total_steps": 38150, "loss": 0.7791, "lr": 1.3551769331585845e-05, "epoch": 0.2712975098296199, "percentage": 2.71, "elapsed_time": "0:01:37", "remaining_time": "0:58:20", "throughput": 2843.29, "total_tokens": 277568}
{"current_steps": 1040, "total_steps": 38150, "loss": 0.4941, "lr": 1.36173001310616e-05, "epoch": 0.272608125819135, "percentage": 2.73, "elapsed_time": "0:01:38", "remaining_time": "0:58:19", "throughput": 2843.04, "total_tokens": 278848}
{"current_steps": 1045, "total_steps": 38150, "loss": 0.4921, "lr": 1.3682830930537352e-05, "epoch": 0.27391874180865006, "percentage": 2.74, "elapsed_time": "0:01:38", "remaining_time": "0:58:17", "throughput": 2842.02, "total_tokens": 279968}
{"current_steps": 1050, "total_steps": 38150, "loss": 0.8069, "lr": 1.3748361730013106e-05, "epoch": 0.27522935779816515, "percentage": 2.75, "elapsed_time": "0:01:38", "remaining_time": "0:58:16", "throughput": 2841.64, "total_tokens": 281184}
{"current_steps": 1055, "total_steps": 38150, "loss": 0.4275, "lr": 1.3813892529488862e-05, "epoch": 0.2765399737876802, "percentage": 2.77, "elapsed_time": "0:01:39", "remaining_time": "0:58:15", "throughput": 2842.35, "total_tokens": 282576}
{"current_steps": 1060, "total_steps": 38150, "loss": 0.5144, "lr": 1.3879423328964614e-05, "epoch": 0.2778505897771953, "percentage": 2.78, "elapsed_time": "0:01:39", "remaining_time": "0:58:15", "throughput": 2843.89, "total_tokens": 284064}
{"current_steps": 1065, "total_steps": 38150, "loss": 0.5713, "lr": 1.3944954128440368e-05, "epoch": 0.27916120576671033, "percentage": 2.79, "elapsed_time": "0:01:40", "remaining_time": "0:58:13", "throughput": 2842.86, "total_tokens": 285200}
{"current_steps": 1070, "total_steps": 38150, "loss": 0.5656, "lr": 1.401048492791612e-05, "epoch": 0.2804718217562254, "percentage": 2.8, "elapsed_time": "0:01:40", "remaining_time": "0:58:12", "throughput": 2842.21, "total_tokens": 286432}
{"current_steps": 1075, "total_steps": 38150, "loss": 0.559, "lr": 1.4076015727391876e-05, "epoch": 0.2817824377457405, "percentage": 2.82, "elapsed_time": "0:01:41", "remaining_time": "0:58:12", "throughput": 2843.18, "total_tokens": 287888}
{"current_steps": 1080, "total_steps": 38150, "loss": 0.6067, "lr": 1.4141546526867626e-05, "epoch": 0.28309305373525556, "percentage": 2.83, "elapsed_time": "0:01:41", "remaining_time": "0:58:10", "throughput": 2843.12, "total_tokens": 289120}
{"current_steps": 1085, "total_steps": 38150, "loss": 0.7679, "lr": 1.4207077326343382e-05, "epoch": 0.28440366972477066, "percentage": 2.84, "elapsed_time": "0:01:42", "remaining_time": "0:58:08", "throughput": 2840.11, "total_tokens": 290032}
{"current_steps": 1090, "total_steps": 38150, "loss": 0.4873, "lr": 1.4272608125819138e-05, "epoch": 0.2857142857142857, "percentage": 2.86, "elapsed_time": "0:01:42", "remaining_time": "0:58:07", "throughput": 2840.35, "total_tokens": 291328}
{"current_steps": 1095, "total_steps": 38150, "loss": 0.3964, "lr": 1.4338138925294888e-05, "epoch": 0.2870249017038008, "percentage": 2.87, "elapsed_time": "0:01:43", "remaining_time": "0:58:11", "throughput": 2846.04, "total_tokens": 293632}
{"current_steps": 1100, "total_steps": 38150, "loss": 0.7431, "lr": 1.4403669724770644e-05, "epoch": 0.28833551769331583, "percentage": 2.88, "elapsed_time": "0:01:43", "remaining_time": "0:58:10", "throughput": 2845.43, "total_tokens": 294880}
{"current_steps": 1105, "total_steps": 38150, "loss": 0.5196, "lr": 1.4469200524246396e-05, "epoch": 0.28964613368283093, "percentage": 2.9, "elapsed_time": "0:01:44", "remaining_time": "0:58:09", "throughput": 2846.13, "total_tokens": 296208}
{"current_steps": 1110, "total_steps": 38150, "loss": 0.5651, "lr": 1.453473132372215e-05, "epoch": 0.290956749672346, "percentage": 2.91, "elapsed_time": "0:01:44", "remaining_time": "0:58:08", "throughput": 2847.84, "total_tokens": 297760}
{"current_steps": 1115, "total_steps": 38150, "loss": 0.5457, "lr": 1.4600262123197902e-05, "epoch": 0.29226736566186107, "percentage": 2.92, "elapsed_time": "0:01:44", "remaining_time": "0:58:07", "throughput": 2846.43, "total_tokens": 298832}
{"current_steps": 1120, "total_steps": 38150, "loss": 0.556, "lr": 1.4665792922673658e-05, "epoch": 0.29357798165137616, "percentage": 2.94, "elapsed_time": "0:01:45", "remaining_time": "0:58:05", "throughput": 2846.6, "total_tokens": 300112}
{"current_steps": 1125, "total_steps": 38150, "loss": 0.626, "lr": 1.4731323722149412e-05, "epoch": 0.2948885976408912, "percentage": 2.95, "elapsed_time": "0:01:45", "remaining_time": "0:58:03", "throughput": 2845.78, "total_tokens": 301248}
{"current_steps": 1130, "total_steps": 38150, "loss": 0.5083, "lr": 1.4796854521625164e-05, "epoch": 0.2961992136304063, "percentage": 2.96, "elapsed_time": "0:01:46", "remaining_time": "0:58:03", "throughput": 2846.49, "total_tokens": 302656}
{"current_steps": 1135, "total_steps": 38150, "loss": 0.5714, "lr": 1.486238532110092e-05, "epoch": 0.29750982961992134, "percentage": 2.98, "elapsed_time": "0:01:46", "remaining_time": "0:58:02", "throughput": 2847.01, "total_tokens": 304000}
{"current_steps": 1140, "total_steps": 38150, "loss": 0.6576, "lr": 1.4927916120576672e-05, "epoch": 0.29882044560943644, "percentage": 2.99, "elapsed_time": "0:01:47", "remaining_time": "0:58:02", "throughput": 2848.08, "total_tokens": 305472}
{"current_steps": 1145, "total_steps": 38150, "loss": 0.3387, "lr": 1.4993446920052426e-05, "epoch": 0.30013106159895153, "percentage": 3.0, "elapsed_time": "0:01:47", "remaining_time": "0:57:59", "throughput": 2846.08, "total_tokens": 306448}
{"current_steps": 1150, "total_steps": 38150, "loss": 0.8595, "lr": 1.5058977719528178e-05, "epoch": 0.30144167758846657, "percentage": 3.01, "elapsed_time": "0:01:48", "remaining_time": "0:57:58", "throughput": 2845.7, "total_tokens": 307632}
{"current_steps": 1155, "total_steps": 38150, "loss": 0.3936, "lr": 1.5124508519003932e-05, "epoch": 0.30275229357798167, "percentage": 3.03, "elapsed_time": "0:01:48", "remaining_time": "0:57:56", "throughput": 2844.99, "total_tokens": 308784}
{"current_steps": 1160, "total_steps": 38150, "loss": 0.5111, "lr": 1.5190039318479687e-05, "epoch": 0.3040629095674967, "percentage": 3.04, "elapsed_time": "0:01:48", "remaining_time": "0:57:55", "throughput": 2845.26, "total_tokens": 310096}
{"current_steps": 1165, "total_steps": 38150, "loss": 1.2331, "lr": 1.525557011795544e-05, "epoch": 0.3053735255570118, "percentage": 3.05, "elapsed_time": "0:01:49", "remaining_time": "0:57:54", "throughput": 2846.0, "total_tokens": 311472}
{"current_steps": 1170, "total_steps": 38150, "loss": 0.5845, "lr": 1.5321100917431195e-05, "epoch": 0.30668414154652685, "percentage": 3.07, "elapsed_time": "0:01:49", "remaining_time": "0:57:53", "throughput": 2845.29, "total_tokens": 312704}
{"current_steps": 1175, "total_steps": 38150, "loss": 0.478, "lr": 1.5386631716906946e-05, "epoch": 0.30799475753604194, "percentage": 3.08, "elapsed_time": "0:01:50", "remaining_time": "0:57:52", "throughput": 2844.67, "total_tokens": 313888}
{"current_steps": 1180, "total_steps": 38150, "loss": 0.5634, "lr": 1.54521625163827e-05, "epoch": 0.30930537352555704, "percentage": 3.09, "elapsed_time": "0:01:50", "remaining_time": "0:57:51", "throughput": 2843.28, "total_tokens": 315040}
{"current_steps": 1185, "total_steps": 38150, "loss": 0.9521, "lr": 1.5517693315858454e-05, "epoch": 0.3106159895150721, "percentage": 3.11, "elapsed_time": "0:01:51", "remaining_time": "0:57:50", "throughput": 2843.77, "total_tokens": 316352}
{"current_steps": 1190, "total_steps": 38150, "loss": 0.6194, "lr": 1.5583224115334208e-05, "epoch": 0.3119266055045872, "percentage": 3.12, "elapsed_time": "0:01:51", "remaining_time": "0:57:49", "throughput": 2843.13, "total_tokens": 317616}
{"current_steps": 1195, "total_steps": 38150, "loss": 0.6724, "lr": 1.564875491480996e-05, "epoch": 0.3132372214941022, "percentage": 3.13, "elapsed_time": "0:01:52", "remaining_time": "0:57:49", "throughput": 2844.99, "total_tokens": 319184}
{"current_steps": 1200, "total_steps": 38150, "loss": 0.5945, "lr": 1.5714285714285715e-05, "epoch": 0.3145478374836173, "percentage": 3.15, "elapsed_time": "0:01:52", "remaining_time": "0:57:50", "throughput": 2846.14, "total_tokens": 320752}
{"current_steps": 1205, "total_steps": 38150, "loss": 0.4788, "lr": 1.577981651376147e-05, "epoch": 0.31585845347313235, "percentage": 3.16, "elapsed_time": "0:01:53", "remaining_time": "0:57:49", "throughput": 2846.13, "total_tokens": 322032}
{"current_steps": 1210, "total_steps": 38150, "loss": 0.5148, "lr": 1.5845347313237223e-05, "epoch": 0.31716906946264745, "percentage": 3.17, "elapsed_time": "0:01:53", "remaining_time": "0:57:47", "throughput": 2846.74, "total_tokens": 323360}
{"current_steps": 1215, "total_steps": 38150, "loss": 0.4866, "lr": 1.5910878112712977e-05, "epoch": 0.31847968545216254, "percentage": 3.18, "elapsed_time": "0:01:54", "remaining_time": "0:57:46", "throughput": 2846.21, "total_tokens": 324528}
{"current_steps": 1220, "total_steps": 38150, "loss": 0.5161, "lr": 1.5976408912188728e-05, "epoch": 0.3197903014416776, "percentage": 3.2, "elapsed_time": "0:01:54", "remaining_time": "0:57:50", "throughput": 2851.75, "total_tokens": 326928}
{"current_steps": 1225, "total_steps": 38150, "loss": 0.4283, "lr": 1.604193971166448e-05, "epoch": 0.3211009174311927, "percentage": 3.21, "elapsed_time": "0:01:55", "remaining_time": "0:57:50", "throughput": 2852.83, "total_tokens": 328432}
{"current_steps": 1230, "total_steps": 38150, "loss": 0.597, "lr": 1.610747051114024e-05, "epoch": 0.3224115334207077, "percentage": 3.22, "elapsed_time": "0:01:55", "remaining_time": "0:57:48", "throughput": 2850.84, "total_tokens": 329440}
{"current_steps": 1235, "total_steps": 38150, "loss": 0.4051, "lr": 1.617300131061599e-05, "epoch": 0.3237221494102228, "percentage": 3.24, "elapsed_time": "0:01:56", "remaining_time": "0:57:48", "throughput": 2851.78, "total_tokens": 330912}
{"current_steps": 1240, "total_steps": 38150, "loss": 0.5861, "lr": 1.6238532110091743e-05, "epoch": 0.32503276539973786, "percentage": 3.25, "elapsed_time": "0:01:56", "remaining_time": "0:57:48", "throughput": 2852.01, "total_tokens": 332288}
{"current_steps": 1245, "total_steps": 38150, "loss": 0.6095, "lr": 1.6304062909567497e-05, "epoch": 0.32634338138925295, "percentage": 3.26, "elapsed_time": "0:01:56", "remaining_time": "0:57:46", "throughput": 2852.37, "total_tokens": 333584}
{"current_steps": 1250, "total_steps": 38150, "loss": 0.516, "lr": 1.636959370904325e-05, "epoch": 0.32765399737876805, "percentage": 3.28, "elapsed_time": "0:01:57", "remaining_time": "0:57:48", "throughput": 2854.09, "total_tokens": 335392}
{"current_steps": 1255, "total_steps": 38150, "loss": 0.6243, "lr": 1.6435124508519005e-05, "epoch": 0.3289646133682831, "percentage": 3.29, "elapsed_time": "0:01:57", "remaining_time": "0:57:47", "throughput": 2853.48, "total_tokens": 336560}
{"current_steps": 1260, "total_steps": 38150, "loss": 0.6346, "lr": 1.650065530799476e-05, "epoch": 0.3302752293577982, "percentage": 3.3, "elapsed_time": "0:01:58", "remaining_time": "0:57:47", "throughput": 2855.78, "total_tokens": 338192}
{"current_steps": 1265, "total_steps": 38150, "loss": 0.6469, "lr": 1.6566186107470513e-05, "epoch": 0.3315858453473132, "percentage": 3.32, "elapsed_time": "0:01:58", "remaining_time": "0:57:46", "throughput": 2856.25, "total_tokens": 339568}
{"current_steps": 1270, "total_steps": 38150, "loss": 0.4535, "lr": 1.6631716906946267e-05, "epoch": 0.3328964613368283, "percentage": 3.33, "elapsed_time": "0:01:59", "remaining_time": "0:57:45", "throughput": 2855.59, "total_tokens": 340736}
{"current_steps": 1275, "total_steps": 38150, "loss": 1.2089, "lr": 1.669724770642202e-05, "epoch": 0.33420707732634336, "percentage": 3.34, "elapsed_time": "0:01:59", "remaining_time": "0:57:43", "throughput": 2853.1, "total_tokens": 341680}
{"current_steps": 1280, "total_steps": 38150, "loss": 0.624, "lr": 1.676277850589777e-05, "epoch": 0.33551769331585846, "percentage": 3.36, "elapsed_time": "0:02:00", "remaining_time": "0:57:41", "throughput": 2852.29, "total_tokens": 342800}
{"current_steps": 1285, "total_steps": 38150, "loss": 0.636, "lr": 1.682830930537353e-05, "epoch": 0.33682830930537355, "percentage": 3.37, "elapsed_time": "0:02:00", "remaining_time": "0:57:40", "throughput": 2851.96, "total_tokens": 344000}
{"current_steps": 1290, "total_steps": 38150, "loss": 0.5283, "lr": 1.689384010484928e-05, "epoch": 0.3381389252948886, "percentage": 3.38, "elapsed_time": "0:02:01", "remaining_time": "0:57:42", "throughput": 2855.2, "total_tokens": 345968}
{"current_steps": 1295, "total_steps": 38150, "loss": 0.4928, "lr": 1.6959370904325033e-05, "epoch": 0.3394495412844037, "percentage": 3.39, "elapsed_time": "0:02:01", "remaining_time": "0:57:42", "throughput": 2854.31, "total_tokens": 347264}
{"current_steps": 1300, "total_steps": 38150, "loss": 0.4994, "lr": 1.702490170380079e-05, "epoch": 0.34076015727391873, "percentage": 3.41, "elapsed_time": "0:02:02", "remaining_time": "0:57:42", "throughput": 2854.88, "total_tokens": 348720}
{"current_steps": 1305, "total_steps": 38150, "loss": 0.4343, "lr": 1.709043250327654e-05, "epoch": 0.3420707732634338, "percentage": 3.42, "elapsed_time": "0:02:02", "remaining_time": "0:57:41", "throughput": 2855.25, "total_tokens": 350032}
{"current_steps": 1310, "total_steps": 38150, "loss": 0.5269, "lr": 1.7155963302752295e-05, "epoch": 0.34338138925294887, "percentage": 3.43, "elapsed_time": "0:02:03", "remaining_time": "0:57:40", "throughput": 2855.7, "total_tokens": 351408}
{"current_steps": 1315, "total_steps": 38150, "loss": 0.4433, "lr": 1.722149410222805e-05, "epoch": 0.34469200524246396, "percentage": 3.45, "elapsed_time": "0:02:03", "remaining_time": "0:57:39", "throughput": 2854.95, "total_tokens": 352560}
{"current_steps": 1320, "total_steps": 38150, "loss": 0.6349, "lr": 1.7287024901703802e-05, "epoch": 0.34600262123197906, "percentage": 3.46, "elapsed_time": "0:02:03", "remaining_time": "0:57:38", "throughput": 2854.67, "total_tokens": 353824}
{"current_steps": 1325, "total_steps": 38150, "loss": 0.4238, "lr": 1.7352555701179553e-05, "epoch": 0.3473132372214941, "percentage": 3.47, "elapsed_time": "0:02:04", "remaining_time": "0:57:38", "throughput": 2856.18, "total_tokens": 355456}
{"current_steps": 1330, "total_steps": 38150, "loss": 0.4968, "lr": 1.741808650065531e-05, "epoch": 0.3486238532110092, "percentage": 3.49, "elapsed_time": "0:02:04", "remaining_time": "0:57:37", "throughput": 2854.66, "total_tokens": 356528}
{"current_steps": 1335, "total_steps": 38150, "loss": 0.3711, "lr": 1.7483617300131064e-05, "epoch": 0.34993446920052423, "percentage": 3.5, "elapsed_time": "0:02:05", "remaining_time": "0:57:35", "throughput": 2854.11, "total_tokens": 357664}
{"current_steps": 1340, "total_steps": 38150, "loss": 0.6615, "lr": 1.7549148099606815e-05, "epoch": 0.35124508519003933, "percentage": 3.51, "elapsed_time": "0:02:05", "remaining_time": "0:57:35", "throughput": 2854.9, "total_tokens": 359136}
{"current_steps": 1345, "total_steps": 38150, "loss": 0.3844, "lr": 1.7614678899082572e-05, "epoch": 0.35255570117955437, "percentage": 3.53, "elapsed_time": "0:02:06", "remaining_time": "0:57:34", "throughput": 2856.08, "total_tokens": 360560}
{"current_steps": 1350, "total_steps": 38150, "loss": 0.6441, "lr": 1.7680209698558323e-05, "epoch": 0.35386631716906947, "percentage": 3.54, "elapsed_time": "0:02:06", "remaining_time": "0:57:33", "throughput": 2856.84, "total_tokens": 361952}
{"current_steps": 1355, "total_steps": 38150, "loss": 0.5154, "lr": 1.7745740498034076e-05, "epoch": 0.35517693315858456, "percentage": 3.55, "elapsed_time": "0:02:07", "remaining_time": "0:57:33", "throughput": 2856.87, "total_tokens": 363360}
{"current_steps": 1360, "total_steps": 38150, "loss": 0.5918, "lr": 1.781127129750983e-05, "epoch": 0.3564875491480996, "percentage": 3.56, "elapsed_time": "0:02:07", "remaining_time": "0:57:32", "throughput": 2855.97, "total_tokens": 364480}
{"current_steps": 1365, "total_steps": 38150, "loss": 0.7536, "lr": 1.7876802096985584e-05, "epoch": 0.3577981651376147, "percentage": 3.58, "elapsed_time": "0:02:08", "remaining_time": "0:57:30", "throughput": 2854.86, "total_tokens": 365584}
{"current_steps": 1370, "total_steps": 38150, "loss": 0.7425, "lr": 1.7942332896461335e-05, "epoch": 0.35910878112712974, "percentage": 3.59, "elapsed_time": "0:02:08", "remaining_time": "0:57:29", "throughput": 2853.85, "total_tokens": 366704}
{"current_steps": 1375, "total_steps": 38150, "loss": 0.8073, "lr": 1.8007863695937092e-05, "epoch": 0.36041939711664484, "percentage": 3.6, "elapsed_time": "0:02:08", "remaining_time": "0:57:28", "throughput": 2854.04, "total_tokens": 368000}
{"current_steps": 1380, "total_steps": 38150, "loss": 0.42, "lr": 1.8073394495412846e-05, "epoch": 0.3617300131061599, "percentage": 3.62, "elapsed_time": "0:02:09", "remaining_time": "0:57:28", "throughput": 2853.99, "total_tokens": 369360}
{"current_steps": 1385, "total_steps": 38150, "loss": 0.5472, "lr": 1.8138925294888597e-05, "epoch": 0.36304062909567497, "percentage": 3.63, "elapsed_time": "0:02:09", "remaining_time": "0:57:28", "throughput": 2855.15, "total_tokens": 370864}
{"current_steps": 1390, "total_steps": 38150, "loss": 0.365, "lr": 1.8204456094364354e-05, "epoch": 0.36435124508519, "percentage": 3.64, "elapsed_time": "0:02:10", "remaining_time": "0:57:26", "throughput": 2854.48, "total_tokens": 372000}
{"current_steps": 1395, "total_steps": 38150, "loss": 0.8865, "lr": 1.8269986893840104e-05, "epoch": 0.3656618610747051, "percentage": 3.66, "elapsed_time": "0:02:10", "remaining_time": "0:57:25", "throughput": 2854.02, "total_tokens": 373168}
{"current_steps": 1400, "total_steps": 38150, "loss": 0.6544, "lr": 1.833551769331586e-05, "epoch": 0.3669724770642202, "percentage": 3.67, "elapsed_time": "0:02:11", "remaining_time": "0:57:23", "throughput": 2853.12, "total_tokens": 374304}
{"current_steps": 1405, "total_steps": 38150, "loss": 0.3194, "lr": 1.8401048492791612e-05, "epoch": 0.36828309305373524, "percentage": 3.68, "elapsed_time": "0:02:11", "remaining_time": "0:57:23", "throughput": 2854.87, "total_tokens": 375888}
{"current_steps": 1410, "total_steps": 38150, "loss": 0.5509, "lr": 1.8466579292267366e-05, "epoch": 0.36959370904325034, "percentage": 3.7, "elapsed_time": "0:02:12", "remaining_time": "0:57:22", "throughput": 2855.5, "total_tokens": 377296}
{"current_steps": 1415, "total_steps": 38150, "loss": 0.415, "lr": 1.853211009174312e-05, "epoch": 0.3709043250327654, "percentage": 3.71, "elapsed_time": "0:02:12", "remaining_time": "0:57:22", "throughput": 2855.47, "total_tokens": 378608}
{"current_steps": 1420, "total_steps": 38150, "loss": 0.4986, "lr": 1.8597640891218874e-05, "epoch": 0.3722149410222805, "percentage": 3.72, "elapsed_time": "0:02:13", "remaining_time": "0:57:22", "throughput": 2856.59, "total_tokens": 380192}
{"current_steps": 1425, "total_steps": 38150, "loss": 0.5814, "lr": 1.8663171690694628e-05, "epoch": 0.3735255570117955, "percentage": 3.74, "elapsed_time": "0:02:13", "remaining_time": "0:57:21", "throughput": 2857.66, "total_tokens": 381616}
{"current_steps": 1430, "total_steps": 38150, "loss": 0.5123, "lr": 1.872870249017038e-05, "epoch": 0.3748361730013106, "percentage": 3.75, "elapsed_time": "0:02:13", "remaining_time": "0:57:20", "throughput": 2856.39, "total_tokens": 382672}
{"current_steps": 1435, "total_steps": 38150, "loss": 0.5751, "lr": 1.8794233289646136e-05, "epoch": 0.3761467889908257, "percentage": 3.76, "elapsed_time": "0:02:14", "remaining_time": "0:57:19", "throughput": 2857.92, "total_tokens": 384176}
{"current_steps": 1440, "total_steps": 38150, "loss": 0.7369, "lr": 1.8859764089121886e-05, "epoch": 0.37745740498034075, "percentage": 3.77, "elapsed_time": "0:02:14", "remaining_time": "0:57:18", "throughput": 2858.45, "total_tokens": 385584}
{"current_steps": 1445, "total_steps": 38150, "loss": 0.4625, "lr": 1.892529488859764e-05, "epoch": 0.37876802096985585, "percentage": 3.79, "elapsed_time": "0:02:15", "remaining_time": "0:57:18", "throughput": 2857.96, "total_tokens": 386832}
{"current_steps": 1450, "total_steps": 38150, "loss": 0.4687, "lr": 1.8990825688073397e-05, "epoch": 0.3800786369593709, "percentage": 3.8, "elapsed_time": "0:02:15", "remaining_time": "0:57:18", "throughput": 2858.94, "total_tokens": 388416}
{"current_steps": 1455, "total_steps": 38150, "loss": 0.4035, "lr": 1.9056356487549148e-05, "epoch": 0.381389252948886, "percentage": 3.81, "elapsed_time": "0:02:16", "remaining_time": "0:57:17", "throughput": 2858.9, "total_tokens": 389648}
{"current_steps": 1460, "total_steps": 38150, "loss": 0.3531, "lr": 1.9121887287024902e-05, "epoch": 0.382699868938401, "percentage": 3.83, "elapsed_time": "0:02:16", "remaining_time": "0:57:15", "throughput": 2858.71, "total_tokens": 390864}
{"current_steps": 1465, "total_steps": 38150, "loss": 0.5029, "lr": 1.9187418086500656e-05, "epoch": 0.3840104849279161, "percentage": 3.84, "elapsed_time": "0:02:17", "remaining_time": "0:57:14", "throughput": 2858.81, "total_tokens": 392128}
{"current_steps": 1470, "total_steps": 38150, "loss": 0.6116, "lr": 1.925294888597641e-05, "epoch": 0.3853211009174312, "percentage": 3.85, "elapsed_time": "0:02:17", "remaining_time": "0:57:21", "throughput": 2863.2, "total_tokens": 394864}
{"current_steps": 1475, "total_steps": 38150, "loss": 0.4047, "lr": 1.9318479685452164e-05, "epoch": 0.38663171690694625, "percentage": 3.87, "elapsed_time": "0:02:18", "remaining_time": "0:57:19", "throughput": 2861.1, "total_tokens": 395776}
{"current_steps": 1480, "total_steps": 38150, "loss": 0.6953, "lr": 1.9384010484927918e-05, "epoch": 0.38794233289646135, "percentage": 3.88, "elapsed_time": "0:02:18", "remaining_time": "0:57:17", "throughput": 2860.26, "total_tokens": 396880}
{"current_steps": 1485, "total_steps": 38150, "loss": 0.4633, "lr": 1.944954128440367e-05, "epoch": 0.3892529488859764, "percentage": 3.89, "elapsed_time": "0:02:19", "remaining_time": "0:57:18", "throughput": 2861.63, "total_tokens": 398512}
{"current_steps": 1490, "total_steps": 38150, "loss": 0.765, "lr": 1.9515072083879425e-05, "epoch": 0.3905635648754915, "percentage": 3.91, "elapsed_time": "0:02:19", "remaining_time": "0:57:16", "throughput": 2861.03, "total_tokens": 399664}
{"current_steps": 1495, "total_steps": 38150, "loss": 0.3598, "lr": 1.958060288335518e-05, "epoch": 0.3918741808650065, "percentage": 3.92, "elapsed_time": "0:02:20", "remaining_time": "0:57:15", "throughput": 2860.59, "total_tokens": 400864}
{"current_steps": 1500, "total_steps": 38150, "loss": 0.5789, "lr": 1.964613368283093e-05, "epoch": 0.3931847968545216, "percentage": 3.93, "elapsed_time": "0:02:20", "remaining_time": "0:57:15", "throughput": 2861.39, "total_tokens": 402352}
{"current_steps": 1505, "total_steps": 38150, "loss": 0.5405, "lr": 1.9711664482306684e-05, "epoch": 0.3944954128440367, "percentage": 3.94, "elapsed_time": "0:02:21", "remaining_time": "0:57:15", "throughput": 2861.71, "total_tokens": 403744}
{"current_steps": 1510, "total_steps": 38150, "loss": 0.4973, "lr": 1.9777195281782438e-05, "epoch": 0.39580602883355176, "percentage": 3.96, "elapsed_time": "0:02:21", "remaining_time": "0:57:14", "throughput": 2862.33, "total_tokens": 405104}
{"current_steps": 1515, "total_steps": 38150, "loss": 0.7394, "lr": 1.984272608125819e-05, "epoch": 0.39711664482306686, "percentage": 3.97, "elapsed_time": "0:02:21", "remaining_time": "0:57:12", "throughput": 2861.6, "total_tokens": 406208}
{"current_steps": 1520, "total_steps": 38150, "loss": 0.5288, "lr": 1.9908256880733945e-05, "epoch": 0.3984272608125819, "percentage": 3.98, "elapsed_time": "0:02:22", "remaining_time": "0:57:11", "throughput": 2859.36, "total_tokens": 407120}
{"current_steps": 1525, "total_steps": 38150, "loss": 0.5958, "lr": 1.99737876802097e-05, "epoch": 0.399737876802097, "percentage": 4.0, "elapsed_time": "0:02:22", "remaining_time": "0:57:09", "throughput": 2857.64, "total_tokens": 408080}
{"current_steps": 1530, "total_steps": 38150, "loss": 0.589, "lr": 2.0039318479685453e-05, "epoch": 0.40104849279161203, "percentage": 4.01, "elapsed_time": "0:02:23", "remaining_time": "0:57:08", "throughput": 2857.41, "total_tokens": 409360}
{"current_steps": 1535, "total_steps": 38150, "loss": 0.493, "lr": 2.0104849279161207e-05, "epoch": 0.40235910878112713, "percentage": 4.02, "elapsed_time": "0:02:23", "remaining_time": "0:57:07", "throughput": 2857.98, "total_tokens": 410720}
{"current_steps": 1540, "total_steps": 38150, "loss": 0.5521, "lr": 2.017038007863696e-05, "epoch": 0.4036697247706422, "percentage": 4.04, "elapsed_time": "0:02:24", "remaining_time": "0:57:06", "throughput": 2857.64, "total_tokens": 411904}
{"current_steps": 1545, "total_steps": 38150, "loss": 0.5543, "lr": 2.023591087811271e-05, "epoch": 0.40498034076015726, "percentage": 4.05, "elapsed_time": "0:02:24", "remaining_time": "0:57:05", "throughput": 2856.59, "total_tokens": 412960}
{"current_steps": 1550, "total_steps": 38150, "loss": 0.5345, "lr": 2.030144167758847e-05, "epoch": 0.40629095674967236, "percentage": 4.06, "elapsed_time": "0:02:25", "remaining_time": "0:57:06", "throughput": 2858.66, "total_tokens": 414784}
{"current_steps": 1555, "total_steps": 38150, "loss": 0.5646, "lr": 2.0366972477064223e-05, "epoch": 0.4076015727391874, "percentage": 4.08, "elapsed_time": "0:02:25", "remaining_time": "0:57:05", "throughput": 2859.62, "total_tokens": 416208}
{"current_steps": 1560, "total_steps": 38150, "loss": 0.4671, "lr": 2.0432503276539973e-05, "epoch": 0.4089121887287025, "percentage": 4.09, "elapsed_time": "0:02:25", "remaining_time": "0:57:04", "throughput": 2859.46, "total_tokens": 417440}
{"current_steps": 1565, "total_steps": 38150, "loss": 0.5949, "lr": 2.049803407601573e-05, "epoch": 0.41022280471821754, "percentage": 4.1, "elapsed_time": "0:02:26", "remaining_time": "0:57:04", "throughput": 2860.38, "total_tokens": 419008}
{"current_steps": 1570, "total_steps": 38150, "loss": 0.5006, "lr": 2.056356487549148e-05, "epoch": 0.41153342070773263, "percentage": 4.12, "elapsed_time": "0:02:26", "remaining_time": "0:57:03", "throughput": 2861.08, "total_tokens": 420384}
{"current_steps": 1575, "total_steps": 38150, "loss": 0.6081, "lr": 2.0629095674967235e-05, "epoch": 0.41284403669724773, "percentage": 4.13, "elapsed_time": "0:02:27", "remaining_time": "0:57:02", "throughput": 2859.85, "total_tokens": 421440}
{"current_steps": 1580, "total_steps": 38150, "loss": 0.5264, "lr": 2.069462647444299e-05, "epoch": 0.41415465268676277, "percentage": 4.14, "elapsed_time": "0:02:27", "remaining_time": "0:57:00", "throughput": 2859.4, "total_tokens": 422608}
{"current_steps": 1585, "total_steps": 38150, "loss": 0.5201, "lr": 2.0760157273918743e-05, "epoch": 0.41546526867627787, "percentage": 4.15, "elapsed_time": "0:02:28", "remaining_time": "0:56:59", "throughput": 2859.95, "total_tokens": 423952}
{"current_steps": 1590, "total_steps": 38150, "loss": 0.5109, "lr": 2.0825688073394497e-05, "epoch": 0.4167758846657929, "percentage": 4.17, "elapsed_time": "0:02:28", "remaining_time": "0:56:59", "throughput": 2861.33, "total_tokens": 425552}
{"current_steps": 1595, "total_steps": 38150, "loss": 0.6181, "lr": 2.089121887287025e-05, "epoch": 0.418086500655308, "percentage": 4.18, "elapsed_time": "0:02:29", "remaining_time": "0:56:58", "throughput": 2860.4, "total_tokens": 426656}
{"current_steps": 1600, "total_steps": 38150, "loss": 0.692, "lr": 2.0956749672346005e-05, "epoch": 0.41939711664482304, "percentage": 4.19, "elapsed_time": "0:02:29", "remaining_time": "0:56:57", "throughput": 2858.96, "total_tokens": 427696}
{"current_steps": 1605, "total_steps": 38150, "loss": 0.5801, "lr": 2.1022280471821755e-05, "epoch": 0.42070773263433814, "percentage": 4.21, "elapsed_time": "0:02:30", "remaining_time": "0:56:55", "throughput": 2857.24, "total_tokens": 428656}
{"current_steps": 1610, "total_steps": 38150, "loss": 0.701, "lr": 2.1087811271297513e-05, "epoch": 0.42201834862385323, "percentage": 4.22, "elapsed_time": "0:02:30", "remaining_time": "0:56:55", "throughput": 2857.96, "total_tokens": 430128}
{"current_steps": 1615, "total_steps": 38150, "loss": 0.3888, "lr": 2.1153342070773263e-05, "epoch": 0.4233289646133683, "percentage": 4.23, "elapsed_time": "0:02:31", "remaining_time": "0:56:56", "throughput": 2859.47, "total_tokens": 431792}
{"current_steps": 1620, "total_steps": 38150, "loss": 0.4169, "lr": 2.1218872870249017e-05, "epoch": 0.42463958060288337, "percentage": 4.25, "elapsed_time": "0:02:31", "remaining_time": "0:56:55", "throughput": 2860.29, "total_tokens": 433200}
{"current_steps": 1625, "total_steps": 38150, "loss": 0.5097, "lr": 2.1284403669724774e-05, "epoch": 0.4259501965923984, "percentage": 4.26, "elapsed_time": "0:02:31", "remaining_time": "0:56:54", "throughput": 2861.47, "total_tokens": 434688}
{"current_steps": 1630, "total_steps": 38150, "loss": 0.7436, "lr": 2.1349934469200525e-05, "epoch": 0.4272608125819135, "percentage": 4.27, "elapsed_time": "0:02:32", "remaining_time": "0:56:53", "throughput": 2861.35, "total_tokens": 435920}
{"current_steps": 1635, "total_steps": 38150, "loss": 0.5988, "lr": 2.141546526867628e-05, "epoch": 0.42857142857142855, "percentage": 4.29, "elapsed_time": "0:02:32", "remaining_time": "0:56:55", "throughput": 2864.3, "total_tokens": 438000}
{"current_steps": 1640, "total_steps": 38150, "loss": 0.5468, "lr": 2.1480996068152033e-05, "epoch": 0.42988204456094364, "percentage": 4.3, "elapsed_time": "0:02:33", "remaining_time": "0:56:53", "throughput": 2863.42, "total_tokens": 439104}
{"current_steps": 1645, "total_steps": 38150, "loss": 0.5618, "lr": 2.1546526867627787e-05, "epoch": 0.43119266055045874, "percentage": 4.31, "elapsed_time": "0:02:33", "remaining_time": "0:56:52", "throughput": 2863.66, "total_tokens": 440400}
{"current_steps": 1650, "total_steps": 38150, "loss": 0.5733, "lr": 2.1612057667103537e-05, "epoch": 0.4325032765399738, "percentage": 4.33, "elapsed_time": "0:02:34", "remaining_time": "0:56:55", "throughput": 2866.48, "total_tokens": 442592}
{"current_steps": 1655, "total_steps": 38150, "loss": 0.6375, "lr": 2.1677588466579294e-05, "epoch": 0.4338138925294889, "percentage": 4.34, "elapsed_time": "0:02:34", "remaining_time": "0:56:55", "throughput": 2866.96, "total_tokens": 444016}
{"current_steps": 1660, "total_steps": 38150, "loss": 0.5666, "lr": 2.1743119266055048e-05, "epoch": 0.4351245085190039, "percentage": 4.35, "elapsed_time": "0:02:35", "remaining_time": "0:56:55", "throughput": 2867.12, "total_tokens": 445424}
{"current_steps": 1665, "total_steps": 38150, "loss": 0.4677, "lr": 2.18086500655308e-05, "epoch": 0.436435124508519, "percentage": 4.36, "elapsed_time": "0:02:35", "remaining_time": "0:56:54", "throughput": 2867.5, "total_tokens": 446768}
{"current_steps": 1670, "total_steps": 38150, "loss": 0.7541, "lr": 2.1874180865006556e-05, "epoch": 0.43774574049803405, "percentage": 4.38, "elapsed_time": "0:02:36", "remaining_time": "0:56:53", "throughput": 2868.44, "total_tokens": 448288}
{"current_steps": 1675, "total_steps": 38150, "loss": 0.7581, "lr": 2.1939711664482307e-05, "epoch": 0.43905635648754915, "percentage": 4.39, "elapsed_time": "0:02:36", "remaining_time": "0:56:52", "throughput": 2867.84, "total_tokens": 449440}
{"current_steps": 1680, "total_steps": 38150, "loss": 0.4686, "lr": 2.200524246395806e-05, "epoch": 0.44036697247706424, "percentage": 4.4, "elapsed_time": "0:02:37", "remaining_time": "0:56:52", "throughput": 2868.11, "total_tokens": 450832}
{"current_steps": 1685, "total_steps": 38150, "loss": 0.6392, "lr": 2.2070773263433814e-05, "epoch": 0.4416775884665793, "percentage": 4.42, "elapsed_time": "0:02:37", "remaining_time": "0:56:50", "throughput": 2867.66, "total_tokens": 451984}
{"current_steps": 1690, "total_steps": 38150, "loss": 0.5365, "lr": 2.213630406290957e-05, "epoch": 0.4429882044560944, "percentage": 4.43, "elapsed_time": "0:02:38", "remaining_time": "0:56:49", "throughput": 2868.37, "total_tokens": 453376}
{"current_steps": 1695, "total_steps": 38150, "loss": 0.4315, "lr": 2.2201834862385322e-05, "epoch": 0.4442988204456094, "percentage": 4.44, "elapsed_time": "0:02:38", "remaining_time": "0:56:48", "throughput": 2868.53, "total_tokens": 454672}
{"current_steps": 1700, "total_steps": 38150, "loss": 0.4382, "lr": 2.2267365661861076e-05, "epoch": 0.4456094364351245, "percentage": 4.46, "elapsed_time": "0:02:38", "remaining_time": "0:56:48", "throughput": 2869.64, "total_tokens": 456240}
{"current_steps": 1705, "total_steps": 38150, "loss": 0.4954, "lr": 2.233289646133683e-05, "epoch": 0.44692005242463956, "percentage": 4.47, "elapsed_time": "0:02:39", "remaining_time": "0:56:48", "throughput": 2869.97, "total_tokens": 457632}
{"current_steps": 1710, "total_steps": 38150, "loss": 0.376, "lr": 2.239842726081258e-05, "epoch": 0.44823066841415465, "percentage": 4.48, "elapsed_time": "0:02:39", "remaining_time": "0:56:48", "throughput": 2870.68, "total_tokens": 459200}
{"current_steps": 1715, "total_steps": 38150, "loss": 0.5954, "lr": 2.2463958060288338e-05, "epoch": 0.44954128440366975, "percentage": 4.5, "elapsed_time": "0:02:40", "remaining_time": "0:56:47", "throughput": 2870.18, "total_tokens": 460384}
{"current_steps": 1720, "total_steps": 38150, "loss": 0.5143, "lr": 2.252948885976409e-05, "epoch": 0.4508519003931848, "percentage": 4.51, "elapsed_time": "0:02:40", "remaining_time": "0:56:46", "throughput": 2870.44, "total_tokens": 461712}
{"current_steps": 1725, "total_steps": 38150, "loss": 0.3802, "lr": 2.2595019659239842e-05, "epoch": 0.4521625163826999, "percentage": 4.52, "elapsed_time": "0:02:41", "remaining_time": "0:56:45", "throughput": 2869.88, "total_tokens": 462832}
{"current_steps": 1730, "total_steps": 38150, "loss": 0.4073, "lr": 2.26605504587156e-05, "epoch": 0.4534731323722149, "percentage": 4.53, "elapsed_time": "0:02:41", "remaining_time": "0:56:45", "throughput": 2870.08, "total_tokens": 464224}
{"current_steps": 1735, "total_steps": 38150, "loss": 0.5006, "lr": 2.272608125819135e-05, "epoch": 0.45478374836173, "percentage": 4.55, "elapsed_time": "0:02:42", "remaining_time": "0:56:44", "throughput": 2870.8, "total_tokens": 465696}
{"current_steps": 1740, "total_steps": 38150, "loss": 0.5384, "lr": 2.2791612057667104e-05, "epoch": 0.45609436435124506, "percentage": 4.56, "elapsed_time": "0:02:42", "remaining_time": "0:56:44", "throughput": 2870.34, "total_tokens": 466944}
{"current_steps": 1745, "total_steps": 38150, "loss": 0.58, "lr": 2.2857142857142858e-05, "epoch": 0.45740498034076016, "percentage": 4.57, "elapsed_time": "0:02:43", "remaining_time": "0:56:43", "throughput": 2870.59, "total_tokens": 468256}
{"current_steps": 1750, "total_steps": 38150, "loss": 0.5626, "lr": 2.2922673656618612e-05, "epoch": 0.45871559633027525, "percentage": 4.59, "elapsed_time": "0:02:43", "remaining_time": "0:56:41", "throughput": 2870.05, "total_tokens": 469376}
{"current_steps": 1755, "total_steps": 38150, "loss": 0.6257, "lr": 2.2988204456094366e-05, "epoch": 0.4600262123197903, "percentage": 4.6, "elapsed_time": "0:02:43", "remaining_time": "0:56:40", "throughput": 2868.36, "total_tokens": 470336}
{"current_steps": 1760, "total_steps": 38150, "loss": 0.608, "lr": 2.305373525557012e-05, "epoch": 0.4613368283093054, "percentage": 4.61, "elapsed_time": "0:02:44", "remaining_time": "0:56:39", "throughput": 2867.09, "total_tokens": 471360}
{"current_steps": 1765, "total_steps": 38150, "loss": 0.6554, "lr": 2.3119266055045874e-05, "epoch": 0.46264744429882043, "percentage": 4.63, "elapsed_time": "0:02:44", "remaining_time": "0:56:38", "throughput": 2866.8, "total_tokens": 472576}
{"current_steps": 1770, "total_steps": 38150, "loss": 0.6181, "lr": 2.3184796854521628e-05, "epoch": 0.4639580602883355, "percentage": 4.64, "elapsed_time": "0:02:45", "remaining_time": "0:56:38", "throughput": 2868.08, "total_tokens": 474224}
{"current_steps": 1775, "total_steps": 38150, "loss": 0.5038, "lr": 2.325032765399738e-05, "epoch": 0.46526867627785057, "percentage": 4.65, "elapsed_time": "0:02:45", "remaining_time": "0:56:37", "throughput": 2867.61, "total_tokens": 475376}
{"current_steps": 1780, "total_steps": 38150, "loss": 0.673, "lr": 2.3315858453473132e-05, "epoch": 0.46657929226736566, "percentage": 4.67, "elapsed_time": "0:02:46", "remaining_time": "0:56:36", "throughput": 2867.1, "total_tokens": 476592}
{"current_steps": 1785, "total_steps": 38150, "loss": 0.6623, "lr": 2.3381389252948886e-05, "epoch": 0.46788990825688076, "percentage": 4.68, "elapsed_time": "0:02:46", "remaining_time": "0:56:35", "throughput": 2865.33, "total_tokens": 477504}
{"current_steps": 1790, "total_steps": 38150, "loss": 0.4698, "lr": 2.344692005242464e-05, "epoch": 0.4692005242463958, "percentage": 4.69, "elapsed_time": "0:02:47", "remaining_time": "0:56:33", "throughput": 2864.16, "total_tokens": 478560}
{"current_steps": 1795, "total_steps": 38150, "loss": 0.4542, "lr": 2.3512450851900394e-05, "epoch": 0.4705111402359109, "percentage": 4.71, "elapsed_time": "0:02:47", "remaining_time": "0:56:33", "throughput": 2864.59, "total_tokens": 479920}
{"current_steps": 1800, "total_steps": 38150, "loss": 0.393, "lr": 2.3577981651376148e-05, "epoch": 0.47182175622542594, "percentage": 4.72, "elapsed_time": "0:02:47", "remaining_time": "0:56:32", "throughput": 2865.07, "total_tokens": 481264}
{"current_steps": 1805, "total_steps": 38150, "loss": 0.8531, "lr": 2.36435124508519e-05, "epoch": 0.47313237221494103, "percentage": 4.73, "elapsed_time": "0:02:48", "remaining_time": "0:56:31", "throughput": 2865.06, "total_tokens": 482608}
{"current_steps": 1810, "total_steps": 38150, "loss": 0.5, "lr": 2.3709043250327656e-05, "epoch": 0.4744429882044561, "percentage": 4.74, "elapsed_time": "0:02:48", "remaining_time": "0:56:31", "throughput": 2865.86, "total_tokens": 484096}
{"current_steps": 1815, "total_steps": 38150, "loss": 0.5278, "lr": 2.377457404980341e-05, "epoch": 0.47575360419397117, "percentage": 4.76, "elapsed_time": "0:02:49", "remaining_time": "0:56:30", "throughput": 2864.8, "total_tokens": 485120}
{"current_steps": 1820, "total_steps": 38150, "loss": 0.3805, "lr": 2.3840104849279163e-05, "epoch": 0.47706422018348627, "percentage": 4.77, "elapsed_time": "0:02:49", "remaining_time": "0:56:28", "throughput": 2863.19, "total_tokens": 486080}
{"current_steps": 1825, "total_steps": 38150, "loss": 0.5567, "lr": 2.3905635648754914e-05, "epoch": 0.4783748361730013, "percentage": 4.78, "elapsed_time": "0:02:50", "remaining_time": "0:56:27", "throughput": 2863.16, "total_tokens": 487344}
{"current_steps": 1830, "total_steps": 38150, "loss": 0.4873, "lr": 2.397116644823067e-05, "epoch": 0.4796854521625164, "percentage": 4.8, "elapsed_time": "0:02:50", "remaining_time": "0:56:26", "throughput": 2863.18, "total_tokens": 488608}
{"current_steps": 1835, "total_steps": 38150, "loss": 0.5203, "lr": 2.4036697247706425e-05, "epoch": 0.48099606815203144, "percentage": 4.81, "elapsed_time": "0:02:51", "remaining_time": "0:56:25", "throughput": 2862.12, "total_tokens": 489664}
{"current_steps": 1840, "total_steps": 38150, "loss": 0.4861, "lr": 2.4102228047182176e-05, "epoch": 0.48230668414154654, "percentage": 4.82, "elapsed_time": "0:02:51", "remaining_time": "0:56:25", "throughput": 2862.61, "total_tokens": 491120}
{"current_steps": 1845, "total_steps": 38150, "loss": 0.4841, "lr": 2.4167758846657933e-05, "epoch": 0.4836173001310616, "percentage": 4.84, "elapsed_time": "0:02:52", "remaining_time": "0:56:25", "throughput": 2862.6, "total_tokens": 492464}
{"current_steps": 1850, "total_steps": 38150, "loss": 0.5523, "lr": 2.4233289646133683e-05, "epoch": 0.4849279161205767, "percentage": 4.85, "elapsed_time": "0:02:52", "remaining_time": "0:56:24", "throughput": 2862.44, "total_tokens": 493712}
{"current_steps": 1855, "total_steps": 38150, "loss": 0.3985, "lr": 2.4298820445609437e-05, "epoch": 0.48623853211009177, "percentage": 4.86, "elapsed_time": "0:02:52", "remaining_time": "0:56:23", "throughput": 2861.67, "total_tokens": 494816}
{"current_steps": 1860, "total_steps": 38150, "loss": 0.4525, "lr": 2.436435124508519e-05, "epoch": 0.4875491480996068, "percentage": 4.88, "elapsed_time": "0:02:53", "remaining_time": "0:56:23", "throughput": 2862.04, "total_tokens": 496256}
{"current_steps": 1865, "total_steps": 38150, "loss": 0.5601, "lr": 2.4429882044560945e-05, "epoch": 0.4888597640891219, "percentage": 4.89, "elapsed_time": "0:02:53", "remaining_time": "0:56:21", "throughput": 2860.88, "total_tokens": 497280}
{"current_steps": 1870, "total_steps": 38150, "loss": 0.6646, "lr": 2.44954128440367e-05, "epoch": 0.49017038007863695, "percentage": 4.9, "elapsed_time": "0:02:54", "remaining_time": "0:56:20", "throughput": 2858.94, "total_tokens": 498144}
{"current_steps": 1875, "total_steps": 38150, "loss": 0.5493, "lr": 2.4560943643512453e-05, "epoch": 0.49148099606815204, "percentage": 4.91, "elapsed_time": "0:02:54", "remaining_time": "0:56:20", "throughput": 2859.59, "total_tokens": 499648}
{"current_steps": 1880, "total_steps": 38150, "loss": 0.4124, "lr": 2.4626474442988207e-05, "epoch": 0.4927916120576671, "percentage": 4.93, "elapsed_time": "0:02:55", "remaining_time": "0:56:21", "throughput": 2861.48, "total_tokens": 501488}
{"current_steps": 1885, "total_steps": 38150, "loss": 0.7164, "lr": 2.4692005242463957e-05, "epoch": 0.4941022280471822, "percentage": 4.94, "elapsed_time": "0:02:55", "remaining_time": "0:56:21", "throughput": 2864.08, "total_tokens": 503456}
{"current_steps": 1890, "total_steps": 38150, "loss": 0.3806, "lr": 2.4757536041939715e-05, "epoch": 0.4954128440366973, "percentage": 4.95, "elapsed_time": "0:02:56", "remaining_time": "0:56:20", "throughput": 2863.33, "total_tokens": 504544}
{"current_steps": 1895, "total_steps": 38150, "loss": 0.5285, "lr": 2.4823066841415465e-05, "epoch": 0.4967234600262123, "percentage": 4.97, "elapsed_time": "0:02:56", "remaining_time": "0:56:19", "throughput": 2862.98, "total_tokens": 505728}
{"current_steps": 1900, "total_steps": 38150, "loss": 0.6124, "lr": 2.488859764089122e-05, "epoch": 0.4980340760157274, "percentage": 4.98, "elapsed_time": "0:02:57", "remaining_time": "0:56:18", "throughput": 2861.37, "total_tokens": 506640}
{"current_steps": 1905, "total_steps": 38150, "loss": 0.4242, "lr": 2.4954128440366977e-05, "epoch": 0.49934469200524245, "percentage": 4.99, "elapsed_time": "0:02:57", "remaining_time": "0:56:17", "throughput": 2861.49, "total_tokens": 507904}
{"current_steps": 1908, "total_steps": 38150, "eval_loss": 0.5420164465904236, "epoch": 0.5001310615989515, "percentage": 5.0, "elapsed_time": "0:03:14", "remaining_time": "1:01:37", "throughput": 2613.02, "total_tokens": 508608}
{"current_steps": 1910, "total_steps": 38150, "loss": 0.554, "lr": 2.501965923984273e-05, "epoch": 0.5006553079947575, "percentage": 5.01, "elapsed_time": "0:03:16", "remaining_time": "1:02:02", "throughput": 2594.17, "total_tokens": 508992}
{"current_steps": 1915, "total_steps": 38150, "loss": 0.3146, "lr": 2.5085190039318478e-05, "epoch": 0.5019659239842726, "percentage": 5.02, "elapsed_time": "0:03:16", "remaining_time": "1:02:01", "throughput": 2594.27, "total_tokens": 510256}
{"current_steps": 1920, "total_steps": 38150, "loss": 0.5678, "lr": 2.5150720838794235e-05, "epoch": 0.5032765399737876, "percentage": 5.03, "elapsed_time": "0:03:17", "remaining_time": "1:01:59", "throughput": 2595.04, "total_tokens": 511536}
{"current_steps": 1925, "total_steps": 38150, "loss": 0.6072, "lr": 2.521625163826999e-05, "epoch": 0.5045871559633027, "percentage": 5.05, "elapsed_time": "0:03:17", "remaining_time": "1:01:59", "throughput": 2595.48, "total_tokens": 512992}
{"current_steps": 1930, "total_steps": 38150, "loss": 0.4577, "lr": 2.5281782437745743e-05, "epoch": 0.5058977719528178, "percentage": 5.06, "elapsed_time": "0:03:18", "remaining_time": "1:01:57", "throughput": 2595.51, "total_tokens": 514208}
{"current_steps": 1935, "total_steps": 38150, "loss": 0.7158, "lr": 2.5347313237221493e-05, "epoch": 0.5072083879423329, "percentage": 5.07, "elapsed_time": "0:03:18", "remaining_time": "1:01:56", "throughput": 2596.06, "total_tokens": 515536}
{"current_steps": 1940, "total_steps": 38150, "loss": 0.6246, "lr": 2.5412844036697247e-05, "epoch": 0.508519003931848, "percentage": 5.09, "elapsed_time": "0:03:19", "remaining_time": "1:01:55", "throughput": 2596.04, "total_tokens": 516720}
{"current_steps": 1945, "total_steps": 38150, "loss": 0.4182, "lr": 2.5478374836173e-05, "epoch": 0.509829619921363, "percentage": 5.1, "elapsed_time": "0:03:19", "remaining_time": "1:01:53", "throughput": 2595.92, "total_tokens": 517808}
{"current_steps": 1950, "total_steps": 38150, "loss": 0.4925, "lr": 2.554390563564876e-05, "epoch": 0.5111402359108781, "percentage": 5.11, "elapsed_time": "0:03:19", "remaining_time": "1:01:51", "throughput": 2597.2, "total_tokens": 519312}