rbelanec commited on
Commit
123fda7
·
verified ·
1 Parent(s): 152db30

Training in progress, step 34344

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +382 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864b3df4bdd85730559bdc46f9b309a4ae65a7362780b5dffcf4d3f57a1d0ce1
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:839f9343ab4c570769fd780d2043c03d5be3cbdcfc8af44444f537653df8ea78
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -6508,3 +6508,385 @@
6508
  {"current_steps": 32455, "total_steps": 38150, "loss": 0.2879, "lr": 3.319119781513144e-06, "epoch": 8.507208387942333, "percentage": 85.07, "elapsed_time": "0:53:51", "remaining_time": "0:09:27", "throughput": 2694.09, "total_tokens": 8705440}
6509
  {"current_steps": 32460, "total_steps": 38150, "loss": 0.0348, "lr": 3.313427442957695e-06, "epoch": 8.508519003931848, "percentage": 85.09, "elapsed_time": "0:53:51", "remaining_time": "0:09:26", "throughput": 2694.03, "total_tokens": 8706368}
6510
  {"current_steps": 32465, "total_steps": 38150, "loss": 0.1652, "lr": 3.3077396433644046e-06, "epoch": 8.509829619921362, "percentage": 85.1, "elapsed_time": "0:53:52", "remaining_time": "0:09:25", "throughput": 2694.15, "total_tokens": 8708048}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6508
  {"current_steps": 32455, "total_steps": 38150, "loss": 0.2879, "lr": 3.319119781513144e-06, "epoch": 8.507208387942333, "percentage": 85.07, "elapsed_time": "0:53:51", "remaining_time": "0:09:27", "throughput": 2694.09, "total_tokens": 8705440}
6509
  {"current_steps": 32460, "total_steps": 38150, "loss": 0.0348, "lr": 3.313427442957695e-06, "epoch": 8.508519003931848, "percentage": 85.09, "elapsed_time": "0:53:51", "remaining_time": "0:09:26", "throughput": 2694.03, "total_tokens": 8706368}
6510
  {"current_steps": 32465, "total_steps": 38150, "loss": 0.1652, "lr": 3.3077396433644046e-06, "epoch": 8.509829619921362, "percentage": 85.1, "elapsed_time": "0:53:52", "remaining_time": "0:09:25", "throughput": 2694.15, "total_tokens": 8708048}
6511
+ {"current_steps": 32470, "total_steps": 38150, "loss": 0.1616, "lr": 3.3020563839237267e-06, "epoch": 8.511140235910878, "percentage": 85.11, "elapsed_time": "0:53:52", "remaining_time": "0:09:25", "throughput": 2694.14, "total_tokens": 8709200}
6512
+ {"current_steps": 32475, "total_steps": 38150, "loss": 0.159, "lr": 3.2963776658251537e-06, "epoch": 8.512450851900393, "percentage": 85.12, "elapsed_time": "0:53:53", "remaining_time": "0:09:24", "throughput": 2694.13, "total_tokens": 8710304}
6513
+ {"current_steps": 32480, "total_steps": 38150, "loss": 0.3113, "lr": 3.2907034902572382e-06, "epoch": 8.513761467889909, "percentage": 85.14, "elapsed_time": "0:53:53", "remaining_time": "0:09:24", "throughput": 2694.16, "total_tokens": 8711632}
6514
+ {"current_steps": 32485, "total_steps": 38150, "loss": 0.0631, "lr": 3.2850338584075686e-06, "epoch": 8.515072083879424, "percentage": 85.15, "elapsed_time": "0:53:53", "remaining_time": "0:09:23", "throughput": 2694.08, "total_tokens": 8712464}
6515
+ {"current_steps": 32490, "total_steps": 38150, "loss": 0.2275, "lr": 3.279368771462793e-06, "epoch": 8.516382699868938, "percentage": 85.16, "elapsed_time": "0:53:54", "remaining_time": "0:09:23", "throughput": 2694.08, "total_tokens": 8713568}
6516
+ {"current_steps": 32495, "total_steps": 38150, "loss": 0.1845, "lr": 3.273708230608602e-06, "epoch": 8.517693315858454, "percentage": 85.18, "elapsed_time": "0:53:54", "remaining_time": "0:09:22", "throughput": 2694.15, "total_tokens": 8715136}
6517
+ {"current_steps": 32500, "total_steps": 38150, "loss": 0.109, "lr": 3.2680522370297397e-06, "epoch": 8.51900393184797, "percentage": 85.19, "elapsed_time": "0:53:55", "remaining_time": "0:09:22", "throughput": 2694.29, "total_tokens": 8717136}
6518
+ {"current_steps": 32505, "total_steps": 38150, "loss": 0.1344, "lr": 3.262400791909992e-06, "epoch": 8.520314547837483, "percentage": 85.2, "elapsed_time": "0:53:55", "remaining_time": "0:09:21", "throughput": 2694.34, "total_tokens": 8718544}
6519
+ {"current_steps": 32510, "total_steps": 38150, "loss": 0.1414, "lr": 3.256753896432202e-06, "epoch": 8.521625163826998, "percentage": 85.22, "elapsed_time": "0:53:56", "remaining_time": "0:09:21", "throughput": 2694.34, "total_tokens": 8719680}
6520
+ {"current_steps": 32515, "total_steps": 38150, "loss": 0.0855, "lr": 3.2511115517782494e-06, "epoch": 8.522935779816514, "percentage": 85.23, "elapsed_time": "0:53:56", "remaining_time": "0:09:20", "throughput": 2694.43, "total_tokens": 8721344}
6521
+ {"current_steps": 32520, "total_steps": 38150, "loss": 0.246, "lr": 3.2454737591290695e-06, "epoch": 8.52424639580603, "percentage": 85.24, "elapsed_time": "0:53:57", "remaining_time": "0:09:20", "throughput": 2694.48, "total_tokens": 8722752}
6522
+ {"current_steps": 32525, "total_steps": 38150, "loss": 0.0454, "lr": 3.2398405196646443e-06, "epoch": 8.525557011795543, "percentage": 85.26, "elapsed_time": "0:53:57", "remaining_time": "0:09:19", "throughput": 2694.51, "total_tokens": 8723984}
6523
+ {"current_steps": 32530, "total_steps": 38150, "loss": 0.2172, "lr": 3.2342118345639993e-06, "epoch": 8.526867627785059, "percentage": 85.27, "elapsed_time": "0:53:58", "remaining_time": "0:09:19", "throughput": 2694.53, "total_tokens": 8725184}
6524
+ {"current_steps": 32535, "total_steps": 38150, "loss": 0.127, "lr": 3.2285877050052093e-06, "epoch": 8.528178243774574, "percentage": 85.28, "elapsed_time": "0:53:58", "remaining_time": "0:09:18", "throughput": 2694.58, "total_tokens": 8726592}
6525
+ {"current_steps": 32540, "total_steps": 38150, "loss": 0.0901, "lr": 3.2229681321653966e-06, "epoch": 8.52948885976409, "percentage": 85.29, "elapsed_time": "0:53:58", "remaining_time": "0:09:18", "throughput": 2694.56, "total_tokens": 8727664}
6526
+ {"current_steps": 32545, "total_steps": 38150, "loss": 0.1748, "lr": 3.2173531172207283e-06, "epoch": 8.530799475753604, "percentage": 85.31, "elapsed_time": "0:53:59", "remaining_time": "0:09:17", "throughput": 2694.58, "total_tokens": 8728928}
6527
+ {"current_steps": 32550, "total_steps": 38150, "loss": 0.1292, "lr": 3.211742661346409e-06, "epoch": 8.53211009174312, "percentage": 85.32, "elapsed_time": "0:53:59", "remaining_time": "0:09:17", "throughput": 2694.63, "total_tokens": 8730256}
6528
+ {"current_steps": 32555, "total_steps": 38150, "loss": 0.1356, "lr": 3.2061367657167025e-06, "epoch": 8.533420707732635, "percentage": 85.33, "elapsed_time": "0:54:00", "remaining_time": "0:09:16", "throughput": 2694.6, "total_tokens": 8731248}
6529
+ {"current_steps": 32560, "total_steps": 38150, "loss": 0.1955, "lr": 3.200535431504914e-06, "epoch": 8.534731323722148, "percentage": 85.35, "elapsed_time": "0:54:00", "remaining_time": "0:09:16", "throughput": 2694.64, "total_tokens": 8732624}
6530
+ {"current_steps": 32565, "total_steps": 38150, "loss": 0.1211, "lr": 3.194938659883398e-06, "epoch": 8.536041939711664, "percentage": 85.36, "elapsed_time": "0:54:01", "remaining_time": "0:09:15", "throughput": 2694.64, "total_tokens": 8733728}
6531
+ {"current_steps": 32570, "total_steps": 38150, "loss": 0.1766, "lr": 3.189346452023542e-06, "epoch": 8.53735255570118, "percentage": 85.37, "elapsed_time": "0:54:01", "remaining_time": "0:09:15", "throughput": 2694.62, "total_tokens": 8734784}
6532
+ {"current_steps": 32575, "total_steps": 38150, "loss": 0.1027, "lr": 3.1837588090957853e-06, "epoch": 8.538663171690695, "percentage": 85.39, "elapsed_time": "0:54:02", "remaining_time": "0:09:14", "throughput": 2694.71, "total_tokens": 8736320}
6533
+ {"current_steps": 32580, "total_steps": 38150, "loss": 0.1754, "lr": 3.1781757322696224e-06, "epoch": 8.539973787680209, "percentage": 85.4, "elapsed_time": "0:54:02", "remaining_time": "0:09:14", "throughput": 2694.7, "total_tokens": 8737424}
6534
+ {"current_steps": 32585, "total_steps": 38150, "loss": 0.1416, "lr": 3.172597222713569e-06, "epoch": 8.541284403669724, "percentage": 85.41, "elapsed_time": "0:54:02", "remaining_time": "0:09:13", "throughput": 2694.7, "total_tokens": 8738592}
6535
+ {"current_steps": 32590, "total_steps": 38150, "loss": 0.1005, "lr": 3.1670232815951994e-06, "epoch": 8.54259501965924, "percentage": 85.43, "elapsed_time": "0:54:03", "remaining_time": "0:09:13", "throughput": 2694.73, "total_tokens": 8739936}
6536
+ {"current_steps": 32595, "total_steps": 38150, "loss": 0.2203, "lr": 3.1614539100811356e-06, "epoch": 8.543905635648755, "percentage": 85.44, "elapsed_time": "0:54:03", "remaining_time": "0:09:12", "throughput": 2694.8, "total_tokens": 8741440}
6537
+ {"current_steps": 32600, "total_steps": 38150, "loss": 0.2072, "lr": 3.1558891093370318e-06, "epoch": 8.54521625163827, "percentage": 85.45, "elapsed_time": "0:54:04", "remaining_time": "0:09:12", "throughput": 2694.9, "total_tokens": 8743024}
6538
+ {"current_steps": 32605, "total_steps": 38150, "loss": 0.087, "lr": 3.150328880527595e-06, "epoch": 8.546526867627785, "percentage": 85.47, "elapsed_time": "0:54:04", "remaining_time": "0:09:11", "throughput": 2694.9, "total_tokens": 8744160}
6539
+ {"current_steps": 32610, "total_steps": 38150, "loss": 0.1094, "lr": 3.1447732248165633e-06, "epoch": 8.5478374836173, "percentage": 85.48, "elapsed_time": "0:54:05", "remaining_time": "0:09:11", "throughput": 2694.92, "total_tokens": 8745344}
6540
+ {"current_steps": 32615, "total_steps": 38150, "loss": 0.1611, "lr": 3.139222143366746e-06, "epoch": 8.549148099606816, "percentage": 85.49, "elapsed_time": "0:54:05", "remaining_time": "0:09:10", "throughput": 2694.98, "total_tokens": 8746800}
6541
+ {"current_steps": 32620, "total_steps": 38150, "loss": 0.1201, "lr": 3.1336756373399556e-06, "epoch": 8.55045871559633, "percentage": 85.5, "elapsed_time": "0:54:06", "remaining_time": "0:09:10", "throughput": 2695.08, "total_tokens": 8748416}
6542
+ {"current_steps": 32625, "total_steps": 38150, "loss": 0.1595, "lr": 3.128133707897071e-06, "epoch": 8.551769331585845, "percentage": 85.52, "elapsed_time": "0:54:06", "remaining_time": "0:09:09", "throughput": 2695.15, "total_tokens": 8749808}
6543
+ {"current_steps": 32630, "total_steps": 38150, "loss": 0.2437, "lr": 3.12259635619801e-06, "epoch": 8.55307994757536, "percentage": 85.53, "elapsed_time": "0:54:06", "remaining_time": "0:09:09", "throughput": 2695.17, "total_tokens": 8751008}
6544
+ {"current_steps": 32635, "total_steps": 38150, "loss": 0.1381, "lr": 3.1170635834017303e-06, "epoch": 8.554390563564876, "percentage": 85.54, "elapsed_time": "0:54:07", "remaining_time": "0:09:08", "throughput": 2695.22, "total_tokens": 8752416}
6545
+ {"current_steps": 32640, "total_steps": 38150, "loss": 0.1119, "lr": 3.1115353906662313e-06, "epoch": 8.55570117955439, "percentage": 85.56, "elapsed_time": "0:54:07", "remaining_time": "0:09:08", "throughput": 2695.2, "total_tokens": 8753456}
6546
+ {"current_steps": 32645, "total_steps": 38150, "loss": 0.1541, "lr": 3.1060117791485544e-06, "epoch": 8.557011795543906, "percentage": 85.57, "elapsed_time": "0:54:08", "remaining_time": "0:09:07", "throughput": 2695.23, "total_tokens": 8754768}
6547
+ {"current_steps": 32650, "total_steps": 38150, "loss": 0.1482, "lr": 3.1004927500047827e-06, "epoch": 8.558322411533421, "percentage": 85.58, "elapsed_time": "0:54:08", "remaining_time": "0:09:07", "throughput": 2695.29, "total_tokens": 8756160}
6548
+ {"current_steps": 32655, "total_steps": 38150, "loss": 0.1219, "lr": 3.0949783043900393e-06, "epoch": 8.559633027522935, "percentage": 85.6, "elapsed_time": "0:54:09", "remaining_time": "0:09:06", "throughput": 2695.24, "total_tokens": 8757088}
6549
+ {"current_steps": 32660, "total_steps": 38150, "loss": 0.2306, "lr": 3.0894684434584958e-06, "epoch": 8.56094364351245, "percentage": 85.61, "elapsed_time": "0:54:09", "remaining_time": "0:09:06", "throughput": 2695.3, "total_tokens": 8758464}
6550
+ {"current_steps": 32665, "total_steps": 38150, "loss": 0.1423, "lr": 3.083963168363341e-06, "epoch": 8.562254259501966, "percentage": 85.62, "elapsed_time": "0:54:09", "remaining_time": "0:09:05", "throughput": 2695.35, "total_tokens": 8759792}
6551
+ {"current_steps": 32670, "total_steps": 38150, "loss": 0.1669, "lr": 3.0784624802568192e-06, "epoch": 8.563564875491481, "percentage": 85.64, "elapsed_time": "0:54:10", "remaining_time": "0:09:05", "throughput": 2695.41, "total_tokens": 8761248}
6552
+ {"current_steps": 32675, "total_steps": 38150, "loss": 0.1365, "lr": 3.072966380290232e-06, "epoch": 8.564875491480995, "percentage": 85.65, "elapsed_time": "0:54:10", "remaining_time": "0:09:04", "throughput": 2695.44, "total_tokens": 8762480}
6553
+ {"current_steps": 32680, "total_steps": 38150, "loss": 0.2416, "lr": 3.0674748696138927e-06, "epoch": 8.56618610747051, "percentage": 85.66, "elapsed_time": "0:54:11", "remaining_time": "0:09:04", "throughput": 2695.56, "total_tokens": 8764176}
6554
+ {"current_steps": 32685, "total_steps": 38150, "loss": 0.2441, "lr": 3.0619879493771654e-06, "epoch": 8.567496723460026, "percentage": 85.67, "elapsed_time": "0:54:11", "remaining_time": "0:09:03", "throughput": 2695.58, "total_tokens": 8765344}
6555
+ {"current_steps": 32690, "total_steps": 38150, "loss": 0.0591, "lr": 3.056505620728456e-06, "epoch": 8.568807339449542, "percentage": 85.69, "elapsed_time": "0:54:12", "remaining_time": "0:09:03", "throughput": 2695.6, "total_tokens": 8766608}
6556
+ {"current_steps": 32695, "total_steps": 38150, "loss": 0.2646, "lr": 3.051027884815211e-06, "epoch": 8.570117955439056, "percentage": 85.7, "elapsed_time": "0:54:12", "remaining_time": "0:09:02", "throughput": 2695.75, "total_tokens": 8768576}
6557
+ {"current_steps": 32700, "total_steps": 38150, "loss": 0.1051, "lr": 3.0455547427838986e-06, "epoch": 8.571428571428571, "percentage": 85.71, "elapsed_time": "0:54:13", "remaining_time": "0:09:02", "throughput": 2695.74, "total_tokens": 8769712}
6558
+ {"current_steps": 32705, "total_steps": 38150, "loss": 0.2622, "lr": 3.0400861957800423e-06, "epoch": 8.572739187418087, "percentage": 85.73, "elapsed_time": "0:54:13", "remaining_time": "0:09:01", "throughput": 2695.91, "total_tokens": 8771792}
6559
+ {"current_steps": 32710, "total_steps": 38150, "loss": 0.1677, "lr": 3.034622244948204e-06, "epoch": 8.574049803407602, "percentage": 85.74, "elapsed_time": "0:54:14", "remaining_time": "0:09:01", "throughput": 2695.91, "total_tokens": 8772912}
6560
+ {"current_steps": 32715, "total_steps": 38150, "loss": 0.1415, "lr": 3.029162891431975e-06, "epoch": 8.575360419397116, "percentage": 85.75, "elapsed_time": "0:54:14", "remaining_time": "0:09:00", "throughput": 2696.03, "total_tokens": 8774656}
6561
+ {"current_steps": 32720, "total_steps": 38150, "loss": 0.1123, "lr": 3.02370813637399e-06, "epoch": 8.576671035386632, "percentage": 85.77, "elapsed_time": "0:54:15", "remaining_time": "0:09:00", "throughput": 2696.18, "total_tokens": 8776688}
6562
+ {"current_steps": 32725, "total_steps": 38150, "loss": 0.0681, "lr": 3.018257980915917e-06, "epoch": 8.577981651376147, "percentage": 85.78, "elapsed_time": "0:54:15", "remaining_time": "0:08:59", "throughput": 2696.14, "total_tokens": 8777680}
6563
+ {"current_steps": 32730, "total_steps": 38150, "loss": 0.2023, "lr": 3.0128124261984693e-06, "epoch": 8.579292267365663, "percentage": 85.79, "elapsed_time": "0:54:16", "remaining_time": "0:08:59", "throughput": 2696.09, "total_tokens": 8778656}
6564
+ {"current_steps": 32735, "total_steps": 38150, "loss": 0.1234, "lr": 3.007371473361398e-06, "epoch": 8.580602883355176, "percentage": 85.81, "elapsed_time": "0:54:16", "remaining_time": "0:08:58", "throughput": 2696.12, "total_tokens": 8779968}
6565
+ {"current_steps": 32740, "total_steps": 38150, "loss": 0.2098, "lr": 3.00193512354347e-06, "epoch": 8.581913499344692, "percentage": 85.82, "elapsed_time": "0:54:17", "remaining_time": "0:08:58", "throughput": 2696.27, "total_tokens": 8781888}
6566
+ {"current_steps": 32745, "total_steps": 38150, "loss": 0.1873, "lr": 2.996503377882512e-06, "epoch": 8.583224115334207, "percentage": 85.83, "elapsed_time": "0:54:17", "remaining_time": "0:08:57", "throughput": 2696.34, "total_tokens": 8783424}
6567
+ {"current_steps": 32750, "total_steps": 38150, "loss": 0.0549, "lr": 2.9910762375153752e-06, "epoch": 8.584534731323721, "percentage": 85.85, "elapsed_time": "0:54:17", "remaining_time": "0:08:57", "throughput": 2696.33, "total_tokens": 8784496}
6568
+ {"current_steps": 32755, "total_steps": 38150, "loss": 0.2593, "lr": 2.9856537035779564e-06, "epoch": 8.585845347313237, "percentage": 85.86, "elapsed_time": "0:54:18", "remaining_time": "0:08:56", "throughput": 2696.39, "total_tokens": 8785840}
6569
+ {"current_steps": 32760, "total_steps": 38150, "loss": 0.1327, "lr": 2.980235777205179e-06, "epoch": 8.587155963302752, "percentage": 85.87, "elapsed_time": "0:54:18", "remaining_time": "0:08:56", "throughput": 2696.46, "total_tokens": 8787392}
6570
+ {"current_steps": 32765, "total_steps": 38150, "loss": 0.1416, "lr": 2.9748224595310042e-06, "epoch": 8.588466579292268, "percentage": 85.88, "elapsed_time": "0:54:19", "remaining_time": "0:08:55", "throughput": 2696.51, "total_tokens": 8788704}
6571
+ {"current_steps": 32770, "total_steps": 38150, "loss": 0.1558, "lr": 2.9694137516884353e-06, "epoch": 8.589777195281782, "percentage": 85.9, "elapsed_time": "0:54:19", "remaining_time": "0:08:55", "throughput": 2696.54, "total_tokens": 8789984}
6572
+ {"current_steps": 32775, "total_steps": 38150, "loss": 0.1444, "lr": 2.9640096548094992e-06, "epoch": 8.591087811271297, "percentage": 85.91, "elapsed_time": "0:54:20", "remaining_time": "0:08:54", "throughput": 2696.68, "total_tokens": 8791792}
6573
+ {"current_steps": 32780, "total_steps": 38150, "loss": 0.1477, "lr": 2.958610170025275e-06, "epoch": 8.592398427260813, "percentage": 85.92, "elapsed_time": "0:54:20", "remaining_time": "0:08:54", "throughput": 2696.7, "total_tokens": 8793152}
6574
+ {"current_steps": 32785, "total_steps": 38150, "loss": 0.1643, "lr": 2.9532152984658446e-06, "epoch": 8.593709043250328, "percentage": 85.94, "elapsed_time": "0:54:21", "remaining_time": "0:08:53", "throughput": 2696.72, "total_tokens": 8794432}
6575
+ {"current_steps": 32790, "total_steps": 38150, "loss": 0.1005, "lr": 2.9478250412603657e-06, "epoch": 8.595019659239842, "percentage": 85.95, "elapsed_time": "0:54:21", "remaining_time": "0:08:53", "throughput": 2696.72, "total_tokens": 8795648}
6576
+ {"current_steps": 32795, "total_steps": 38150, "loss": 0.091, "lr": 2.942439399537e-06, "epoch": 8.596330275229358, "percentage": 85.96, "elapsed_time": "0:54:22", "remaining_time": "0:08:52", "throughput": 2696.69, "total_tokens": 8796640}
6577
+ {"current_steps": 32800, "total_steps": 38150, "loss": 0.3083, "lr": 2.9370583744229572e-06, "epoch": 8.597640891218873, "percentage": 85.98, "elapsed_time": "0:54:22", "remaining_time": "0:08:52", "throughput": 2696.68, "total_tokens": 8797744}
6578
+ {"current_steps": 32805, "total_steps": 38150, "loss": 0.0995, "lr": 2.931681967044475e-06, "epoch": 8.598951507208389, "percentage": 85.99, "elapsed_time": "0:54:22", "remaining_time": "0:08:51", "throughput": 2696.66, "total_tokens": 8798800}
6579
+ {"current_steps": 32810, "total_steps": 38150, "loss": 0.1758, "lr": 2.9263101785268254e-06, "epoch": 8.600262123197902, "percentage": 86.0, "elapsed_time": "0:54:23", "remaining_time": "0:08:51", "throughput": 2696.69, "total_tokens": 8800016}
6580
+ {"current_steps": 32815, "total_steps": 38150, "loss": 0.1535, "lr": 2.9209430099943204e-06, "epoch": 8.601572739187418, "percentage": 86.02, "elapsed_time": "0:54:23", "remaining_time": "0:08:50", "throughput": 2696.76, "total_tokens": 8801488}
6581
+ {"current_steps": 32820, "total_steps": 38150, "loss": 0.1218, "lr": 2.915580462570289e-06, "epoch": 8.602883355176933, "percentage": 86.03, "elapsed_time": "0:54:24", "remaining_time": "0:08:50", "throughput": 2696.75, "total_tokens": 8802592}
6582
+ {"current_steps": 32825, "total_steps": 38150, "loss": 0.1859, "lr": 2.910222537377108e-06, "epoch": 8.604193971166449, "percentage": 86.04, "elapsed_time": "0:54:24", "remaining_time": "0:08:49", "throughput": 2696.79, "total_tokens": 8803872}
6583
+ {"current_steps": 32830, "total_steps": 38150, "loss": 0.1368, "lr": 2.904869235536181e-06, "epoch": 8.605504587155963, "percentage": 86.06, "elapsed_time": "0:54:25", "remaining_time": "0:08:49", "throughput": 2696.79, "total_tokens": 8805008}
6584
+ {"current_steps": 32835, "total_steps": 38150, "loss": 0.045, "lr": 2.8995205581679452e-06, "epoch": 8.606815203145478, "percentage": 86.07, "elapsed_time": "0:54:25", "remaining_time": "0:08:48", "throughput": 2696.75, "total_tokens": 8806000}
6585
+ {"current_steps": 32840, "total_steps": 38150, "loss": 0.1114, "lr": 2.8941765063918657e-06, "epoch": 8.608125819134994, "percentage": 86.08, "elapsed_time": "0:54:25", "remaining_time": "0:08:48", "throughput": 2696.75, "total_tokens": 8807248}
6586
+ {"current_steps": 32845, "total_steps": 38150, "loss": 0.0965, "lr": 2.888837081326451e-06, "epoch": 8.609436435124508, "percentage": 86.09, "elapsed_time": "0:54:26", "remaining_time": "0:08:47", "throughput": 2696.84, "total_tokens": 8808752}
6587
+ {"current_steps": 32850, "total_steps": 38150, "loss": 0.0898, "lr": 2.8835022840892373e-06, "epoch": 8.610747051114023, "percentage": 86.11, "elapsed_time": "0:54:26", "remaining_time": "0:08:47", "throughput": 2696.9, "total_tokens": 8810096}
6588
+ {"current_steps": 32855, "total_steps": 38150, "loss": 0.2868, "lr": 2.8781721157967734e-06, "epoch": 8.612057667103539, "percentage": 86.12, "elapsed_time": "0:54:27", "remaining_time": "0:08:46", "throughput": 2697.05, "total_tokens": 8812144}
6589
+ {"current_steps": 32860, "total_steps": 38150, "loss": 0.2291, "lr": 2.872846577564664e-06, "epoch": 8.613368283093054, "percentage": 86.13, "elapsed_time": "0:54:27", "remaining_time": "0:08:46", "throughput": 2697.03, "total_tokens": 8813200}
6590
+ {"current_steps": 32865, "total_steps": 38150, "loss": 0.1565, "lr": 2.8675256705075343e-06, "epoch": 8.614678899082568, "percentage": 86.15, "elapsed_time": "0:54:28", "remaining_time": "0:08:45", "throughput": 2697.07, "total_tokens": 8814496}
6591
+ {"current_steps": 32870, "total_steps": 38150, "loss": 0.2031, "lr": 2.8622093957390377e-06, "epoch": 8.615989515072084, "percentage": 86.16, "elapsed_time": "0:54:28", "remaining_time": "0:08:45", "throughput": 2697.07, "total_tokens": 8815600}
6592
+ {"current_steps": 32875, "total_steps": 38150, "loss": 0.2346, "lr": 2.856897754371865e-06, "epoch": 8.617300131061599, "percentage": 86.17, "elapsed_time": "0:54:29", "remaining_time": "0:08:44", "throughput": 2697.05, "total_tokens": 8816672}
6593
+ {"current_steps": 32880, "total_steps": 38150, "loss": 0.1166, "lr": 2.8515907475177293e-06, "epoch": 8.618610747051115, "percentage": 86.19, "elapsed_time": "0:54:29", "remaining_time": "0:08:44", "throughput": 2697.04, "total_tokens": 8817824}
6594
+ {"current_steps": 32885, "total_steps": 38150, "loss": 0.0957, "lr": 2.8462883762873842e-06, "epoch": 8.619921363040628, "percentage": 86.2, "elapsed_time": "0:54:29", "remaining_time": "0:08:43", "throughput": 2697.04, "total_tokens": 8819072}
6595
+ {"current_steps": 32890, "total_steps": 38150, "loss": 0.0968, "lr": 2.8409906417906e-06, "epoch": 8.621231979030144, "percentage": 86.21, "elapsed_time": "0:54:30", "remaining_time": "0:08:43", "throughput": 2697.06, "total_tokens": 8820272}
6596
+ {"current_steps": 32895, "total_steps": 38150, "loss": 0.1195, "lr": 2.8356975451361873e-06, "epoch": 8.62254259501966, "percentage": 86.23, "elapsed_time": "0:54:30", "remaining_time": "0:08:42", "throughput": 2697.08, "total_tokens": 8821520}
6597
+ {"current_steps": 32900, "total_steps": 38150, "loss": 0.2078, "lr": 2.8304090874319843e-06, "epoch": 8.623853211009175, "percentage": 86.24, "elapsed_time": "0:54:31", "remaining_time": "0:08:41", "throughput": 2697.11, "total_tokens": 8822768}
6598
+ {"current_steps": 32905, "total_steps": 38150, "loss": 0.1781, "lr": 2.8251252697848505e-06, "epoch": 8.625163826998689, "percentage": 86.25, "elapsed_time": "0:54:31", "remaining_time": "0:08:41", "throughput": 2697.2, "total_tokens": 8824384}
6599
+ {"current_steps": 32910, "total_steps": 38150, "loss": 0.2354, "lr": 2.819846093300682e-06, "epoch": 8.626474442988204, "percentage": 86.26, "elapsed_time": "0:54:32", "remaining_time": "0:08:40", "throughput": 2697.23, "total_tokens": 8825712}
6600
+ {"current_steps": 32915, "total_steps": 38150, "loss": 0.1287, "lr": 2.814571559084403e-06, "epoch": 8.62778505897772, "percentage": 86.28, "elapsed_time": "0:54:32", "remaining_time": "0:08:40", "throughput": 2697.19, "total_tokens": 8826688}
6601
+ {"current_steps": 32920, "total_steps": 38150, "loss": 0.1804, "lr": 2.8093016682399636e-06, "epoch": 8.629095674967235, "percentage": 86.29, "elapsed_time": "0:54:33", "remaining_time": "0:08:39", "throughput": 2697.24, "total_tokens": 8828144}
6602
+ {"current_steps": 32925, "total_steps": 38150, "loss": 0.156, "lr": 2.804036421870343e-06, "epoch": 8.63040629095675, "percentage": 86.3, "elapsed_time": "0:54:33", "remaining_time": "0:08:39", "throughput": 2697.36, "total_tokens": 8829856}
6603
+ {"current_steps": 32930, "total_steps": 38150, "loss": 0.0942, "lr": 2.798775821077551e-06, "epoch": 8.631716906946265, "percentage": 86.32, "elapsed_time": "0:54:33", "remaining_time": "0:08:38", "throughput": 2697.43, "total_tokens": 8831280}
6604
+ {"current_steps": 32935, "total_steps": 38150, "loss": 0.1952, "lr": 2.7935198669626155e-06, "epoch": 8.63302752293578, "percentage": 86.33, "elapsed_time": "0:54:34", "remaining_time": "0:08:38", "throughput": 2697.5, "total_tokens": 8832688}
6605
+ {"current_steps": 32940, "total_steps": 38150, "loss": 0.117, "lr": 2.788268560625601e-06, "epoch": 8.634338138925294, "percentage": 86.34, "elapsed_time": "0:54:34", "remaining_time": "0:08:37", "throughput": 2697.52, "total_tokens": 8833952}
6606
+ {"current_steps": 32945, "total_steps": 38150, "loss": 0.2005, "lr": 2.7830219031655973e-06, "epoch": 8.63564875491481, "percentage": 86.36, "elapsed_time": "0:54:35", "remaining_time": "0:08:37", "throughput": 2697.6, "total_tokens": 8835504}
6607
+ {"current_steps": 32950, "total_steps": 38150, "loss": 0.1893, "lr": 2.777779895680721e-06, "epoch": 8.636959370904325, "percentage": 86.37, "elapsed_time": "0:54:35", "remaining_time": "0:08:36", "throughput": 2697.68, "total_tokens": 8837216}
6608
+ {"current_steps": 32955, "total_steps": 38150, "loss": 0.1651, "lr": 2.772542539268114e-06, "epoch": 8.63826998689384, "percentage": 86.38, "elapsed_time": "0:54:36", "remaining_time": "0:08:36", "throughput": 2697.63, "total_tokens": 8838176}
6609
+ {"current_steps": 32960, "total_steps": 38150, "loss": 0.1492, "lr": 2.7673098350239485e-06, "epoch": 8.639580602883354, "percentage": 86.4, "elapsed_time": "0:54:36", "remaining_time": "0:08:35", "throughput": 2697.67, "total_tokens": 8839440}
6610
+ {"current_steps": 32965, "total_steps": 38150, "loss": 0.1173, "lr": 2.7620817840434216e-06, "epoch": 8.64089121887287, "percentage": 86.41, "elapsed_time": "0:54:37", "remaining_time": "0:08:35", "throughput": 2697.7, "total_tokens": 8840688}
6611
+ {"current_steps": 32970, "total_steps": 38150, "loss": 0.1627, "lr": 2.756858387420758e-06, "epoch": 8.642201834862385, "percentage": 86.42, "elapsed_time": "0:54:37", "remaining_time": "0:08:34", "throughput": 2697.67, "total_tokens": 8841680}
6612
+ {"current_steps": 32975, "total_steps": 38150, "loss": 0.1922, "lr": 2.7516396462491934e-06, "epoch": 8.643512450851901, "percentage": 86.44, "elapsed_time": "0:54:37", "remaining_time": "0:08:34", "throughput": 2697.74, "total_tokens": 8843120}
6613
+ {"current_steps": 32980, "total_steps": 38150, "loss": 0.0974, "lr": 2.7464255616210106e-06, "epoch": 8.644823066841415, "percentage": 86.45, "elapsed_time": "0:54:38", "remaining_time": "0:08:33", "throughput": 2697.84, "total_tokens": 8844752}
6614
+ {"current_steps": 32985, "total_steps": 38150, "loss": 0.2323, "lr": 2.7412161346275057e-06, "epoch": 8.64613368283093, "percentage": 86.46, "elapsed_time": "0:54:38", "remaining_time": "0:08:33", "throughput": 2697.83, "total_tokens": 8845856}
6615
+ {"current_steps": 32990, "total_steps": 38150, "loss": 0.2219, "lr": 2.7360113663590036e-06, "epoch": 8.647444298820446, "percentage": 86.47, "elapsed_time": "0:54:39", "remaining_time": "0:08:32", "throughput": 2697.83, "total_tokens": 8846976}
6616
+ {"current_steps": 32995, "total_steps": 38150, "loss": 0.2262, "lr": 2.7308112579048555e-06, "epoch": 8.648754914809961, "percentage": 86.49, "elapsed_time": "0:54:39", "remaining_time": "0:08:32", "throughput": 2697.97, "total_tokens": 8848832}
6617
+ {"current_steps": 33000, "total_steps": 38150, "loss": 0.1237, "lr": 2.7256158103534298e-06, "epoch": 8.650065530799475, "percentage": 86.5, "elapsed_time": "0:54:40", "remaining_time": "0:08:31", "throughput": 2698.0, "total_tokens": 8850080}
6618
+ {"current_steps": 33005, "total_steps": 38150, "loss": 0.1413, "lr": 2.7204250247921308e-06, "epoch": 8.65137614678899, "percentage": 86.51, "elapsed_time": "0:54:40", "remaining_time": "0:08:31", "throughput": 2698.06, "total_tokens": 8851552}
6619
+ {"current_steps": 33010, "total_steps": 38150, "loss": 0.1868, "lr": 2.7152389023073766e-06, "epoch": 8.652686762778506, "percentage": 86.53, "elapsed_time": "0:54:41", "remaining_time": "0:08:30", "throughput": 2698.06, "total_tokens": 8852688}
6620
+ {"current_steps": 33015, "total_steps": 38150, "loss": 0.112, "lr": 2.710057443984615e-06, "epoch": 8.653997378768022, "percentage": 86.54, "elapsed_time": "0:54:41", "remaining_time": "0:08:30", "throughput": 2698.13, "total_tokens": 8854176}
6621
+ {"current_steps": 33020, "total_steps": 38150, "loss": 0.1547, "lr": 2.7048806509083146e-06, "epoch": 8.655307994757536, "percentage": 86.55, "elapsed_time": "0:54:42", "remaining_time": "0:08:29", "throughput": 2698.17, "total_tokens": 8855584}
6622
+ {"current_steps": 33025, "total_steps": 38150, "loss": 0.2194, "lr": 2.6997085241619728e-06, "epoch": 8.656618610747051, "percentage": 86.57, "elapsed_time": "0:54:42", "remaining_time": "0:08:29", "throughput": 2698.23, "total_tokens": 8857056}
6623
+ {"current_steps": 33030, "total_steps": 38150, "loss": 0.21, "lr": 2.6945410648281044e-06, "epoch": 8.657929226736567, "percentage": 86.58, "elapsed_time": "0:54:42", "remaining_time": "0:08:28", "throughput": 2698.27, "total_tokens": 8858304}
6624
+ {"current_steps": 33035, "total_steps": 38150, "loss": 0.1345, "lr": 2.6893782739882523e-06, "epoch": 8.65923984272608, "percentage": 86.59, "elapsed_time": "0:54:43", "remaining_time": "0:08:28", "throughput": 2698.35, "total_tokens": 8859936}
6625
+ {"current_steps": 33040, "total_steps": 38150, "loss": 0.1374, "lr": 2.6842201527229743e-06, "epoch": 8.660550458715596, "percentage": 86.61, "elapsed_time": "0:54:43", "remaining_time": "0:08:27", "throughput": 2698.32, "total_tokens": 8860976}
6626
+ {"current_steps": 33045, "total_steps": 38150, "loss": 0.0713, "lr": 2.6790667021118626e-06, "epoch": 8.661861074705111, "percentage": 86.62, "elapsed_time": "0:54:44", "remaining_time": "0:08:27", "throughput": 2698.24, "total_tokens": 8861792}
6627
+ {"current_steps": 33050, "total_steps": 38150, "loss": 0.2539, "lr": 2.6739179232335264e-06, "epoch": 8.663171690694627, "percentage": 86.63, "elapsed_time": "0:54:44", "remaining_time": "0:08:26", "throughput": 2698.23, "total_tokens": 8862880}
6628
+ {"current_steps": 33055, "total_steps": 38150, "loss": 0.1916, "lr": 2.6687738171655873e-06, "epoch": 8.66448230668414, "percentage": 86.64, "elapsed_time": "0:54:45", "remaining_time": "0:08:26", "throughput": 2698.22, "total_tokens": 8863952}
6629
+ {"current_steps": 33060, "total_steps": 38150, "loss": 0.219, "lr": 2.6636343849847033e-06, "epoch": 8.665792922673656, "percentage": 86.66, "elapsed_time": "0:54:45", "remaining_time": "0:08:25", "throughput": 2698.32, "total_tokens": 8865632}
6630
+ {"current_steps": 33065, "total_steps": 38150, "loss": 0.1, "lr": 2.6584996277665475e-06, "epoch": 8.667103538663172, "percentage": 86.67, "elapsed_time": "0:54:46", "remaining_time": "0:08:25", "throughput": 2698.4, "total_tokens": 8867136}
6631
+ {"current_steps": 33070, "total_steps": 38150, "loss": 0.1466, "lr": 2.653369546585813e-06, "epoch": 8.668414154652687, "percentage": 86.68, "elapsed_time": "0:54:46", "remaining_time": "0:08:24", "throughput": 2698.4, "total_tokens": 8868288}
6632
+ {"current_steps": 33075, "total_steps": 38150, "loss": 0.0815, "lr": 2.6482441425162235e-06, "epoch": 8.669724770642201, "percentage": 86.7, "elapsed_time": "0:54:46", "remaining_time": "0:08:24", "throughput": 2698.43, "total_tokens": 8869584}
6633
+ {"current_steps": 33080, "total_steps": 38150, "loss": 0.1538, "lr": 2.6431234166305135e-06, "epoch": 8.671035386631717, "percentage": 86.71, "elapsed_time": "0:54:47", "remaining_time": "0:08:23", "throughput": 2698.52, "total_tokens": 8871216}
6634
+ {"current_steps": 33085, "total_steps": 38150, "loss": 0.1021, "lr": 2.6380073700004504e-06, "epoch": 8.672346002621232, "percentage": 86.72, "elapsed_time": "0:54:47", "remaining_time": "0:08:23", "throughput": 2698.47, "total_tokens": 8872160}
6635
+ {"current_steps": 33090, "total_steps": 38150, "loss": 0.1287, "lr": 2.6328960036967996e-06, "epoch": 8.673656618610748, "percentage": 86.74, "elapsed_time": "0:54:48", "remaining_time": "0:08:22", "throughput": 2698.57, "total_tokens": 8873792}
6636
+ {"current_steps": 33095, "total_steps": 38150, "loss": 0.2082, "lr": 2.627789318789367e-06, "epoch": 8.674967234600262, "percentage": 86.75, "elapsed_time": "0:54:48", "remaining_time": "0:08:22", "throughput": 2698.57, "total_tokens": 8874960}
6637
+ {"current_steps": 33100, "total_steps": 38150, "loss": 0.1551, "lr": 2.6226873163469752e-06, "epoch": 8.676277850589777, "percentage": 86.76, "elapsed_time": "0:54:49", "remaining_time": "0:08:21", "throughput": 2698.62, "total_tokens": 8876304}
6638
+ {"current_steps": 33105, "total_steps": 38150, "loss": 0.0746, "lr": 2.6175899974374614e-06, "epoch": 8.677588466579293, "percentage": 86.78, "elapsed_time": "0:54:49", "remaining_time": "0:08:21", "throughput": 2698.58, "total_tokens": 8877312}
6639
+ {"current_steps": 33110, "total_steps": 38150, "loss": 0.1296, "lr": 2.6124973631276888e-06, "epoch": 8.678899082568808, "percentage": 86.79, "elapsed_time": "0:54:50", "remaining_time": "0:08:20", "throughput": 2698.61, "total_tokens": 8878544}
6640
+ {"current_steps": 33115, "total_steps": 38150, "loss": 0.2654, "lr": 2.607409414483536e-06, "epoch": 8.680209698558322, "percentage": 86.8, "elapsed_time": "0:54:50", "remaining_time": "0:08:20", "throughput": 2698.54, "total_tokens": 8879424}
6641
+ {"current_steps": 33120, "total_steps": 38150, "loss": 0.0906, "lr": 2.6023261525699003e-06, "epoch": 8.681520314547837, "percentage": 86.82, "elapsed_time": "0:54:50", "remaining_time": "0:08:19", "throughput": 2698.56, "total_tokens": 8880592}
6642
+ {"current_steps": 33125, "total_steps": 38150, "loss": 0.3102, "lr": 2.597247578450701e-06, "epoch": 8.682830930537353, "percentage": 86.83, "elapsed_time": "0:54:51", "remaining_time": "0:08:19", "throughput": 2698.67, "total_tokens": 8882272}
6643
+ {"current_steps": 33130, "total_steps": 38150, "loss": 0.1092, "lr": 2.592173693188876e-06, "epoch": 8.684141546526867, "percentage": 86.84, "elapsed_time": "0:54:51", "remaining_time": "0:08:18", "throughput": 2698.63, "total_tokens": 8883248}
6644
+ {"current_steps": 33135, "total_steps": 38150, "loss": 0.1374, "lr": 2.5871044978463764e-06, "epoch": 8.685452162516382, "percentage": 86.85, "elapsed_time": "0:54:52", "remaining_time": "0:08:18", "throughput": 2698.69, "total_tokens": 8884784}
6645
+ {"current_steps": 33140, "total_steps": 38150, "loss": 0.2433, "lr": 2.582039993484181e-06, "epoch": 8.686762778505898, "percentage": 86.87, "elapsed_time": "0:54:52", "remaining_time": "0:08:17", "throughput": 2698.82, "total_tokens": 8886576}
6646
+ {"current_steps": 33145, "total_steps": 38150, "loss": 0.1174, "lr": 2.576980181162278e-06, "epoch": 8.688073394495413, "percentage": 86.88, "elapsed_time": "0:54:53", "remaining_time": "0:08:17", "throughput": 2698.79, "total_tokens": 8887616}
6647
+ {"current_steps": 33150, "total_steps": 38150, "loss": 0.1354, "lr": 2.571925061939681e-06, "epoch": 8.689384010484927, "percentage": 86.89, "elapsed_time": "0:54:53", "remaining_time": "0:08:16", "throughput": 2698.88, "total_tokens": 8889232}
6648
+ {"current_steps": 33155, "total_steps": 38150, "loss": 0.1777, "lr": 2.566874636874414e-06, "epoch": 8.690694626474443, "percentage": 86.91, "elapsed_time": "0:54:54", "remaining_time": "0:08:16", "throughput": 2698.9, "total_tokens": 8890432}
6649
+ {"current_steps": 33160, "total_steps": 38150, "loss": 0.2335, "lr": 2.561828907023525e-06, "epoch": 8.692005242463958, "percentage": 86.92, "elapsed_time": "0:54:54", "remaining_time": "0:08:15", "throughput": 2699.02, "total_tokens": 8892160}
6650
+ {"current_steps": 33165, "total_steps": 38150, "loss": 0.1453, "lr": 2.556787873443081e-06, "epoch": 8.693315858453474, "percentage": 86.93, "elapsed_time": "0:54:55", "remaining_time": "0:08:15", "throughput": 2699.04, "total_tokens": 8893360}
6651
+ {"current_steps": 33170, "total_steps": 38150, "loss": 0.108, "lr": 2.5517515371881516e-06, "epoch": 8.694626474442988, "percentage": 86.95, "elapsed_time": "0:54:55", "remaining_time": "0:08:14", "throughput": 2699.17, "total_tokens": 8895104}
6652
+ {"current_steps": 33175, "total_steps": 38150, "loss": 0.0837, "lr": 2.5467198993128355e-06, "epoch": 8.695937090432503, "percentage": 86.96, "elapsed_time": "0:54:55", "remaining_time": "0:08:14", "throughput": 2699.19, "total_tokens": 8896304}
6653
+ {"current_steps": 33180, "total_steps": 38150, "loss": 0.2027, "lr": 2.541692960870251e-06, "epoch": 8.697247706422019, "percentage": 86.97, "elapsed_time": "0:54:56", "remaining_time": "0:08:13", "throughput": 2699.19, "total_tokens": 8897440}
6654
+ {"current_steps": 33185, "total_steps": 38150, "loss": 0.13, "lr": 2.536670722912518e-06, "epoch": 8.698558322411534, "percentage": 86.99, "elapsed_time": "0:54:56", "remaining_time": "0:08:13", "throughput": 2699.21, "total_tokens": 8898736}
6655
+ {"current_steps": 33190, "total_steps": 38150, "loss": 0.0772, "lr": 2.5316531864907955e-06, "epoch": 8.699868938401048, "percentage": 87.0, "elapsed_time": "0:54:57", "remaining_time": "0:08:12", "throughput": 2699.15, "total_tokens": 8899632}
6656
+ {"current_steps": 33195, "total_steps": 38150, "loss": 0.1346, "lr": 2.526640352655238e-06, "epoch": 8.701179554390563, "percentage": 87.01, "elapsed_time": "0:54:57", "remaining_time": "0:08:12", "throughput": 2699.16, "total_tokens": 8900848}
6657
+ {"current_steps": 33200, "total_steps": 38150, "loss": 0.074, "lr": 2.5216322224550226e-06, "epoch": 8.702490170380079, "percentage": 87.02, "elapsed_time": "0:54:58", "remaining_time": "0:08:11", "throughput": 2699.12, "total_tokens": 8901808}
6658
+ {"current_steps": 33205, "total_steps": 38150, "loss": 0.135, "lr": 2.51662879693835e-06, "epoch": 8.703800786369595, "percentage": 87.04, "elapsed_time": "0:54:58", "remaining_time": "0:08:11", "throughput": 2699.12, "total_tokens": 8903008}
6659
+ {"current_steps": 33210, "total_steps": 38150, "loss": 0.1774, "lr": 2.511630077152416e-06, "epoch": 8.705111402359108, "percentage": 87.05, "elapsed_time": "0:54:58", "remaining_time": "0:08:10", "throughput": 2699.13, "total_tokens": 8904192}
6660
+ {"current_steps": 33215, "total_steps": 38150, "loss": 0.2049, "lr": 2.50663606414345e-06, "epoch": 8.706422018348624, "percentage": 87.06, "elapsed_time": "0:54:59", "remaining_time": "0:08:10", "throughput": 2699.2, "total_tokens": 8905696}
6661
+ {"current_steps": 33220, "total_steps": 38150, "loss": 0.2952, "lr": 2.501646758956691e-06, "epoch": 8.70773263433814, "percentage": 87.08, "elapsed_time": "0:54:59", "remaining_time": "0:08:09", "throughput": 2699.33, "total_tokens": 8907440}
6662
+ {"current_steps": 33225, "total_steps": 38150, "loss": 0.1935, "lr": 2.49666216263639e-06, "epoch": 8.709043250327653, "percentage": 87.09, "elapsed_time": "0:55:00", "remaining_time": "0:08:09", "throughput": 2699.32, "total_tokens": 8908544}
6663
+ {"current_steps": 33230, "total_steps": 38150, "loss": 0.0888, "lr": 2.491682276225818e-06, "epoch": 8.710353866317169, "percentage": 87.1, "elapsed_time": "0:55:00", "remaining_time": "0:08:08", "throughput": 2699.34, "total_tokens": 8909744}
6664
+ {"current_steps": 33235, "total_steps": 38150, "loss": 0.1929, "lr": 2.486707100767252e-06, "epoch": 8.711664482306684, "percentage": 87.12, "elapsed_time": "0:55:01", "remaining_time": "0:08:08", "throughput": 2699.38, "total_tokens": 8911040}
6665
+ {"current_steps": 33240, "total_steps": 38150, "loss": 0.1167, "lr": 2.4817366373019902e-06, "epoch": 8.7129750982962, "percentage": 87.13, "elapsed_time": "0:55:01", "remaining_time": "0:08:07", "throughput": 2699.35, "total_tokens": 8912144}
6666
+ {"current_steps": 33245, "total_steps": 38150, "loss": 0.1588, "lr": 2.4767708868703414e-06, "epoch": 8.714285714285714, "percentage": 87.14, "elapsed_time": "0:55:02", "remaining_time": "0:08:07", "throughput": 2699.35, "total_tokens": 8913296}
6667
+ {"current_steps": 33250, "total_steps": 38150, "loss": 0.1208, "lr": 2.4718098505116305e-06, "epoch": 8.715596330275229, "percentage": 87.16, "elapsed_time": "0:55:02", "remaining_time": "0:08:06", "throughput": 2699.37, "total_tokens": 8914464}
6668
+ {"current_steps": 33255, "total_steps": 38150, "loss": 0.2307, "lr": 2.4668535292641904e-06, "epoch": 8.716906946264745, "percentage": 87.17, "elapsed_time": "0:55:02", "remaining_time": "0:08:06", "throughput": 2699.47, "total_tokens": 8916016}
6669
+ {"current_steps": 33260, "total_steps": 38150, "loss": 0.1432, "lr": 2.4619019241653716e-06, "epoch": 8.71821756225426, "percentage": 87.18, "elapsed_time": "0:55:03", "remaining_time": "0:08:05", "throughput": 2699.52, "total_tokens": 8917328}
6670
+ {"current_steps": 33265, "total_steps": 38150, "loss": 0.1024, "lr": 2.4569550362515403e-06, "epoch": 8.719528178243774, "percentage": 87.2, "elapsed_time": "0:55:03", "remaining_time": "0:08:05", "throughput": 2699.65, "total_tokens": 8919232}
6671
+ {"current_steps": 33270, "total_steps": 38150, "loss": 0.1271, "lr": 2.452012866558065e-06, "epoch": 8.72083879423329, "percentage": 87.21, "elapsed_time": "0:55:04", "remaining_time": "0:08:04", "throughput": 2699.68, "total_tokens": 8920512}
6672
+ {"current_steps": 33275, "total_steps": 38150, "loss": 0.0904, "lr": 2.4470754161193406e-06, "epoch": 8.722149410222805, "percentage": 87.22, "elapsed_time": "0:55:04", "remaining_time": "0:08:04", "throughput": 2699.69, "total_tokens": 8921680}
6673
+ {"current_steps": 33280, "total_steps": 38150, "loss": 0.2847, "lr": 2.4421426859687597e-06, "epoch": 8.72346002621232, "percentage": 87.23, "elapsed_time": "0:55:05", "remaining_time": "0:08:03", "throughput": 2699.73, "total_tokens": 8923056}
6674
+ {"current_steps": 33285, "total_steps": 38150, "loss": 0.0744, "lr": 2.437214677138744e-06, "epoch": 8.724770642201834, "percentage": 87.25, "elapsed_time": "0:55:05", "remaining_time": "0:08:03", "throughput": 2699.67, "total_tokens": 8923968}
6675
+ {"current_steps": 33290, "total_steps": 38150, "loss": 0.1474, "lr": 2.432291390660707e-06, "epoch": 8.72608125819135, "percentage": 87.26, "elapsed_time": "0:55:06", "remaining_time": "0:08:02", "throughput": 2699.71, "total_tokens": 8925248}
6676
+ {"current_steps": 33295, "total_steps": 38150, "loss": 0.1371, "lr": 2.427372827565086e-06, "epoch": 8.727391874180865, "percentage": 87.27, "elapsed_time": "0:55:06", "remaining_time": "0:08:02", "throughput": 2699.69, "total_tokens": 8926320}
6677
+ {"current_steps": 33300, "total_steps": 38150, "loss": 0.1517, "lr": 2.4224589888813263e-06, "epoch": 8.728702490170381, "percentage": 87.29, "elapsed_time": "0:55:06", "remaining_time": "0:08:01", "throughput": 2699.75, "total_tokens": 8927840}
6678
+ {"current_steps": 33305, "total_steps": 38150, "loss": 0.2137, "lr": 2.4175498756378924e-06, "epoch": 8.730013106159895, "percentage": 87.3, "elapsed_time": "0:55:07", "remaining_time": "0:08:01", "throughput": 2699.75, "total_tokens": 8929024}
6679
+ {"current_steps": 33310, "total_steps": 38150, "loss": 0.1931, "lr": 2.41264548886225e-06, "epoch": 8.73132372214941, "percentage": 87.31, "elapsed_time": "0:55:07", "remaining_time": "0:08:00", "throughput": 2699.77, "total_tokens": 8930240}
6680
+ {"current_steps": 33315, "total_steps": 38150, "loss": 0.0469, "lr": 2.407745829580882e-06, "epoch": 8.732634338138926, "percentage": 87.33, "elapsed_time": "0:55:08", "remaining_time": "0:08:00", "throughput": 2699.75, "total_tokens": 8931280}
6681
+ {"current_steps": 33320, "total_steps": 38150, "loss": 0.1634, "lr": 2.402850898819278e-06, "epoch": 8.73394495412844, "percentage": 87.34, "elapsed_time": "0:55:08", "remaining_time": "0:07:59", "throughput": 2699.77, "total_tokens": 8932480}
6682
+ {"current_steps": 33325, "total_steps": 38150, "loss": 0.2779, "lr": 2.397960697601931e-06, "epoch": 8.735255570117955, "percentage": 87.35, "elapsed_time": "0:55:09", "remaining_time": "0:07:59", "throughput": 2699.86, "total_tokens": 8934048}
6683
+ {"current_steps": 33330, "total_steps": 38150, "loss": 0.2419, "lr": 2.393075226952357e-06, "epoch": 8.73656618610747, "percentage": 87.37, "elapsed_time": "0:55:09", "remaining_time": "0:07:58", "throughput": 2699.95, "total_tokens": 8935520}
6684
+ {"current_steps": 33335, "total_steps": 38150, "loss": 0.1892, "lr": 2.388194487893078e-06, "epoch": 8.737876802096986, "percentage": 87.38, "elapsed_time": "0:55:09", "remaining_time": "0:07:58", "throughput": 2700.08, "total_tokens": 8937248}
6685
+ {"current_steps": 33340, "total_steps": 38150, "loss": 0.1633, "lr": 2.3833184814456204e-06, "epoch": 8.7391874180865, "percentage": 87.39, "elapsed_time": "0:55:10", "remaining_time": "0:07:57", "throughput": 2700.13, "total_tokens": 8938688}
6686
+ {"current_steps": 33345, "total_steps": 38150, "loss": 0.2077, "lr": 2.378447208630527e-06, "epoch": 8.740498034076015, "percentage": 87.4, "elapsed_time": "0:55:10", "remaining_time": "0:07:57", "throughput": 2700.2, "total_tokens": 8940176}
6687
+ {"current_steps": 33350, "total_steps": 38150, "loss": 0.1985, "lr": 2.373580670467343e-06, "epoch": 8.741808650065531, "percentage": 87.42, "elapsed_time": "0:55:11", "remaining_time": "0:07:56", "throughput": 2700.27, "total_tokens": 8941568}
6688
+ {"current_steps": 33355, "total_steps": 38150, "loss": 0.2417, "lr": 2.3687188679746315e-06, "epoch": 8.743119266055047, "percentage": 87.43, "elapsed_time": "0:55:11", "remaining_time": "0:07:56", "throughput": 2700.38, "total_tokens": 8943376}
6689
+ {"current_steps": 33360, "total_steps": 38150, "loss": 0.1149, "lr": 2.363861802169959e-06, "epoch": 8.74442988204456, "percentage": 87.44, "elapsed_time": "0:55:12", "remaining_time": "0:07:55", "throughput": 2700.44, "total_tokens": 8944832}
6690
+ {"current_steps": 33365, "total_steps": 38150, "loss": 0.204, "lr": 2.3590094740698955e-06, "epoch": 8.745740498034076, "percentage": 87.46, "elapsed_time": "0:55:12", "remaining_time": "0:07:55", "throughput": 2700.52, "total_tokens": 8946368}
6691
+ {"current_steps": 33370, "total_steps": 38150, "loss": 0.1251, "lr": 2.3541618846900316e-06, "epoch": 8.747051114023591, "percentage": 87.47, "elapsed_time": "0:55:13", "remaining_time": "0:07:54", "throughput": 2700.6, "total_tokens": 8947904}
6692
+ {"current_steps": 33375, "total_steps": 38150, "loss": 0.2063, "lr": 2.3493190350449557e-06, "epoch": 8.748361730013107, "percentage": 87.48, "elapsed_time": "0:55:13", "remaining_time": "0:07:54", "throughput": 2700.63, "total_tokens": 8949168}
6693
+ {"current_steps": 33380, "total_steps": 38150, "loss": 0.2173, "lr": 2.3444809261482653e-06, "epoch": 8.74967234600262, "percentage": 87.5, "elapsed_time": "0:55:14", "remaining_time": "0:07:53", "throughput": 2700.75, "total_tokens": 8950880}
6694
+ {"current_steps": 33385, "total_steps": 38150, "loss": 0.2304, "lr": 2.3396475590125753e-06, "epoch": 8.750982961992136, "percentage": 87.51, "elapsed_time": "0:55:14", "remaining_time": "0:07:53", "throughput": 2700.88, "total_tokens": 8952688}
6695
+ {"current_steps": 33390, "total_steps": 38150, "loss": 0.2136, "lr": 2.334818934649494e-06, "epoch": 8.752293577981652, "percentage": 87.52, "elapsed_time": "0:55:15", "remaining_time": "0:07:52", "throughput": 2700.97, "total_tokens": 8954448}
6696
+ {"current_steps": 33395, "total_steps": 38150, "loss": 0.1493, "lr": 2.3299950540696504e-06, "epoch": 8.753604193971167, "percentage": 87.54, "elapsed_time": "0:55:15", "remaining_time": "0:07:52", "throughput": 2701.04, "total_tokens": 8955872}
6697
+ {"current_steps": 33400, "total_steps": 38150, "loss": 0.2021, "lr": 2.325175918282674e-06, "epoch": 8.754914809960681, "percentage": 87.55, "elapsed_time": "0:55:16", "remaining_time": "0:07:51", "throughput": 2701.18, "total_tokens": 8957840}
6698
+ {"current_steps": 33405, "total_steps": 38150, "loss": 0.1984, "lr": 2.3203615282971934e-06, "epoch": 8.756225425950197, "percentage": 87.56, "elapsed_time": "0:55:16", "remaining_time": "0:07:51", "throughput": 2701.23, "total_tokens": 8959216}
6699
+ {"current_steps": 33410, "total_steps": 38150, "loss": 0.1406, "lr": 2.315551885120859e-06, "epoch": 8.757536041939712, "percentage": 87.58, "elapsed_time": "0:55:17", "remaining_time": "0:07:50", "throughput": 2701.26, "total_tokens": 8960464}
6700
+ {"current_steps": 33415, "total_steps": 38150, "loss": 0.1965, "lr": 2.3107469897603152e-06, "epoch": 8.758846657929226, "percentage": 87.59, "elapsed_time": "0:55:17", "remaining_time": "0:07:50", "throughput": 2701.24, "total_tokens": 8961536}
6701
+ {"current_steps": 33420, "total_steps": 38150, "loss": 0.0799, "lr": 2.305946843221224e-06, "epoch": 8.760157273918741, "percentage": 87.6, "elapsed_time": "0:55:17", "remaining_time": "0:07:49", "throughput": 2701.26, "total_tokens": 8962720}
6702
+ {"current_steps": 33425, "total_steps": 38150, "loss": 0.0935, "lr": 2.3011514465082485e-06, "epoch": 8.761467889908257, "percentage": 87.61, "elapsed_time": "0:55:18", "remaining_time": "0:07:49", "throughput": 2701.27, "total_tokens": 8963888}
6703
+ {"current_steps": 33430, "total_steps": 38150, "loss": 0.064, "lr": 2.2963608006250555e-06, "epoch": 8.762778505897773, "percentage": 87.63, "elapsed_time": "0:55:18", "remaining_time": "0:07:48", "throughput": 2701.27, "total_tokens": 8965008}
6704
+ {"current_steps": 33435, "total_steps": 38150, "loss": 0.1761, "lr": 2.2915749065743176e-06, "epoch": 8.764089121887286, "percentage": 87.64, "elapsed_time": "0:55:19", "remaining_time": "0:07:48", "throughput": 2701.3, "total_tokens": 8966432}
6705
+ {"current_steps": 33440, "total_steps": 38150, "loss": 0.2096, "lr": 2.2867937653577173e-06, "epoch": 8.765399737876802, "percentage": 87.65, "elapsed_time": "0:55:19", "remaining_time": "0:07:47", "throughput": 2701.44, "total_tokens": 8968304}
6706
+ {"current_steps": 33445, "total_steps": 38150, "loss": 0.1881, "lr": 2.282017377975934e-06, "epoch": 8.766710353866317, "percentage": 87.67, "elapsed_time": "0:55:20", "remaining_time": "0:07:47", "throughput": 2701.47, "total_tokens": 8969600}
6707
+ {"current_steps": 33450, "total_steps": 38150, "loss": 0.1641, "lr": 2.2772457454286578e-06, "epoch": 8.768020969855833, "percentage": 87.68, "elapsed_time": "0:55:20", "remaining_time": "0:07:46", "throughput": 2701.44, "total_tokens": 8970640}
6708
+ {"current_steps": 33455, "total_steps": 38150, "loss": 0.1593, "lr": 2.2724788687145865e-06, "epoch": 8.769331585845347, "percentage": 87.69, "elapsed_time": "0:55:21", "remaining_time": "0:07:46", "throughput": 2701.45, "total_tokens": 8971776}
6709
+ {"current_steps": 33460, "total_steps": 38150, "loss": 0.1491, "lr": 2.2677167488314193e-06, "epoch": 8.770642201834862, "percentage": 87.71, "elapsed_time": "0:55:21", "remaining_time": "0:07:45", "throughput": 2701.48, "total_tokens": 8973136}
6710
+ {"current_steps": 33465, "total_steps": 38150, "loss": 0.1991, "lr": 2.2629593867758564e-06, "epoch": 8.771952817824378, "percentage": 87.72, "elapsed_time": "0:55:22", "remaining_time": "0:07:45", "throughput": 2701.54, "total_tokens": 8974544}
6711
+ {"current_steps": 33470, "total_steps": 38150, "loss": 0.1342, "lr": 2.258206783543601e-06, "epoch": 8.773263433813893, "percentage": 87.73, "elapsed_time": "0:55:22", "remaining_time": "0:07:44", "throughput": 2701.53, "total_tokens": 8975632}
6712
+ {"current_steps": 33475, "total_steps": 38150, "loss": 0.1571, "lr": 2.253458940129383e-06, "epoch": 8.774574049803407, "percentage": 87.75, "elapsed_time": "0:55:22", "remaining_time": "0:07:44", "throughput": 2701.55, "total_tokens": 8976800}
6713
+ {"current_steps": 33480, "total_steps": 38150, "loss": 0.0628, "lr": 2.2487158575269006e-06, "epoch": 8.775884665792923, "percentage": 87.76, "elapsed_time": "0:55:23", "remaining_time": "0:07:43", "throughput": 2701.55, "total_tokens": 8977952}
6714
+ {"current_steps": 33485, "total_steps": 38150, "loss": 0.1207, "lr": 2.243977536728878e-06, "epoch": 8.777195281782438, "percentage": 87.77, "elapsed_time": "0:55:23", "remaining_time": "0:07:43", "throughput": 2701.62, "total_tokens": 8979392}
6715
+ {"current_steps": 33490, "total_steps": 38150, "loss": 0.1058, "lr": 2.2392439787270373e-06, "epoch": 8.778505897771954, "percentage": 87.79, "elapsed_time": "0:55:24", "remaining_time": "0:07:42", "throughput": 2701.59, "total_tokens": 8980416}
6716
+ {"current_steps": 33495, "total_steps": 38150, "loss": 0.1357, "lr": 2.234515184512104e-06, "epoch": 8.779816513761467, "percentage": 87.8, "elapsed_time": "0:55:24", "remaining_time": "0:07:42", "throughput": 2701.65, "total_tokens": 8981904}
6717
+ {"current_steps": 33500, "total_steps": 38150, "loss": 0.1357, "lr": 2.229791155073807e-06, "epoch": 8.781127129750983, "percentage": 87.81, "elapsed_time": "0:55:25", "remaining_time": "0:07:41", "throughput": 2701.79, "total_tokens": 8983696}
6718
+ {"current_steps": 33505, "total_steps": 38150, "loss": 0.1329, "lr": 2.225071891400879e-06, "epoch": 8.782437745740499, "percentage": 87.82, "elapsed_time": "0:55:25", "remaining_time": "0:07:41", "throughput": 2701.77, "total_tokens": 8984752}
6719
+ {"current_steps": 33510, "total_steps": 38150, "loss": 0.1029, "lr": 2.2203573944810514e-06, "epoch": 8.783748361730012, "percentage": 87.84, "elapsed_time": "0:55:25", "remaining_time": "0:07:40", "throughput": 2701.8, "total_tokens": 8985984}
6720
+ {"current_steps": 33515, "total_steps": 38150, "loss": 0.1477, "lr": 2.21564766530106e-06, "epoch": 8.785058977719528, "percentage": 87.85, "elapsed_time": "0:55:26", "remaining_time": "0:07:40", "throughput": 2701.82, "total_tokens": 8987232}
6721
+ {"current_steps": 33520, "total_steps": 38150, "loss": 0.1521, "lr": 2.210942704846647e-06, "epoch": 8.786369593709043, "percentage": 87.86, "elapsed_time": "0:55:26", "remaining_time": "0:07:39", "throughput": 2701.92, "total_tokens": 8988912}
6722
+ {"current_steps": 33525, "total_steps": 38150, "loss": 0.1217, "lr": 2.2062425141025465e-06, "epoch": 8.787680209698559, "percentage": 87.88, "elapsed_time": "0:55:27", "remaining_time": "0:07:39", "throughput": 2701.94, "total_tokens": 8990144}
6723
+ {"current_steps": 33530, "total_steps": 38150, "loss": 0.2287, "lr": 2.2015470940525003e-06, "epoch": 8.788990825688073, "percentage": 87.89, "elapsed_time": "0:55:27", "remaining_time": "0:07:38", "throughput": 2701.99, "total_tokens": 8991568}
6724
+ {"current_steps": 33535, "total_steps": 38150, "loss": 0.2095, "lr": 2.1968564456792573e-06, "epoch": 8.790301441677588, "percentage": 87.9, "elapsed_time": "0:55:28", "remaining_time": "0:07:38", "throughput": 2702.06, "total_tokens": 8993072}
6725
+ {"current_steps": 33540, "total_steps": 38150, "loss": 0.1485, "lr": 2.1921705699645607e-06, "epoch": 8.791612057667104, "percentage": 87.92, "elapsed_time": "0:55:28", "remaining_time": "0:07:37", "throughput": 2702.1, "total_tokens": 8994336}
6726
+ {"current_steps": 33545, "total_steps": 38150, "loss": 0.1305, "lr": 2.187489467889153e-06, "epoch": 8.79292267365662, "percentage": 87.93, "elapsed_time": "0:55:29", "remaining_time": "0:07:37", "throughput": 2702.14, "total_tokens": 8995632}
6727
+ {"current_steps": 33550, "total_steps": 38150, "loss": 0.0891, "lr": 2.182813140432785e-06, "epoch": 8.794233289646133, "percentage": 87.94, "elapsed_time": "0:55:29", "remaining_time": "0:07:36", "throughput": 2702.13, "total_tokens": 8996704}
6728
+ {"current_steps": 33555, "total_steps": 38150, "loss": 0.146, "lr": 2.1781415885742037e-06, "epoch": 8.795543905635649, "percentage": 87.96, "elapsed_time": "0:55:30", "remaining_time": "0:07:36", "throughput": 2702.27, "total_tokens": 8998608}
6729
+ {"current_steps": 33560, "total_steps": 38150, "loss": 0.1566, "lr": 2.1734748132911515e-06, "epoch": 8.796854521625164, "percentage": 87.97, "elapsed_time": "0:55:30", "remaining_time": "0:07:35", "throughput": 2702.32, "total_tokens": 8999952}
6730
+ {"current_steps": 33565, "total_steps": 38150, "loss": 0.1084, "lr": 2.1688128155603783e-06, "epoch": 8.79816513761468, "percentage": 87.98, "elapsed_time": "0:55:30", "remaining_time": "0:07:35", "throughput": 2702.35, "total_tokens": 9001312}
6731
+ {"current_steps": 33570, "total_steps": 38150, "loss": 0.0928, "lr": 2.1641555963576366e-06, "epoch": 8.799475753604193, "percentage": 87.99, "elapsed_time": "0:55:31", "remaining_time": "0:07:34", "throughput": 2702.38, "total_tokens": 9002528}
6732
+ {"current_steps": 33575, "total_steps": 38150, "loss": 0.1089, "lr": 2.159503156657669e-06, "epoch": 8.800786369593709, "percentage": 88.01, "elapsed_time": "0:55:31", "remaining_time": "0:07:33", "throughput": 2702.39, "total_tokens": 9003776}
6733
+ {"current_steps": 33580, "total_steps": 38150, "loss": 0.1011, "lr": 2.1548554974342244e-06, "epoch": 8.802096985583225, "percentage": 88.02, "elapsed_time": "0:55:32", "remaining_time": "0:07:33", "throughput": 2702.36, "total_tokens": 9004768}
6734
+ {"current_steps": 33585, "total_steps": 38150, "loss": 0.212, "lr": 2.15021261966005e-06, "epoch": 8.80340760157274, "percentage": 88.03, "elapsed_time": "0:55:32", "remaining_time": "0:07:32", "throughput": 2702.38, "total_tokens": 9005984}
6735
+ {"current_steps": 33590, "total_steps": 38150, "loss": 0.1514, "lr": 2.145574524306898e-06, "epoch": 8.804718217562254, "percentage": 88.05, "elapsed_time": "0:55:33", "remaining_time": "0:07:32", "throughput": 2702.4, "total_tokens": 9007200}
6736
+ {"current_steps": 33595, "total_steps": 38150, "loss": 0.1634, "lr": 2.140941212345507e-06, "epoch": 8.80602883355177, "percentage": 88.06, "elapsed_time": "0:55:33", "remaining_time": "0:07:31", "throughput": 2702.54, "total_tokens": 9009152}
6737
+ {"current_steps": 33600, "total_steps": 38150, "loss": 0.0768, "lr": 2.136312684745623e-06, "epoch": 8.807339449541285, "percentage": 88.07, "elapsed_time": "0:55:34", "remaining_time": "0:07:31", "throughput": 2702.77, "total_tokens": 9011744}
6738
+ {"current_steps": 33605, "total_steps": 38150, "loss": 0.1727, "lr": 2.1316889424759874e-06, "epoch": 8.808650065530799, "percentage": 88.09, "elapsed_time": "0:55:34", "remaining_time": "0:07:31", "throughput": 2702.86, "total_tokens": 9013424}
6739
+ {"current_steps": 33610, "total_steps": 38150, "loss": 0.1564, "lr": 2.1270699865043405e-06, "epoch": 8.809960681520314, "percentage": 88.1, "elapsed_time": "0:55:35", "remaining_time": "0:07:30", "throughput": 2702.82, "total_tokens": 9014384}
6740
+ {"current_steps": 33615, "total_steps": 38150, "loss": 0.2011, "lr": 2.122455817797428e-06, "epoch": 8.81127129750983, "percentage": 88.11, "elapsed_time": "0:55:35", "remaining_time": "0:07:30", "throughput": 2702.93, "total_tokens": 9015936}
6741
+ {"current_steps": 33620, "total_steps": 38150, "loss": 0.1366, "lr": 2.1178464373209807e-06, "epoch": 8.812581913499345, "percentage": 88.13, "elapsed_time": "0:55:36", "remaining_time": "0:07:29", "throughput": 2702.92, "total_tokens": 9017040}
6742
+ {"current_steps": 33625, "total_steps": 38150, "loss": 0.3266, "lr": 2.1132418460397376e-06, "epoch": 8.813892529488859, "percentage": 88.14, "elapsed_time": "0:55:36", "remaining_time": "0:07:29", "throughput": 2702.97, "total_tokens": 9018464}
6743
+ {"current_steps": 33630, "total_steps": 38150, "loss": 0.1096, "lr": 2.1086420449174303e-06, "epoch": 8.815203145478375, "percentage": 88.15, "elapsed_time": "0:55:36", "remaining_time": "0:07:28", "throughput": 2702.99, "total_tokens": 9019648}
6744
+ {"current_steps": 33635, "total_steps": 38150, "loss": 0.1754, "lr": 2.1040470349167923e-06, "epoch": 8.81651376146789, "percentage": 88.17, "elapsed_time": "0:55:37", "remaining_time": "0:07:27", "throughput": 2703.09, "total_tokens": 9021360}
6745
+ {"current_steps": 33640, "total_steps": 38150, "loss": 0.1223, "lr": 2.0994568169995455e-06, "epoch": 8.817824377457406, "percentage": 88.18, "elapsed_time": "0:55:37", "remaining_time": "0:07:27", "throughput": 2703.1, "total_tokens": 9022496}
6746
+ {"current_steps": 33645, "total_steps": 38150, "loss": 0.1329, "lr": 2.0948713921264214e-06, "epoch": 8.81913499344692, "percentage": 88.19, "elapsed_time": "0:55:38", "remaining_time": "0:07:26", "throughput": 2703.14, "total_tokens": 9023792}
6747
+ {"current_steps": 33650, "total_steps": 38150, "loss": 0.1012, "lr": 2.0902907612571355e-06, "epoch": 8.820445609436435, "percentage": 88.2, "elapsed_time": "0:55:38", "remaining_time": "0:07:26", "throughput": 2703.15, "total_tokens": 9024928}
6748
+ {"current_steps": 33655, "total_steps": 38150, "loss": 0.2293, "lr": 2.085714925350407e-06, "epoch": 8.82175622542595, "percentage": 88.22, "elapsed_time": "0:55:39", "remaining_time": "0:07:25", "throughput": 2703.19, "total_tokens": 9026256}
6749
+ {"current_steps": 33660, "total_steps": 38150, "loss": 0.2507, "lr": 2.0811438853639537e-06, "epoch": 8.823066841415466, "percentage": 88.23, "elapsed_time": "0:55:39", "remaining_time": "0:07:25", "throughput": 2703.28, "total_tokens": 9027904}
6750
+ {"current_steps": 33665, "total_steps": 38150, "loss": 0.203, "lr": 2.076577642254485e-06, "epoch": 8.82437745740498, "percentage": 88.24, "elapsed_time": "0:55:40", "remaining_time": "0:07:24", "throughput": 2703.32, "total_tokens": 9029168}
6751
+ {"current_steps": 33670, "total_steps": 38150, "loss": 0.1707, "lr": 2.072016196977708e-06, "epoch": 8.825688073394495, "percentage": 88.26, "elapsed_time": "0:55:40", "remaining_time": "0:07:24", "throughput": 2703.33, "total_tokens": 9030336}
6752
+ {"current_steps": 33675, "total_steps": 38150, "loss": 0.1469, "lr": 2.0674595504883265e-06, "epoch": 8.82699868938401, "percentage": 88.27, "elapsed_time": "0:55:40", "remaining_time": "0:07:23", "throughput": 2703.33, "total_tokens": 9031488}
6753
+ {"current_steps": 33680, "total_steps": 38150, "loss": 0.1293, "lr": 2.062907703740033e-06, "epoch": 8.828309305373526, "percentage": 88.28, "elapsed_time": "0:55:41", "remaining_time": "0:07:23", "throughput": 2703.44, "total_tokens": 9033152}
6754
+ {"current_steps": 33685, "total_steps": 38150, "loss": 0.1275, "lr": 2.0583606576855236e-06, "epoch": 8.82961992136304, "percentage": 88.3, "elapsed_time": "0:55:41", "remaining_time": "0:07:22", "throughput": 2703.45, "total_tokens": 9034352}
6755
+ {"current_steps": 33690, "total_steps": 38150, "loss": 0.1684, "lr": 2.0538184132764897e-06, "epoch": 8.830930537352556, "percentage": 88.31, "elapsed_time": "0:55:42", "remaining_time": "0:07:22", "throughput": 2703.49, "total_tokens": 9035712}
6756
+ {"current_steps": 33695, "total_steps": 38150, "loss": 0.1374, "lr": 2.0492809714636156e-06, "epoch": 8.832241153342071, "percentage": 88.32, "elapsed_time": "0:55:42", "remaining_time": "0:07:21", "throughput": 2703.47, "total_tokens": 9036784}
6757
+ {"current_steps": 33700, "total_steps": 38150, "loss": 0.1189, "lr": 2.044748333196572e-06, "epoch": 8.833551769331585, "percentage": 88.34, "elapsed_time": "0:55:43", "remaining_time": "0:07:21", "throughput": 2703.56, "total_tokens": 9038256}
6758
+ {"current_steps": 33705, "total_steps": 38150, "loss": 0.1156, "lr": 2.040220499424042e-06, "epoch": 8.8348623853211, "percentage": 88.35, "elapsed_time": "0:55:43", "remaining_time": "0:07:20", "throughput": 2703.62, "total_tokens": 9039712}
6759
+ {"current_steps": 33710, "total_steps": 38150, "loss": 0.1614, "lr": 2.0356974710936976e-06, "epoch": 8.836173001310616, "percentage": 88.36, "elapsed_time": "0:55:44", "remaining_time": "0:07:20", "throughput": 2703.72, "total_tokens": 9041440}
6760
+ {"current_steps": 33715, "total_steps": 38150, "loss": 0.0998, "lr": 2.0311792491521874e-06, "epoch": 8.837483617300132, "percentage": 88.37, "elapsed_time": "0:55:44", "remaining_time": "0:07:19", "throughput": 2703.69, "total_tokens": 9042448}
6761
+ {"current_steps": 33720, "total_steps": 38150, "loss": 0.1645, "lr": 2.0266658345451715e-06, "epoch": 8.838794233289645, "percentage": 88.39, "elapsed_time": "0:55:44", "remaining_time": "0:07:19", "throughput": 2703.73, "total_tokens": 9043824}
6762
+ {"current_steps": 33725, "total_steps": 38150, "loss": 0.1664, "lr": 2.022157228217303e-06, "epoch": 8.840104849279161, "percentage": 88.4, "elapsed_time": "0:55:45", "remaining_time": "0:07:18", "throughput": 2703.77, "total_tokens": 9045168}
6763
+ {"current_steps": 33730, "total_steps": 38150, "loss": 0.128, "lr": 2.0176534311122263e-06, "epoch": 8.841415465268676, "percentage": 88.41, "elapsed_time": "0:55:45", "remaining_time": "0:07:18", "throughput": 2703.78, "total_tokens": 9046368}
6764
+ {"current_steps": 33735, "total_steps": 38150, "loss": 0.2387, "lr": 2.0131544441725715e-06, "epoch": 8.842726081258192, "percentage": 88.43, "elapsed_time": "0:55:46", "remaining_time": "0:07:17", "throughput": 2703.77, "total_tokens": 9047520}
6765
+ {"current_steps": 33740, "total_steps": 38150, "loss": 0.1452, "lr": 2.0086602683399764e-06, "epoch": 8.844036697247706, "percentage": 88.44, "elapsed_time": "0:55:46", "remaining_time": "0:07:17", "throughput": 2703.83, "total_tokens": 9048960}
6766
+ {"current_steps": 33745, "total_steps": 38150, "loss": 0.0623, "lr": 2.0041709045550612e-06, "epoch": 8.845347313237221, "percentage": 88.45, "elapsed_time": "0:55:47", "remaining_time": "0:07:16", "throughput": 2703.74, "total_tokens": 9049744}
6767
+ {"current_steps": 33750, "total_steps": 38150, "loss": 0.2936, "lr": 1.9996863537574406e-06, "epoch": 8.846657929226737, "percentage": 88.47, "elapsed_time": "0:55:47", "remaining_time": "0:07:16", "throughput": 2703.78, "total_tokens": 9051040}
6768
+ {"current_steps": 33755, "total_steps": 38150, "loss": 0.0848, "lr": 1.9952066168857252e-06, "epoch": 8.847968545216252, "percentage": 88.48, "elapsed_time": "0:55:47", "remaining_time": "0:07:15", "throughput": 2703.71, "total_tokens": 9051920}
6769
+ {"current_steps": 33760, "total_steps": 38150, "loss": 0.4975, "lr": 1.990731694877515e-06, "epoch": 8.849279161205766, "percentage": 88.49, "elapsed_time": "0:55:48", "remaining_time": "0:07:15", "throughput": 2703.82, "total_tokens": 9053600}
6770
+ {"current_steps": 33765, "total_steps": 38150, "loss": 0.1857, "lr": 1.9862615886694054e-06, "epoch": 8.850589777195282, "percentage": 88.51, "elapsed_time": "0:55:48", "remaining_time": "0:07:14", "throughput": 2703.84, "total_tokens": 9054848}
6771
+ {"current_steps": 33770, "total_steps": 38150, "loss": 0.185, "lr": 1.9817962991969805e-06, "epoch": 8.851900393184797, "percentage": 88.52, "elapsed_time": "0:55:49", "remaining_time": "0:07:14", "throughput": 2703.85, "total_tokens": 9056032}
6772
+ {"current_steps": 33775, "total_steps": 38150, "loss": 0.1935, "lr": 1.977335827394816e-06, "epoch": 8.853211009174313, "percentage": 88.53, "elapsed_time": "0:55:49", "remaining_time": "0:07:13", "throughput": 2703.86, "total_tokens": 9057216}
6773
+ {"current_steps": 33780, "total_steps": 38150, "loss": 0.1577, "lr": 1.972880174196484e-06, "epoch": 8.854521625163827, "percentage": 88.55, "elapsed_time": "0:55:50", "remaining_time": "0:07:13", "throughput": 2703.9, "total_tokens": 9058528}
6774
+ {"current_steps": 33785, "total_steps": 38150, "loss": 0.1363, "lr": 1.9684293405345445e-06, "epoch": 8.855832241153342, "percentage": 88.56, "elapsed_time": "0:55:50", "remaining_time": "0:07:12", "throughput": 2703.88, "total_tokens": 9059568}
6775
+ {"current_steps": 33790, "total_steps": 38150, "loss": 0.2098, "lr": 1.9639833273405516e-06, "epoch": 8.857142857142858, "percentage": 88.57, "elapsed_time": "0:55:51", "remaining_time": "0:07:12", "throughput": 2703.93, "total_tokens": 9060960}
6776
+ {"current_steps": 33795, "total_steps": 38150, "loss": 0.1187, "lr": 1.959542135545042e-06, "epoch": 8.858453473132371, "percentage": 88.58, "elapsed_time": "0:55:51", "remaining_time": "0:07:11", "throughput": 2704.03, "total_tokens": 9062576}
6777
+ {"current_steps": 33800, "total_steps": 38150, "loss": 0.1352, "lr": 1.9551057660775527e-06, "epoch": 8.859764089121887, "percentage": 88.6, "elapsed_time": "0:55:51", "remaining_time": "0:07:11", "throughput": 2704.09, "total_tokens": 9064096}
6778
+ {"current_steps": 33805, "total_steps": 38150, "loss": 0.2148, "lr": 1.950674219866608e-06, "epoch": 8.861074705111402, "percentage": 88.61, "elapsed_time": "0:55:52", "remaining_time": "0:07:10", "throughput": 2704.15, "total_tokens": 9065552}
6779
+ {"current_steps": 33810, "total_steps": 38150, "loss": 0.1384, "lr": 1.9462474978397204e-06, "epoch": 8.862385321100918, "percentage": 88.62, "elapsed_time": "0:55:52", "remaining_time": "0:07:10", "throughput": 2704.21, "total_tokens": 9066992}
6780
+ {"current_steps": 33815, "total_steps": 38150, "loss": 0.1814, "lr": 1.941825600923397e-06, "epoch": 8.863695937090432, "percentage": 88.64, "elapsed_time": "0:55:53", "remaining_time": "0:07:09", "throughput": 2704.24, "total_tokens": 9068368}
6781
+ {"current_steps": 33820, "total_steps": 38150, "loss": 0.2899, "lr": 1.937408530043136e-06, "epoch": 8.865006553079947, "percentage": 88.65, "elapsed_time": "0:55:53", "remaining_time": "0:07:09", "throughput": 2704.3, "total_tokens": 9069840}
6782
+ {"current_steps": 33825, "total_steps": 38150, "loss": 0.2376, "lr": 1.9329962861234257e-06, "epoch": 8.866317169069463, "percentage": 88.66, "elapsed_time": "0:55:54", "remaining_time": "0:07:08", "throughput": 2704.35, "total_tokens": 9071248}
6783
+ {"current_steps": 33830, "total_steps": 38150, "loss": 0.1063, "lr": 1.9285888700877303e-06, "epoch": 8.867627785058978, "percentage": 88.68, "elapsed_time": "0:55:54", "remaining_time": "0:07:08", "throughput": 2704.42, "total_tokens": 9072800}
6784
+ {"current_steps": 33835, "total_steps": 38150, "loss": 0.1449, "lr": 1.92418628285852e-06, "epoch": 8.868938401048492, "percentage": 88.69, "elapsed_time": "0:55:55", "remaining_time": "0:07:07", "throughput": 2704.41, "total_tokens": 9073904}
6785
+ {"current_steps": 33840, "total_steps": 38150, "loss": 0.0604, "lr": 1.9197885253572497e-06, "epoch": 8.870249017038008, "percentage": 88.7, "elapsed_time": "0:55:55", "remaining_time": "0:07:07", "throughput": 2704.44, "total_tokens": 9075184}
6786
+ {"current_steps": 33845, "total_steps": 38150, "loss": 0.1262, "lr": 1.915395598504363e-06, "epoch": 8.871559633027523, "percentage": 88.72, "elapsed_time": "0:55:56", "remaining_time": "0:07:06", "throughput": 2704.47, "total_tokens": 9076464}
6787
+ {"current_steps": 33850, "total_steps": 38150, "loss": 0.2219, "lr": 1.9110075032192894e-06, "epoch": 8.872870249017039, "percentage": 88.73, "elapsed_time": "0:55:56", "remaining_time": "0:07:06", "throughput": 2704.47, "total_tokens": 9077680}
6788
+ {"current_steps": 33855, "total_steps": 38150, "loss": 0.0638, "lr": 1.9066242404204487e-06, "epoch": 8.874180865006553, "percentage": 88.74, "elapsed_time": "0:55:57", "remaining_time": "0:07:05", "throughput": 2704.49, "total_tokens": 9078976}
6789
+ {"current_steps": 33860, "total_steps": 38150, "loss": 0.1245, "lr": 1.902245811025255e-06, "epoch": 8.875491480996068, "percentage": 88.75, "elapsed_time": "0:55:57", "remaining_time": "0:07:05", "throughput": 2704.54, "total_tokens": 9080352}
6790
+ {"current_steps": 33865, "total_steps": 38150, "loss": 0.1482, "lr": 1.8978722159501029e-06, "epoch": 8.876802096985584, "percentage": 88.77, "elapsed_time": "0:55:57", "remaining_time": "0:07:04", "throughput": 2704.56, "total_tokens": 9081552}
6791
+ {"current_steps": 33870, "total_steps": 38150, "loss": 0.1298, "lr": 1.8935034561103764e-06, "epoch": 8.8781127129751, "percentage": 88.78, "elapsed_time": "0:55:58", "remaining_time": "0:07:04", "throughput": 2704.58, "total_tokens": 9082752}
6792
+ {"current_steps": 33875, "total_steps": 38150, "loss": 0.0865, "lr": 1.889139532420453e-06, "epoch": 8.879423328964613, "percentage": 88.79, "elapsed_time": "0:55:58", "remaining_time": "0:07:03", "throughput": 2704.6, "total_tokens": 9083936}
6793
+ {"current_steps": 33880, "total_steps": 38150, "loss": 0.0753, "lr": 1.8847804457936934e-06, "epoch": 8.880733944954128, "percentage": 88.81, "elapsed_time": "0:55:59", "remaining_time": "0:07:03", "throughput": 2704.64, "total_tokens": 9085264}
6794
+ {"current_steps": 33885, "total_steps": 38150, "loss": 0.047, "lr": 1.8804261971424458e-06, "epoch": 8.882044560943644, "percentage": 88.82, "elapsed_time": "0:55:59", "remaining_time": "0:07:02", "throughput": 2704.66, "total_tokens": 9086512}
6795
+ {"current_steps": 33890, "total_steps": 38150, "loss": 0.0705, "lr": 1.8760767873780476e-06, "epoch": 8.883355176933158, "percentage": 88.83, "elapsed_time": "0:55:59", "remaining_time": "0:07:02", "throughput": 2704.66, "total_tokens": 9087648}
6796
+ {"current_steps": 33895, "total_steps": 38150, "loss": 0.2254, "lr": 1.871732217410821e-06, "epoch": 8.884665792922673, "percentage": 88.85, "elapsed_time": "0:56:00", "remaining_time": "0:07:01", "throughput": 2704.73, "total_tokens": 9089120}
6797
+ {"current_steps": 33900, "total_steps": 38150, "loss": 0.072, "lr": 1.8673924881500826e-06, "epoch": 8.885976408912189, "percentage": 88.86, "elapsed_time": "0:56:00", "remaining_time": "0:07:01", "throughput": 2704.76, "total_tokens": 9090368}
6798
+ {"current_steps": 33905, "total_steps": 38150, "loss": 0.1165, "lr": 1.8630576005041228e-06, "epoch": 8.887287024901704, "percentage": 88.87, "elapsed_time": "0:56:01", "remaining_time": "0:07:00", "throughput": 2704.76, "total_tokens": 9091552}
6799
+ {"current_steps": 33910, "total_steps": 38150, "loss": 0.0996, "lr": 1.8587275553802352e-06, "epoch": 8.888597640891218, "percentage": 88.89, "elapsed_time": "0:56:01", "remaining_time": "0:07:00", "throughput": 2704.79, "total_tokens": 9092800}
6800
+ {"current_steps": 33915, "total_steps": 38150, "loss": 0.2382, "lr": 1.854402353684681e-06, "epoch": 8.889908256880734, "percentage": 88.9, "elapsed_time": "0:56:02", "remaining_time": "0:06:59", "throughput": 2704.9, "total_tokens": 9094512}
6801
+ {"current_steps": 33920, "total_steps": 38150, "loss": 0.1238, "lr": 1.8500819963227223e-06, "epoch": 8.89121887287025, "percentage": 88.91, "elapsed_time": "0:56:02", "remaining_time": "0:06:59", "throughput": 2704.89, "total_tokens": 9095776}
6802
+ {"current_steps": 33925, "total_steps": 38150, "loss": 0.1459, "lr": 1.8457664841986023e-06, "epoch": 8.892529488859765, "percentage": 88.93, "elapsed_time": "0:56:03", "remaining_time": "0:06:58", "throughput": 2704.89, "total_tokens": 9096992}
6803
+ {"current_steps": 33930, "total_steps": 38150, "loss": 0.0888, "lr": 1.8414558182155456e-06, "epoch": 8.893840104849279, "percentage": 88.94, "elapsed_time": "0:56:03", "remaining_time": "0:06:58", "throughput": 2704.9, "total_tokens": 9098160}
6804
+ {"current_steps": 33935, "total_steps": 38150, "loss": 0.1295, "lr": 1.8371499992757723e-06, "epoch": 8.895150720838794, "percentage": 88.95, "elapsed_time": "0:56:04", "remaining_time": "0:06:57", "throughput": 2705.04, "total_tokens": 9100016}
6805
+ {"current_steps": 33940, "total_steps": 38150, "loss": 0.0795, "lr": 1.832849028280484e-06, "epoch": 8.89646133682831, "percentage": 88.96, "elapsed_time": "0:56:04", "remaining_time": "0:06:57", "throughput": 2705.06, "total_tokens": 9101248}
6806
+ {"current_steps": 33945, "total_steps": 38150, "loss": 0.037, "lr": 1.8285529061298684e-06, "epoch": 8.897771952817825, "percentage": 88.98, "elapsed_time": "0:56:04", "remaining_time": "0:06:56", "throughput": 2705.08, "total_tokens": 9102448}
6807
+ {"current_steps": 33950, "total_steps": 38150, "loss": 0.0648, "lr": 1.82426163372309e-06, "epoch": 8.899082568807339, "percentage": 88.99, "elapsed_time": "0:56:05", "remaining_time": "0:06:56", "throughput": 2705.08, "total_tokens": 9103568}
6808
+ {"current_steps": 33955, "total_steps": 38150, "loss": 0.0825, "lr": 1.8199752119583052e-06, "epoch": 8.900393184796854, "percentage": 89.0, "elapsed_time": "0:56:05", "remaining_time": "0:06:55", "throughput": 2705.07, "total_tokens": 9104656}
6809
+ {"current_steps": 33960, "total_steps": 38150, "loss": 0.2582, "lr": 1.8156936417326576e-06, "epoch": 8.90170380078637, "percentage": 89.02, "elapsed_time": "0:56:06", "remaining_time": "0:06:55", "throughput": 2705.16, "total_tokens": 9106288}
6810
+ {"current_steps": 33965, "total_steps": 38150, "loss": 0.1101, "lr": 1.8114169239422691e-06, "epoch": 8.903014416775886, "percentage": 89.03, "elapsed_time": "0:56:06", "remaining_time": "0:06:54", "throughput": 2705.21, "total_tokens": 9107728}
6811
+ {"current_steps": 33970, "total_steps": 38150, "loss": 0.1721, "lr": 1.8071450594822543e-06, "epoch": 8.9043250327654, "percentage": 89.04, "elapsed_time": "0:56:07", "remaining_time": "0:06:54", "throughput": 2705.21, "total_tokens": 9108912}
6812
+ {"current_steps": 33975, "total_steps": 38150, "loss": 0.091, "lr": 1.8028780492467007e-06, "epoch": 8.905635648754915, "percentage": 89.06, "elapsed_time": "0:56:07", "remaining_time": "0:06:53", "throughput": 2705.35, "total_tokens": 9110784}
6813
+ {"current_steps": 33980, "total_steps": 38150, "loss": 0.1645, "lr": 1.798615894128694e-06, "epoch": 8.90694626474443, "percentage": 89.07, "elapsed_time": "0:56:08", "remaining_time": "0:06:53", "throughput": 2705.37, "total_tokens": 9112016}
6814
+ {"current_steps": 33985, "total_steps": 38150, "loss": 0.0913, "lr": 1.7943585950202902e-06, "epoch": 8.908256880733944, "percentage": 89.08, "elapsed_time": "0:56:08", "remaining_time": "0:06:52", "throughput": 2705.38, "total_tokens": 9113184}
6815
+ {"current_steps": 33990, "total_steps": 38150, "loss": 0.161, "lr": 1.7901061528125347e-06, "epoch": 8.90956749672346, "percentage": 89.1, "elapsed_time": "0:56:09", "remaining_time": "0:06:52", "throughput": 2705.46, "total_tokens": 9114832}
6816
+ {"current_steps": 33995, "total_steps": 38150, "loss": 0.1357, "lr": 1.7858585683954597e-06, "epoch": 8.910878112712975, "percentage": 89.11, "elapsed_time": "0:56:09", "remaining_time": "0:06:51", "throughput": 2705.5, "total_tokens": 9116128}
6817
+ {"current_steps": 34000, "total_steps": 38150, "loss": 0.1117, "lr": 1.7816158426580742e-06, "epoch": 8.91218872870249, "percentage": 89.12, "elapsed_time": "0:56:09", "remaining_time": "0:06:51", "throughput": 2705.53, "total_tokens": 9117408}
6818
+ {"current_steps": 34005, "total_steps": 38150, "loss": 0.0972, "lr": 1.7773779764883758e-06, "epoch": 8.913499344692005, "percentage": 89.13, "elapsed_time": "0:56:10", "remaining_time": "0:06:50", "throughput": 2705.76, "total_tokens": 9120192}
6819
+ {"current_steps": 34010, "total_steps": 38150, "loss": 0.1415, "lr": 1.7731449707733412e-06, "epoch": 8.91480996068152, "percentage": 89.15, "elapsed_time": "0:56:11", "remaining_time": "0:06:50", "throughput": 2705.82, "total_tokens": 9121616}
6820
+ {"current_steps": 34015, "total_steps": 38150, "loss": 0.1127, "lr": 1.7689168263989309e-06, "epoch": 8.916120576671036, "percentage": 89.16, "elapsed_time": "0:56:11", "remaining_time": "0:06:49", "throughput": 2705.86, "total_tokens": 9122912}
6821
+ {"current_steps": 34020, "total_steps": 38150, "loss": 0.1985, "lr": 1.7646935442500872e-06, "epoch": 8.917431192660551, "percentage": 89.17, "elapsed_time": "0:56:12", "remaining_time": "0:06:49", "throughput": 2705.9, "total_tokens": 9124336}
6822
+ {"current_steps": 34025, "total_steps": 38150, "loss": 0.1273, "lr": 1.760475125210745e-06, "epoch": 8.918741808650065, "percentage": 89.19, "elapsed_time": "0:56:12", "remaining_time": "0:06:48", "throughput": 2705.9, "total_tokens": 9125472}
6823
+ {"current_steps": 34030, "total_steps": 38150, "loss": 0.2448, "lr": 1.7562615701637975e-06, "epoch": 8.92005242463958, "percentage": 89.2, "elapsed_time": "0:56:12", "remaining_time": "0:06:48", "throughput": 2705.88, "total_tokens": 9126560}
6824
+ {"current_steps": 34035, "total_steps": 38150, "loss": 0.1394, "lr": 1.7520528799911395e-06, "epoch": 8.921363040629096, "percentage": 89.21, "elapsed_time": "0:56:13", "remaining_time": "0:06:47", "throughput": 2705.88, "total_tokens": 9127776}
6825
+ {"current_steps": 34040, "total_steps": 38150, "loss": 0.1162, "lr": 1.7478490555736471e-06, "epoch": 8.922673656618612, "percentage": 89.23, "elapsed_time": "0:56:13", "remaining_time": "0:06:47", "throughput": 2705.97, "total_tokens": 9129248}
6826
+ {"current_steps": 34045, "total_steps": 38150, "loss": 0.2044, "lr": 1.7436500977911663e-06, "epoch": 8.923984272608125, "percentage": 89.24, "elapsed_time": "0:56:14", "remaining_time": "0:06:46", "throughput": 2706.07, "total_tokens": 9130896}
6827
+ {"current_steps": 34050, "total_steps": 38150, "loss": 0.1418, "lr": 1.7394560075225414e-06, "epoch": 8.92529488859764, "percentage": 89.25, "elapsed_time": "0:56:14", "remaining_time": "0:06:46", "throughput": 2706.05, "total_tokens": 9131936}
6828
+ {"current_steps": 34055, "total_steps": 38150, "loss": 0.1266, "lr": 1.7352667856455812e-06, "epoch": 8.926605504587156, "percentage": 89.27, "elapsed_time": "0:56:15", "remaining_time": "0:06:45", "throughput": 2706.07, "total_tokens": 9133168}
6829
+ {"current_steps": 34060, "total_steps": 38150, "loss": 0.123, "lr": 1.7310824330370873e-06, "epoch": 8.927916120576672, "percentage": 89.28, "elapsed_time": "0:56:15", "remaining_time": "0:06:45", "throughput": 2706.06, "total_tokens": 9134272}
6830
+ {"current_steps": 34065, "total_steps": 38150, "loss": 0.1439, "lr": 1.7269029505728313e-06, "epoch": 8.929226736566186, "percentage": 89.29, "elapsed_time": "0:56:15", "remaining_time": "0:06:44", "throughput": 2706.08, "total_tokens": 9135520}
6831
+ {"current_steps": 34070, "total_steps": 38150, "loss": 0.0731, "lr": 1.7227283391275772e-06, "epoch": 8.930537352555701, "percentage": 89.31, "elapsed_time": "0:56:16", "remaining_time": "0:06:44", "throughput": 2706.11, "total_tokens": 9136752}
6832
+ {"current_steps": 34075, "total_steps": 38150, "loss": 0.0518, "lr": 1.7185585995750598e-06, "epoch": 8.931847968545217, "percentage": 89.32, "elapsed_time": "0:56:16", "remaining_time": "0:06:43", "throughput": 2706.02, "total_tokens": 9137568}
6833
+ {"current_steps": 34080, "total_steps": 38150, "loss": 0.0844, "lr": 1.714393732788e-06, "epoch": 8.93315858453473, "percentage": 89.33, "elapsed_time": "0:56:17", "remaining_time": "0:06:43", "throughput": 2706.02, "total_tokens": 9138688}
6834
+ {"current_steps": 34085, "total_steps": 38150, "loss": 0.0774, "lr": 1.710233739638098e-06, "epoch": 8.934469200524246, "percentage": 89.34, "elapsed_time": "0:56:17", "remaining_time": "0:06:42", "throughput": 2706.03, "total_tokens": 9139872}
6835
+ {"current_steps": 34090, "total_steps": 38150, "loss": 0.2076, "lr": 1.706078620996035e-06, "epoch": 8.935779816513762, "percentage": 89.36, "elapsed_time": "0:56:18", "remaining_time": "0:06:42", "throughput": 2706.23, "total_tokens": 9142192}
6836
+ {"current_steps": 34095, "total_steps": 38150, "loss": 0.1905, "lr": 1.7019283777314653e-06, "epoch": 8.937090432503277, "percentage": 89.37, "elapsed_time": "0:56:18", "remaining_time": "0:06:41", "throughput": 2706.29, "total_tokens": 9143664}
6837
+ {"current_steps": 34100, "total_steps": 38150, "loss": 0.1712, "lr": 1.6977830107130305e-06, "epoch": 8.938401048492791, "percentage": 89.38, "elapsed_time": "0:56:19", "remaining_time": "0:06:41", "throughput": 2706.35, "total_tokens": 9145136}
6838
+ {"current_steps": 34105, "total_steps": 38150, "loss": 0.1427, "lr": 1.6936425208083473e-06, "epoch": 8.939711664482306, "percentage": 89.4, "elapsed_time": "0:56:19", "remaining_time": "0:06:40", "throughput": 2706.41, "total_tokens": 9146544}
6839
+ {"current_steps": 34110, "total_steps": 38150, "loss": 0.0989, "lr": 1.6895069088840144e-06, "epoch": 8.941022280471822, "percentage": 89.41, "elapsed_time": "0:56:20", "remaining_time": "0:06:40", "throughput": 2706.52, "total_tokens": 9148336}
6840
+ {"current_steps": 34115, "total_steps": 38150, "loss": 0.3596, "lr": 1.6853761758056114e-06, "epoch": 8.942332896461338, "percentage": 89.42, "elapsed_time": "0:56:20", "remaining_time": "0:06:39", "throughput": 2706.78, "total_tokens": 9151376}
6841
+ {"current_steps": 34120, "total_steps": 38150, "loss": 0.085, "lr": 1.6812503224376857e-06, "epoch": 8.943643512450851, "percentage": 89.44, "elapsed_time": "0:56:21", "remaining_time": "0:06:39", "throughput": 2706.73, "total_tokens": 9152304}
6842
+ {"current_steps": 34125, "total_steps": 38150, "loss": 0.1676, "lr": 1.67712934964378e-06, "epoch": 8.944954128440367, "percentage": 89.45, "elapsed_time": "0:56:21", "remaining_time": "0:06:38", "throughput": 2706.78, "total_tokens": 9153632}
6843
+ {"current_steps": 34130, "total_steps": 38150, "loss": 0.1836, "lr": 1.673013258286399e-06, "epoch": 8.946264744429882, "percentage": 89.46, "elapsed_time": "0:56:22", "remaining_time": "0:06:38", "throughput": 2706.81, "total_tokens": 9154928}
6844
+ {"current_steps": 34135, "total_steps": 38150, "loss": 0.0753, "lr": 1.668902049227039e-06, "epoch": 8.947575360419398, "percentage": 89.48, "elapsed_time": "0:56:22", "remaining_time": "0:06:37", "throughput": 2706.82, "total_tokens": 9156096}
6845
+ {"current_steps": 34140, "total_steps": 38150, "loss": 0.1673, "lr": 1.664795723326168e-06, "epoch": 8.948885976408912, "percentage": 89.49, "elapsed_time": "0:56:23", "remaining_time": "0:06:37", "throughput": 2706.88, "total_tokens": 9157472}
6846
+ {"current_steps": 34145, "total_steps": 38150, "loss": 0.2231, "lr": 1.660694281443234e-06, "epoch": 8.950196592398427, "percentage": 89.5, "elapsed_time": "0:56:23", "remaining_time": "0:06:36", "throughput": 2706.97, "total_tokens": 9159088}
6847
+ {"current_steps": 34150, "total_steps": 38150, "loss": 0.2205, "lr": 1.6565977244366564e-06, "epoch": 8.951507208387943, "percentage": 89.52, "elapsed_time": "0:56:23", "remaining_time": "0:06:36", "throughput": 2706.99, "total_tokens": 9160304}
6848
+ {"current_steps": 34155, "total_steps": 38150, "loss": 0.1281, "lr": 1.6525060531638408e-06, "epoch": 8.952817824377458, "percentage": 89.53, "elapsed_time": "0:56:24", "remaining_time": "0:06:35", "throughput": 2707.05, "total_tokens": 9161824}
6849
+ {"current_steps": 34160, "total_steps": 38150, "loss": 0.2108, "lr": 1.6484192684811634e-06, "epoch": 8.954128440366972, "percentage": 89.54, "elapsed_time": "0:56:24", "remaining_time": "0:06:35", "throughput": 2707.07, "total_tokens": 9163040}
6850
+ {"current_steps": 34165, "total_steps": 38150, "loss": 0.1466, "lr": 1.644337371243987e-06, "epoch": 8.955439056356488, "percentage": 89.55, "elapsed_time": "0:56:25", "remaining_time": "0:06:34", "throughput": 2707.11, "total_tokens": 9164352}
6851
+ {"current_steps": 34170, "total_steps": 38150, "loss": 0.1232, "lr": 1.6402603623066448e-06, "epoch": 8.956749672346003, "percentage": 89.57, "elapsed_time": "0:56:25", "remaining_time": "0:06:34", "throughput": 2707.12, "total_tokens": 9165488}
6852
+ {"current_steps": 34175, "total_steps": 38150, "loss": 0.1799, "lr": 1.6361882425224461e-06, "epoch": 8.958060288335517, "percentage": 89.58, "elapsed_time": "0:56:26", "remaining_time": "0:06:33", "throughput": 2707.21, "total_tokens": 9167280}
6853
+ {"current_steps": 34180, "total_steps": 38150, "loss": 0.1357, "lr": 1.6321210127436814e-06, "epoch": 8.959370904325032, "percentage": 89.59, "elapsed_time": "0:56:26", "remaining_time": "0:06:33", "throughput": 2707.28, "total_tokens": 9168736}
6854
+ {"current_steps": 34185, "total_steps": 38150, "loss": 0.1513, "lr": 1.6280586738216114e-06, "epoch": 8.960681520314548, "percentage": 89.61, "elapsed_time": "0:56:27", "remaining_time": "0:06:32", "throughput": 2707.26, "total_tokens": 9169776}
6855
+ {"current_steps": 34190, "total_steps": 38150, "loss": 0.2785, "lr": 1.6240012266064752e-06, "epoch": 8.961992136304064, "percentage": 89.62, "elapsed_time": "0:56:27", "remaining_time": "0:06:32", "throughput": 2707.2, "total_tokens": 9170752}
6856
+ {"current_steps": 34195, "total_steps": 38150, "loss": 0.1045, "lr": 1.619948671947491e-06, "epoch": 8.963302752293577, "percentage": 89.63, "elapsed_time": "0:56:27", "remaining_time": "0:06:31", "throughput": 2707.21, "total_tokens": 9171984}
6857
+ {"current_steps": 34200, "total_steps": 38150, "loss": 0.1617, "lr": 1.6159010106928523e-06, "epoch": 8.964613368283093, "percentage": 89.65, "elapsed_time": "0:56:28", "remaining_time": "0:06:31", "throughput": 2707.27, "total_tokens": 9173440}
6858
+ {"current_steps": 34205, "total_steps": 38150, "loss": 0.1225, "lr": 1.611858243689729e-06, "epoch": 8.965923984272608, "percentage": 89.66, "elapsed_time": "0:56:28", "remaining_time": "0:06:30", "throughput": 2707.25, "total_tokens": 9174464}
6859
+ {"current_steps": 34210, "total_steps": 38150, "loss": 0.232, "lr": 1.6078203717842633e-06, "epoch": 8.967234600262124, "percentage": 89.67, "elapsed_time": "0:56:29", "remaining_time": "0:06:30", "throughput": 2707.35, "total_tokens": 9176080}
6860
+ {"current_steps": 34215, "total_steps": 38150, "loss": 0.1267, "lr": 1.6037873958215738e-06, "epoch": 8.968545216251638, "percentage": 89.69, "elapsed_time": "0:56:29", "remaining_time": "0:06:29", "throughput": 2707.5, "total_tokens": 9178208}
6861
+ {"current_steps": 34220, "total_steps": 38150, "loss": 0.1376, "lr": 1.5997593166457575e-06, "epoch": 8.969855832241153, "percentage": 89.7, "elapsed_time": "0:56:30", "remaining_time": "0:06:29", "throughput": 2707.52, "total_tokens": 9179456}
6862
+ {"current_steps": 34225, "total_steps": 38150, "loss": 0.1391, "lr": 1.5957361350998846e-06, "epoch": 8.971166448230669, "percentage": 89.71, "elapsed_time": "0:56:30", "remaining_time": "0:06:28", "throughput": 2707.56, "total_tokens": 9180736}
6863
+ {"current_steps": 34230, "total_steps": 38150, "loss": 0.1413, "lr": 1.5917178520259979e-06, "epoch": 8.972477064220184, "percentage": 89.72, "elapsed_time": "0:56:31", "remaining_time": "0:06:28", "throughput": 2707.66, "total_tokens": 9182416}
6864
+ {"current_steps": 34235, "total_steps": 38150, "loss": 0.1695, "lr": 1.5877044682651194e-06, "epoch": 8.973787680209698, "percentage": 89.74, "elapsed_time": "0:56:31", "remaining_time": "0:06:27", "throughput": 2707.72, "total_tokens": 9183904}
6865
+ {"current_steps": 34240, "total_steps": 38150, "loss": 0.2963, "lr": 1.5836959846572408e-06, "epoch": 8.975098296199214, "percentage": 89.75, "elapsed_time": "0:56:32", "remaining_time": "0:06:27", "throughput": 2707.76, "total_tokens": 9185232}
6866
+ {"current_steps": 34245, "total_steps": 38150, "loss": 0.1125, "lr": 1.579692402041333e-06, "epoch": 8.97640891218873, "percentage": 89.76, "elapsed_time": "0:56:32", "remaining_time": "0:06:26", "throughput": 2707.8, "total_tokens": 9186480}
6867
+ {"current_steps": 34250, "total_steps": 38150, "loss": 0.1772, "lr": 1.5756937212553391e-06, "epoch": 8.977719528178245, "percentage": 89.78, "elapsed_time": "0:56:33", "remaining_time": "0:06:26", "throughput": 2707.8, "total_tokens": 9187696}
6868
+ {"current_steps": 34255, "total_steps": 38150, "loss": 0.1345, "lr": 1.5716999431361762e-06, "epoch": 8.979030144167758, "percentage": 89.79, "elapsed_time": "0:56:33", "remaining_time": "0:06:25", "throughput": 2707.88, "total_tokens": 9189168}
6869
+ {"current_steps": 34260, "total_steps": 38150, "loss": 0.067, "lr": 1.5677110685197366e-06, "epoch": 8.980340760157274, "percentage": 89.8, "elapsed_time": "0:56:33", "remaining_time": "0:06:25", "throughput": 2707.85, "total_tokens": 9190192}
6870
+ {"current_steps": 34265, "total_steps": 38150, "loss": 0.2328, "lr": 1.5637270982408775e-06, "epoch": 8.98165137614679, "percentage": 89.82, "elapsed_time": "0:56:34", "remaining_time": "0:06:24", "throughput": 2707.92, "total_tokens": 9191712}
6871
+ {"current_steps": 34270, "total_steps": 38150, "loss": 0.1825, "lr": 1.559748033133443e-06, "epoch": 8.982961992136303, "percentage": 89.83, "elapsed_time": "0:56:34", "remaining_time": "0:06:24", "throughput": 2707.95, "total_tokens": 9192944}
6872
+ {"current_steps": 34275, "total_steps": 38150, "loss": 0.0795, "lr": 1.555773874030242e-06, "epoch": 8.984272608125819, "percentage": 89.84, "elapsed_time": "0:56:35", "remaining_time": "0:06:23", "throughput": 2707.91, "total_tokens": 9193888}
6873
+ {"current_steps": 34280, "total_steps": 38150, "loss": 0.1457, "lr": 1.5518046217630615e-06, "epoch": 8.985583224115334, "percentage": 89.86, "elapsed_time": "0:56:35", "remaining_time": "0:06:23", "throughput": 2707.98, "total_tokens": 9195280}
6874
+ {"current_steps": 34285, "total_steps": 38150, "loss": 0.157, "lr": 1.5478402771626599e-06, "epoch": 8.98689384010485, "percentage": 89.87, "elapsed_time": "0:56:36", "remaining_time": "0:06:22", "throughput": 2707.96, "total_tokens": 9196336}
6875
+ {"current_steps": 34290, "total_steps": 38150, "loss": 0.1633, "lr": 1.5438808410587646e-06, "epoch": 8.988204456094364, "percentage": 89.88, "elapsed_time": "0:56:36", "remaining_time": "0:06:22", "throughput": 2708.05, "total_tokens": 9197920}
6876
+ {"current_steps": 34295, "total_steps": 38150, "loss": 0.1744, "lr": 1.5399263142800825e-06, "epoch": 8.98951507208388, "percentage": 89.9, "elapsed_time": "0:56:36", "remaining_time": "0:06:21", "throughput": 2708.08, "total_tokens": 9199168}
6877
+ {"current_steps": 34300, "total_steps": 38150, "loss": 0.138, "lr": 1.5359766976542873e-06, "epoch": 8.990825688073395, "percentage": 89.91, "elapsed_time": "0:56:37", "remaining_time": "0:06:21", "throughput": 2708.05, "total_tokens": 9200176}
6878
+ {"current_steps": 34305, "total_steps": 38150, "loss": 0.169, "lr": 1.5320319920080262e-06, "epoch": 8.99213630406291, "percentage": 89.92, "elapsed_time": "0:56:37", "remaining_time": "0:06:20", "throughput": 2708.03, "total_tokens": 9201248}
6879
+ {"current_steps": 34310, "total_steps": 38150, "loss": 0.0929, "lr": 1.5280921981669166e-06, "epoch": 8.993446920052424, "percentage": 89.93, "elapsed_time": "0:56:38", "remaining_time": "0:06:20", "throughput": 2707.96, "total_tokens": 9202080}
6880
+ {"current_steps": 34315, "total_steps": 38150, "loss": 0.2718, "lr": 1.5241573169555573e-06, "epoch": 8.99475753604194, "percentage": 89.95, "elapsed_time": "0:56:38", "remaining_time": "0:06:19", "throughput": 2708.07, "total_tokens": 9203952}
6881
+ {"current_steps": 34320, "total_steps": 38150, "loss": 0.3196, "lr": 1.5202273491975061e-06, "epoch": 8.996068152031455, "percentage": 89.96, "elapsed_time": "0:56:39", "remaining_time": "0:06:19", "throughput": 2708.04, "total_tokens": 9204992}
6882
+ {"current_steps": 34325, "total_steps": 38150, "loss": 0.115, "lr": 1.5163022957152994e-06, "epoch": 8.997378768020969, "percentage": 89.97, "elapsed_time": "0:56:39", "remaining_time": "0:06:18", "throughput": 2708.05, "total_tokens": 9206160}
6883
+ {"current_steps": 34330, "total_steps": 38150, "loss": 0.1735, "lr": 1.5123821573304442e-06, "epoch": 8.998689384010484, "percentage": 89.99, "elapsed_time": "0:56:39", "remaining_time": "0:06:18", "throughput": 2708.1, "total_tokens": 9207520}
6884
+ {"current_steps": 34335, "total_steps": 38150, "loss": 0.1172, "lr": 1.5084669348634255e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:56:40", "remaining_time": "0:06:17", "throughput": 2708.0, "total_tokens": 9208488}
6885
+ {"current_steps": 34340, "total_steps": 38150, "loss": 0.0914, "lr": 1.5045566291336854e-06, "epoch": 9.001310615989516, "percentage": 90.01, "elapsed_time": "0:56:41", "remaining_time": "0:06:17", "throughput": 2707.89, "total_tokens": 9209608}
6886
+ {"current_steps": 34344, "total_steps": 38150, "eval_loss": 0.85427325963974, "epoch": 9.002359108781127, "percentage": 90.02, "elapsed_time": "0:56:58", "remaining_time": "0:06:18", "throughput": 2694.72, "total_tokens": 9210648}
6887
+ {"current_steps": 34345, "total_steps": 38150, "loss": 0.2131, "lr": 1.500651240959644e-06, "epoch": 9.00262123197903, "percentage": 90.03, "elapsed_time": "0:56:59", "remaining_time": "0:06:18", "throughput": 2693.62, "total_tokens": 9210792}
6888
+ {"current_steps": 34350, "total_steps": 38150, "loss": 0.0764, "lr": 1.4967507711586976e-06, "epoch": 9.003931847968545, "percentage": 90.04, "elapsed_time": "0:56:59", "remaining_time": "0:06:18", "throughput": 2693.61, "total_tokens": 9211864}
6889
+ {"current_steps": 34355, "total_steps": 38150, "loss": 0.1871, "lr": 1.4928552205472012e-06, "epoch": 9.00524246395806, "percentage": 90.05, "elapsed_time": "0:57:00", "remaining_time": "0:06:17", "throughput": 2693.65, "total_tokens": 9213288}
6890
+ {"current_steps": 34360, "total_steps": 38150, "loss": 0.1973, "lr": 1.488964589940489e-06, "epoch": 9.006553079947576, "percentage": 90.07, "elapsed_time": "0:57:00", "remaining_time": "0:06:17", "throughput": 2693.79, "total_tokens": 9215224}
6891
+ {"current_steps": 34365, "total_steps": 38150, "loss": 0.0915, "lr": 1.4850788801528653e-06, "epoch": 9.00786369593709, "percentage": 90.08, "elapsed_time": "0:57:01", "remaining_time": "0:06:16", "throughput": 2693.77, "total_tokens": 9216280}
6892
+ {"current_steps": 34370, "total_steps": 38150, "loss": 0.0635, "lr": 1.4811980919976043e-06, "epoch": 9.009174311926605, "percentage": 90.09, "elapsed_time": "0:57:01", "remaining_time": "0:06:16", "throughput": 2693.8, "total_tokens": 9217608}